diff options
Diffstat (limited to 'libffi/src')
75 files changed, 32429 insertions, 0 deletions
diff --git a/libffi/src/alpha/ffi.c b/libffi/src/alpha/ffi.c new file mode 100644 index 000000000..8d6b2ba27 --- /dev/null +++ b/libffi/src/alpha/ffi.c @@ -0,0 +1,284 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 1998, 2001, 2007, 2008 Red Hat, Inc. + + Alpha Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> +#include <stdlib.h> + +/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE; + all further uses in this file will refer to the 128-bit type. */ +#if defined(__LONG_DOUBLE_128__) +# if FFI_TYPE_LONGDOUBLE != 4 +# error FFI_TYPE_LONGDOUBLE out of date +# endif +#else +# undef FFI_TYPE_LONGDOUBLE +# define FFI_TYPE_LONGDOUBLE 4 +#endif + +extern void ffi_call_osf(void *, unsigned long, unsigned, void *, void (*)(void)) + FFI_HIDDEN; +extern void ffi_closure_osf(void) FFI_HIDDEN; + + +ffi_status +ffi_prep_cif_machdep(ffi_cif *cif) +{ + /* Adjust cif->bytes to represent a minimum 6 words for the temporary + register argument loading area. */ + if (cif->bytes < 6*FFI_SIZEOF_ARG) + cif->bytes = 6*FFI_SIZEOF_ARG; + + /* Set the return type flag */ + switch (cif->rtype->type) + { + case FFI_TYPE_STRUCT: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + cif->flags = cif->rtype->type; + break; + + case FFI_TYPE_LONGDOUBLE: + /* 128-bit long double is returned in memory, like a struct. */ + cif->flags = FFI_TYPE_STRUCT; + break; + + default: + cif->flags = FFI_TYPE_INT; + break; + } + + return FFI_OK; +} + + +void +ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + unsigned long *stack, *argp; + long i, avn; + ffi_type **arg_types; + + /* If the return value is a struct and we don't have a return + value address then we need to make one. */ + if (rvalue == NULL && cif->flags == FFI_TYPE_STRUCT) + rvalue = alloca(cif->rtype->size); + + /* Allocate the space for the arguments, plus 4 words of temp + space for ffi_call_osf. */ + argp = stack = alloca(cif->bytes + 4*FFI_SIZEOF_ARG); + + if (cif->flags == FFI_TYPE_STRUCT) + *(void **) argp++ = rvalue; + + i = 0; + avn = cif->nargs; + arg_types = cif->arg_types; + + while (i < avn) + { + size_t size = (*arg_types)->size; + + switch ((*arg_types)->type) + { + case FFI_TYPE_SINT8: + *(SINT64 *) argp = *(SINT8 *)(* avalue); + break; + + case FFI_TYPE_UINT8: + *(SINT64 *) argp = *(UINT8 *)(* avalue); + break; + + case FFI_TYPE_SINT16: + *(SINT64 *) argp = *(SINT16 *)(* avalue); + break; + + case FFI_TYPE_UINT16: + *(SINT64 *) argp = *(UINT16 *)(* avalue); + break; + + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + /* Note that unsigned 32-bit quantities are sign extended. */ + *(SINT64 *) argp = *(SINT32 *)(* avalue); + break; + + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_POINTER: + *(UINT64 *) argp = *(UINT64 *)(* avalue); + break; + + case FFI_TYPE_FLOAT: + if (argp - stack < 6) + { + /* Note the conversion -- all the fp regs are loaded as + doubles. The in-register format is the same. */ + *(double *) argp = *(float *)(* avalue); + } + else + *(float *) argp = *(float *)(* avalue); + break; + + case FFI_TYPE_DOUBLE: + *(double *) argp = *(double *)(* avalue); + break; + + case FFI_TYPE_LONGDOUBLE: + /* 128-bit long double is passed by reference. */ + *(long double **) argp = (long double *)(* avalue); + size = sizeof (long double *); + break; + + case FFI_TYPE_STRUCT: + memcpy(argp, *avalue, (*arg_types)->size); + break; + + default: + FFI_ASSERT(0); + } + + argp += ALIGN(size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG; + i++, arg_types++, avalue++; + } + + ffi_call_osf(stack, cif->bytes, cif->flags, rvalue, fn); +} + + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *codeloc) +{ + unsigned int *tramp; + + tramp = (unsigned int *) &closure->tramp[0]; + tramp[0] = 0x47fb0401; /* mov $27,$1 */ + tramp[1] = 0xa77b0010; /* ldq $27,16($27) */ + tramp[2] = 0x6bfb0000; /* jmp $31,($27),0 */ + tramp[3] = 0x47ff041f; /* nop */ + *(void **) &tramp[4] = ffi_closure_osf; + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + /* Flush the Icache. + + Tru64 UNIX as doesn't understand the imb mnemonic, so use call_pal + instead, since both Compaq as and gas can handle it. + + 0x86 is PAL_imb in Tru64 UNIX <alpha/pal.h>. */ + asm volatile ("call_pal 0x86" : : : "memory"); + + return FFI_OK; +} + + +long FFI_HIDDEN +ffi_closure_osf_inner(ffi_closure *closure, void *rvalue, unsigned long *argp) +{ + ffi_cif *cif; + void **avalue; + ffi_type **arg_types; + long i, avn, argn; + + cif = closure->cif; + avalue = alloca(cif->nargs * sizeof(void *)); + + argn = 0; + + /* Copy the caller's structure return address to that the closure + returns the data directly to the caller. */ + if (cif->flags == FFI_TYPE_STRUCT) + { + rvalue = (void *) argp[0]; + argn = 1; + } + + i = 0; + avn = cif->nargs; + arg_types = cif->arg_types; + + /* Grab the addresses of the arguments from the stack frame. */ + while (i < avn) + { + size_t size = arg_types[i]->size; + + switch (arg_types[i]->type) + { + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_POINTER: + case FFI_TYPE_STRUCT: + avalue[i] = &argp[argn]; + break; + + case FFI_TYPE_FLOAT: + if (argn < 6) + { + /* Floats coming from registers need conversion from double + back to float format. */ + *(float *)&argp[argn - 6] = *(double *)&argp[argn - 6]; + avalue[i] = &argp[argn - 6]; + } + else + avalue[i] = &argp[argn]; + break; + + case FFI_TYPE_DOUBLE: + avalue[i] = &argp[argn - (argn < 6 ? 6 : 0)]; + break; + + case FFI_TYPE_LONGDOUBLE: + /* 128-bit long double is passed by reference. */ + avalue[i] = (long double *) argp[argn]; + size = sizeof (long double *); + break; + + default: + abort (); + } + + argn += ALIGN(size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG; + i++; + } + + /* Invoke the closure. */ + closure->fun (cif, rvalue, avalue, closure->user_data); + + /* Tell ffi_closure_osf how to perform return type promotions. */ + return cif->rtype->type; +} diff --git a/libffi/src/alpha/ffitarget.h b/libffi/src/alpha/ffitarget.h new file mode 100644 index 000000000..7d06eb0bc --- /dev/null +++ b/libffi/src/alpha/ffitarget.h @@ -0,0 +1,48 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 1996-2003 Red Hat, Inc. + Target configuration macros for Alpha. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + FFI_OSF, + FFI_LAST_ABI, + FFI_DEFAULT_ABI = FFI_OSF +} ffi_abi; +#endif + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_TRAMPOLINE_SIZE 24 +#define FFI_NATIVE_RAW_API 0 + +#endif diff --git a/libffi/src/alpha/osf.S b/libffi/src/alpha/osf.S new file mode 100644 index 000000000..6b9f4dfa0 --- /dev/null +++ b/libffi/src/alpha/osf.S @@ -0,0 +1,387 @@ +/* ----------------------------------------------------------------------- + osf.S - Copyright (c) 1998, 2001, 2007, 2008, 2011 Red Hat + + Alpha/OSF Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + + .arch ev6 + .text + +/* ffi_call_osf (void *args, unsigned long bytes, unsigned flags, + void *raddr, void (*fnaddr)(void)); + + Bit o trickiness here -- ARGS+BYTES is the base of the stack frame + for this function. This has been allocated by ffi_call. We also + deallocate some of the stack that has been alloca'd. */ + + .align 3 + .globl ffi_call_osf + .ent ffi_call_osf + FFI_HIDDEN(ffi_call_osf) + +ffi_call_osf: + .frame $15, 32, $26, 0 + .mask 0x4008000, -32 +$LFB1: + addq $16,$17,$1 + mov $16, $30 + stq $26, 0($1) + stq $15, 8($1) + stq $18, 16($1) + mov $1, $15 +$LCFI1: + .prologue 0 + + stq $19, 24($1) + mov $20, $27 + + # Load up all of the (potential) argument registers. + ldq $16, 0($30) + ldt $f16, 0($30) + ldt $f17, 8($30) + ldq $17, 8($30) + ldt $f18, 16($30) + ldq $18, 16($30) + ldt $f19, 24($30) + ldq $19, 24($30) + ldt $f20, 32($30) + ldq $20, 32($30) + ldt $f21, 40($30) + ldq $21, 40($30) + + # Deallocate the register argument area. + lda $30, 48($30) + + jsr $26, ($27), 0 + ldgp $29, 0($26) + + # If the return value pointer is NULL, assume no return value. + ldq $19, 24($15) + ldq $18, 16($15) + ldq $26, 0($15) +$LCFI2: + beq $19, $noretval + + # Store the return value out in the proper type. + cmpeq $18, FFI_TYPE_INT, $1 + bne $1, $retint + cmpeq $18, FFI_TYPE_FLOAT, $2 + bne $2, $retfloat + cmpeq $18, FFI_TYPE_DOUBLE, $3 + bne $3, $retdouble + + .align 3 +$noretval: + ldq $15, 8($15) + ret + + .align 4 +$retint: + stq $0, 0($19) + nop + ldq $15, 8($15) + ret + + .align 4 +$retfloat: + sts $f0, 0($19) + nop + ldq $15, 8($15) + ret + + .align 4 +$retdouble: + stt $f0, 0($19) + nop + ldq $15, 8($15) + ret +$LFE1: + + .end ffi_call_osf + +/* ffi_closure_osf(...) + + Receives the closure argument in $1. */ + + .align 3 + .globl ffi_closure_osf + .ent ffi_closure_osf + FFI_HIDDEN(ffi_closure_osf) + +ffi_closure_osf: + .frame $30, 16*8, $26, 0 + .mask 0x4000000, -16*8 +$LFB2: + ldgp $29, 0($27) + subq $30, 16*8, $30 +$LCFI5: + stq $26, 0($30) +$LCFI6: + .prologue 1 + + # Store all of the potential argument registers in va_list format. + stt $f16, 4*8($30) + stt $f17, 5*8($30) + stt $f18, 6*8($30) + stt $f19, 7*8($30) + stt $f20, 8*8($30) + stt $f21, 9*8($30) + stq $16, 10*8($30) + stq $17, 11*8($30) + stq $18, 12*8($30) + stq $19, 13*8($30) + stq $20, 14*8($30) + stq $21, 15*8($30) + + # Call ffi_closure_osf_inner to do the bulk of the work. + mov $1, $16 + lda $17, 2*8($30) + lda $18, 10*8($30) + jsr $26, ffi_closure_osf_inner + ldgp $29, 0($26) + ldq $26, 0($30) + + # Load up the return value in the proper type. + lda $1, $load_table + s4addq $0, $1, $1 + ldl $1, 0($1) + addq $1, $29, $1 + jmp $31, ($1), $load_32 + + .align 4 +$load_none: + addq $30, 16*8, $30 + ret + + .align 4 +$load_float: + lds $f0, 16($30) + nop + addq $30, 16*8, $30 + ret + + .align 4 +$load_double: + ldt $f0, 16($30) + nop + addq $30, 16*8, $30 + ret + + .align 4 +$load_u8: +#ifdef __alpha_bwx__ + ldbu $0, 16($30) + nop +#else + ldq $0, 16($30) + and $0, 255, $0 +#endif + addq $30, 16*8, $30 + ret + + .align 4 +$load_s8: +#ifdef __alpha_bwx__ + ldbu $0, 16($30) + sextb $0, $0 +#else + ldq $0, 16($30) + sll $0, 56, $0 + sra $0, 56, $0 +#endif + addq $30, 16*8, $30 + ret + + .align 4 +$load_u16: +#ifdef __alpha_bwx__ + ldwu $0, 16($30) + nop +#else + ldq $0, 16($30) + zapnot $0, 3, $0 +#endif + addq $30, 16*8, $30 + ret + + .align 4 +$load_s16: +#ifdef __alpha_bwx__ + ldwu $0, 16($30) + sextw $0, $0 +#else + ldq $0, 16($30) + sll $0, 48, $0 + sra $0, 48, $0 +#endif + addq $30, 16*8, $30 + ret + + .align 4 +$load_32: + ldl $0, 16($30) + nop + addq $30, 16*8, $30 + ret + + .align 4 +$load_64: + ldq $0, 16($30) + nop + addq $30, 16*8, $30 + ret +$LFE2: + + .end ffi_closure_osf + +#ifdef __ELF__ +.section .rodata +#else +.rdata +#endif +$load_table: + .gprel32 $load_none # FFI_TYPE_VOID + .gprel32 $load_32 # FFI_TYPE_INT + .gprel32 $load_float # FFI_TYPE_FLOAT + .gprel32 $load_double # FFI_TYPE_DOUBLE + .gprel32 $load_none # FFI_TYPE_LONGDOUBLE + .gprel32 $load_u8 # FFI_TYPE_UINT8 + .gprel32 $load_s8 # FFI_TYPE_SINT8 + .gprel32 $load_u16 # FFI_TYPE_UINT16 + .gprel32 $load_s16 # FFI_TYPE_SINT16 + .gprel32 $load_32 # FFI_TYPE_UINT32 + .gprel32 $load_32 # FFI_TYPE_SINT32 + .gprel32 $load_64 # FFI_TYPE_UINT64 + .gprel32 $load_64 # FFI_TYPE_SINT64 + .gprel32 $load_none # FFI_TYPE_STRUCT + .gprel32 $load_64 # FFI_TYPE_POINTER + +/* Assert that the table above is in sync with ffi.h. */ + +#if FFI_TYPE_FLOAT != 2 \ + || FFI_TYPE_DOUBLE != 3 \ + || FFI_TYPE_UINT8 != 5 \ + || FFI_TYPE_SINT8 != 6 \ + || FFI_TYPE_UINT16 != 7 \ + || FFI_TYPE_SINT16 != 8 \ + || FFI_TYPE_UINT32 != 9 \ + || FFI_TYPE_SINT32 != 10 \ + || FFI_TYPE_UINT64 != 11 \ + || FFI_TYPE_SINT64 != 12 \ + || FFI_TYPE_STRUCT != 13 \ + || FFI_TYPE_POINTER != 14 \ + || FFI_TYPE_LAST != 14 +#error "osf.S out of sync with ffi.h" +#endif + +#ifdef __ELF__ +# define UA_SI .4byte +# define FDE_ENCODING 0x1b /* pcrel sdata4 */ +# define FDE_ENCODE(X) .4byte X-. +# define FDE_ARANGE(X) .4byte X +#elif defined __osf__ +# define UA_SI .align 0; .long +# define FDE_ENCODING 0x50 /* aligned absolute */ +# define FDE_ENCODE(X) .align 3; .quad X +# define FDE_ARANGE(X) .align 0; .quad X +#endif + +#ifdef __ELF__ + .section .eh_frame,EH_FRAME_FLAGS,@progbits +#elif defined __osf__ + .data + .align 3 + .globl _GLOBAL__F_ffi_call_osf +_GLOBAL__F_ffi_call_osf: +#endif +__FRAME_BEGIN__: + UA_SI $LECIE1-$LSCIE1 # Length of Common Information Entry +$LSCIE1: + UA_SI 0x0 # CIE Identifier Tag + .byte 0x1 # CIE Version + .ascii "zR\0" # CIE Augmentation + .byte 0x1 # uleb128 0x1; CIE Code Alignment Factor + .byte 0x78 # sleb128 -8; CIE Data Alignment Factor + .byte 26 # CIE RA Column + .byte 0x1 # uleb128 0x1; Augmentation size + .byte FDE_ENCODING # FDE Encoding + .byte 0xc # DW_CFA_def_cfa + .byte 30 # uleb128 column 30 + .byte 0 # uleb128 offset 0 + .align 3 +$LECIE1: +$LSFDE1: + UA_SI $LEFDE1-$LASFDE1 # FDE Length +$LASFDE1: + UA_SI $LASFDE1-__FRAME_BEGIN__ # FDE CIE offset + FDE_ENCODE($LFB1) # FDE initial location + FDE_ARANGE($LFE1-$LFB1) # FDE address range + .byte 0x0 # uleb128 0x0; Augmentation size + + .byte 0x4 # DW_CFA_advance_loc4 + UA_SI $LCFI1-$LFB1 + .byte 0x9a # DW_CFA_offset, column 26 + .byte 4 # uleb128 4*-8 + .byte 0x8f # DW_CFA_offset, column 15 + .byte 0x3 # uleb128 3*-8 + .byte 0xc # DW_CFA_def_cfa + .byte 15 # uleb128 column 15 + .byte 32 # uleb128 offset 32 + + .byte 0x4 # DW_CFA_advance_loc4 + UA_SI $LCFI2-$LCFI1 + .byte 0xda # DW_CFA_restore, column 26 + .align 3 +$LEFDE1: + +$LSFDE3: + UA_SI $LEFDE3-$LASFDE3 # FDE Length +$LASFDE3: + UA_SI $LASFDE3-__FRAME_BEGIN__ # FDE CIE offset + FDE_ENCODE($LFB2) # FDE initial location + FDE_ARANGE($LFE2-$LFB2) # FDE address range + .byte 0x0 # uleb128 0x0; Augmentation size + + .byte 0x4 # DW_CFA_advance_loc4 + UA_SI $LCFI5-$LFB2 + .byte 0xe # DW_CFA_def_cfa_offset + .byte 0x80,0x1 # uleb128 128 + + .byte 0x4 # DW_CFA_advance_loc4 + UA_SI $LCFI6-$LCFI5 + .byte 0x9a # DW_CFA_offset, column 26 + .byte 16 # uleb128 offset 16*-8 + .align 3 +$LEFDE3: +#if defined __osf__ + .align 0 + .long 0 # End of Table +#endif + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libffi/src/arm/ffi.c b/libffi/src/arm/ffi.c new file mode 100644 index 000000000..4e72c3bcd --- /dev/null +++ b/libffi/src/arm/ffi.c @@ -0,0 +1,504 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 1998, 2008 Red Hat, Inc. + + ARM Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> + +/* Forward declares. */ +static int vfp_type_p (ffi_type *); +static void layout_vfp_args (ffi_cif *); + +/* ffi_prep_args is called by the assembly routine once stack space + has been allocated for the function's arguments + + The vfp_space parameter is the load area for VFP regs, the return + value is cif->vfp_used (word bitset of VFP regs used for passing + arguments). These are only used for the VFP hard-float ABI. +*/ +int ffi_prep_args(char *stack, extended_cif *ecif, float *vfp_space) +{ + register unsigned int i, vi = 0; + register void **p_argv; + register char *argp; + register ffi_type **p_arg; + + argp = stack; + + if ( ecif->cif->flags == FFI_TYPE_STRUCT ) { + *(void **) argp = ecif->rvalue; + argp += 4; + } + + p_argv = ecif->avalue; + + for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; + (i != 0); + i--, p_arg++) + { + size_t z; + + /* Allocated in VFP registers. */ + if (ecif->cif->abi == FFI_VFP + && vi < ecif->cif->vfp_nargs && vfp_type_p (*p_arg)) + { + float* vfp_slot = vfp_space + ecif->cif->vfp_args[vi++]; + if ((*p_arg)->type == FFI_TYPE_FLOAT) + *((float*)vfp_slot) = *((float*)*p_argv); + else if ((*p_arg)->type == FFI_TYPE_DOUBLE) + *((double*)vfp_slot) = *((double*)*p_argv); + else + memcpy(vfp_slot, *p_argv, (*p_arg)->size); + p_argv++; + continue; + } + + /* Align if necessary */ + if (((*p_arg)->alignment - 1) & (unsigned) argp) { + argp = (char *) ALIGN(argp, (*p_arg)->alignment); + } + + if ((*p_arg)->type == FFI_TYPE_STRUCT) + argp = (char *) ALIGN(argp, 4); + + z = (*p_arg)->size; + if (z < sizeof(int)) + { + z = sizeof(int); + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + *(signed int *) argp = (signed int)*(SINT8 *)(* p_argv); + break; + + case FFI_TYPE_UINT8: + *(unsigned int *) argp = (unsigned int)*(UINT8 *)(* p_argv); + break; + + case FFI_TYPE_SINT16: + *(signed int *) argp = (signed int)*(SINT16 *)(* p_argv); + break; + + case FFI_TYPE_UINT16: + *(unsigned int *) argp = (unsigned int)*(UINT16 *)(* p_argv); + break; + + case FFI_TYPE_STRUCT: + memcpy(argp, *p_argv, (*p_arg)->size); + break; + + default: + FFI_ASSERT(0); + } + } + else if (z == sizeof(int)) + { + *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv); + } + else + { + memcpy(argp, *p_argv, z); + } + p_argv++; + argp += z; + } + + /* Indicate the VFP registers used. */ + return ecif->cif->vfp_used; +} + +/* Perform machine dependent cif processing */ +ffi_status ffi_prep_cif_machdep(ffi_cif *cif) +{ + int type_code; + /* Round the stack up to a multiple of 8 bytes. This isn't needed + everywhere, but it is on some platforms, and it doesn't harm anything + when it isn't needed. */ + cif->bytes = (cif->bytes + 7) & ~7; + + /* Set the return type flag */ + switch (cif->rtype->type) + { + case FFI_TYPE_VOID: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + cif->flags = (unsigned) cif->rtype->type; + break; + + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + cif->flags = (unsigned) FFI_TYPE_SINT64; + break; + + case FFI_TYPE_STRUCT: + if (cif->abi == FFI_VFP + && (type_code = vfp_type_p (cif->rtype)) != 0) + { + /* A Composite Type passed in VFP registers, either + FFI_TYPE_STRUCT_VFP_FLOAT or FFI_TYPE_STRUCT_VFP_DOUBLE. */ + cif->flags = (unsigned) type_code; + } + else if (cif->rtype->size <= 4) + /* A Composite Type not larger than 4 bytes is returned in r0. */ + cif->flags = (unsigned)FFI_TYPE_INT; + else + /* A Composite Type larger than 4 bytes, or whose size cannot + be determined statically ... is stored in memory at an + address passed [in r0]. */ + cif->flags = (unsigned)FFI_TYPE_STRUCT; + break; + + default: + cif->flags = FFI_TYPE_INT; + break; + } + + /* Map out the register placements of VFP register args. + The VFP hard-float calling conventions are slightly more sophisticated than + the base calling conventions, so we do it here instead of in ffi_prep_args(). */ + if (cif->abi == FFI_VFP) + layout_vfp_args (cif); + + return FFI_OK; +} + +/* Prototypes for assembly functions, in sysv.S */ +extern void ffi_call_SYSV (void (*fn)(void), extended_cif *, unsigned, unsigned, unsigned *); +extern void ffi_call_VFP (void (*fn)(void), extended_cif *, unsigned, unsigned, unsigned *); + +void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + extended_cif ecif; + + int small_struct = (cif->flags == FFI_TYPE_INT + && cif->rtype->type == FFI_TYPE_STRUCT); + int vfp_struct = (cif->flags == FFI_TYPE_STRUCT_VFP_FLOAT + || cif->flags == FFI_TYPE_STRUCT_VFP_DOUBLE); + + ecif.cif = cif; + ecif.avalue = avalue; + + unsigned int temp; + + /* If the return value is a struct and we don't have a return */ + /* value address then we need to make one */ + + if ((rvalue == NULL) && + (cif->flags == FFI_TYPE_STRUCT)) + { + ecif.rvalue = alloca(cif->rtype->size); + } + else if (small_struct) + ecif.rvalue = &temp; + else if (vfp_struct) + { + /* Largest case is double x 4. */ + ecif.rvalue = alloca(32); + } + else + ecif.rvalue = rvalue; + + switch (cif->abi) + { + case FFI_SYSV: + ffi_call_SYSV (fn, &ecif, cif->bytes, cif->flags, ecif.rvalue); + break; + + case FFI_VFP: + ffi_call_VFP (fn, &ecif, cif->bytes, cif->flags, ecif.rvalue); + break; + + default: + FFI_ASSERT(0); + break; + } + if (small_struct) + memcpy (rvalue, &temp, cif->rtype->size); + else if (vfp_struct) + memcpy (rvalue, ecif.rvalue, cif->rtype->size); +} + +/** private members **/ + +static void ffi_prep_incoming_args_SYSV (char *stack, void **ret, + void** args, ffi_cif* cif, float *vfp_stack); + +void ffi_closure_SYSV (ffi_closure *); + +void ffi_closure_VFP (ffi_closure *); + +/* This function is jumped to by the trampoline */ + +unsigned int +ffi_closure_SYSV_inner (closure, respp, args, vfp_args) + ffi_closure *closure; + void **respp; + void *args; + void *vfp_args; +{ + // our various things... + ffi_cif *cif; + void **arg_area; + + cif = closure->cif; + arg_area = (void**) alloca (cif->nargs * sizeof (void*)); + + /* this call will initialize ARG_AREA, such that each + * element in that array points to the corresponding + * value on the stack; and if the function returns + * a structure, it will re-set RESP to point to the + * structure return address. */ + + ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif, vfp_args); + + (closure->fun) (cif, *respp, arg_area, closure->user_data); + + return cif->flags; +} + +/*@-exportheader@*/ +static void +ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, + void **avalue, ffi_cif *cif, + /* Used only under VFP hard-float ABI. */ + float *vfp_stack) +/*@=exportheader@*/ +{ + register unsigned int i, vi = 0; + register void **p_argv; + register char *argp; + register ffi_type **p_arg; + + argp = stack; + + if ( cif->flags == FFI_TYPE_STRUCT ) { + *rvalue = *(void **) argp; + argp += 4; + } + + p_argv = avalue; + + for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++) + { + size_t z; + size_t alignment; + + if (cif->abi == FFI_VFP + && vi < cif->vfp_nargs && vfp_type_p (*p_arg)) + { + *p_argv++ = (void*)(vfp_stack + cif->vfp_args[vi++]); + continue; + } + + alignment = (*p_arg)->alignment; + if (alignment < 4) + alignment = 4; + /* Align if necessary */ + if ((alignment - 1) & (unsigned) argp) { + argp = (char *) ALIGN(argp, alignment); + } + + z = (*p_arg)->size; + + /* because we're little endian, this is what it turns into. */ + + *p_argv = (void*) argp; + + p_argv++; + argp += z; + } + + return; +} + +/* How to make a trampoline. */ + +#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \ +({ unsigned char *__tramp = (unsigned char*)(TRAMP); \ + unsigned int __fun = (unsigned int)(FUN); \ + unsigned int __ctx = (unsigned int)(CTX); \ + unsigned char *insns = (unsigned char *)(CTX); \ + *(unsigned int*) &__tramp[0] = 0xe92d000f; /* stmfd sp!, {r0-r3} */ \ + *(unsigned int*) &__tramp[4] = 0xe59f0000; /* ldr r0, [pc] */ \ + *(unsigned int*) &__tramp[8] = 0xe59ff000; /* ldr pc, [pc] */ \ + *(unsigned int*) &__tramp[12] = __ctx; \ + *(unsigned int*) &__tramp[16] = __fun; \ + __clear_cache((&__tramp[0]), (&__tramp[19])); /* Clear data mapping. */ \ + __clear_cache(insns, insns + 3 * sizeof (unsigned int)); \ + /* Clear instruction \ + mapping. */ \ + }) + + +/* the cif must already be prep'ed */ + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + void *codeloc) +{ + void (*closure_func)(ffi_closure*) = NULL; + + if (cif->abi == FFI_SYSV) + closure_func = &ffi_closure_SYSV; + else if (cif->abi == FFI_VFP) + closure_func = &ffi_closure_VFP; + else + FFI_ASSERT (0); + + FFI_INIT_TRAMPOLINE (&closure->tramp[0], \ + closure_func, \ + codeloc); + + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + + return FFI_OK; +} + +/* Below are routines for VFP hard-float support. */ + +static int rec_vfp_type_p (ffi_type *t, int *elt, int *elnum) +{ + switch (t->type) + { + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + *elt = (int) t->type; + *elnum = 1; + return 1; + + case FFI_TYPE_STRUCT_VFP_FLOAT: + *elt = FFI_TYPE_FLOAT; + *elnum = t->size / sizeof (float); + return 1; + + case FFI_TYPE_STRUCT_VFP_DOUBLE: + *elt = FFI_TYPE_DOUBLE; + *elnum = t->size / sizeof (double); + return 1; + + case FFI_TYPE_STRUCT:; + { + int base_elt = 0, total_elnum = 0; + ffi_type **el = t->elements; + while (*el) + { + int el_elt = 0, el_elnum = 0; + if (! rec_vfp_type_p (*el, &el_elt, &el_elnum) + || (base_elt && base_elt != el_elt) + || total_elnum + el_elnum > 4) + return 0; + base_elt = el_elt; + total_elnum += el_elnum; + el++; + } + *elnum = total_elnum; + *elt = base_elt; + return 1; + } + default: ; + } + return 0; +} + +static int vfp_type_p (ffi_type *t) +{ + int elt, elnum; + if (rec_vfp_type_p (t, &elt, &elnum)) + { + if (t->type == FFI_TYPE_STRUCT) + { + if (elnum == 1) + t->type = elt; + else + t->type = (elt == FFI_TYPE_FLOAT + ? FFI_TYPE_STRUCT_VFP_FLOAT + : FFI_TYPE_STRUCT_VFP_DOUBLE); + } + return (int) t->type; + } + return 0; +} + +static void place_vfp_arg (ffi_cif *cif, ffi_type *t) +{ + int reg = cif->vfp_reg_free; + int nregs = t->size / sizeof (float); + int align = ((t->type == FFI_TYPE_STRUCT_VFP_FLOAT + || t->type == FFI_TYPE_FLOAT) ? 1 : 2); + /* Align register number. */ + if ((reg & 1) && align == 2) + reg++; + while (reg + nregs <= 16) + { + int s, new_used = 0; + for (s = reg; s < reg + nregs; s++) + { + new_used |= (1 << s); + if (cif->vfp_used & (1 << s)) + { + reg += align; + goto next_reg; + } + } + /* Found regs to allocate. */ + cif->vfp_used |= new_used; + cif->vfp_args[cif->vfp_nargs++] = reg; + + /* Update vfp_reg_free. */ + if (cif->vfp_used & (1 << cif->vfp_reg_free)) + { + reg += nregs; + while (cif->vfp_used & (1 << reg)) + reg += 1; + cif->vfp_reg_free = reg; + } + return; + next_reg: ; + } +} + +static void layout_vfp_args (ffi_cif *cif) +{ + int i; + /* Init VFP fields */ + cif->vfp_used = 0; + cif->vfp_nargs = 0; + cif->vfp_reg_free = 0; + memset (cif->vfp_args, -1, 16); /* Init to -1. */ + + for (i = 0; i < cif->nargs; i++) + { + ffi_type *t = cif->arg_types[i]; + if (vfp_type_p (t)) + place_vfp_arg (cif, t); + } +} diff --git a/libffi/src/arm/ffitarget.h b/libffi/src/arm/ffitarget.h new file mode 100644 index 000000000..ce25b23f5 --- /dev/null +++ b/libffi/src/arm/ffitarget.h @@ -0,0 +1,65 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 1996-2003 Red Hat, Inc. + Copyright (c) 2010 CodeSourcery + + Target configuration macros for ARM. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + FFI_SYSV, + FFI_VFP, + FFI_LAST_ABI, +#ifdef __ARM_PCS_VFP + FFI_DEFAULT_ABI = FFI_VFP, +#else + FFI_DEFAULT_ABI = FFI_SYSV, +#endif +} ffi_abi; +#endif + +#define FFI_EXTRA_CIF_FIELDS \ + int vfp_used; \ + short vfp_reg_free, vfp_nargs; \ + signed char vfp_args[16] \ + +/* Internally used. */ +#define FFI_TYPE_STRUCT_VFP_FLOAT (FFI_TYPE_LAST + 1) +#define FFI_TYPE_STRUCT_VFP_DOUBLE (FFI_TYPE_LAST + 2) + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_TRAMPOLINE_SIZE 20 +#define FFI_NATIVE_RAW_API 0 + +#endif + diff --git a/libffi/src/arm/sysv.S b/libffi/src/arm/sysv.S new file mode 100644 index 000000000..72f0ee0ca --- /dev/null +++ b/libffi/src/arm/sysv.S @@ -0,0 +1,466 @@ +/* ----------------------------------------------------------------------- + sysv.S - Copyright (c) 1998, 2008 Red Hat, Inc. + + ARM Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> +#ifdef HAVE_MACHINE_ASM_H +#include <machine/asm.h> +#else +#ifdef __USER_LABEL_PREFIX__ +#define CONCAT1(a, b) CONCAT2(a, b) +#define CONCAT2(a, b) a ## b + +/* Use the right prefix for global labels. */ +#define CNAME(x) CONCAT1 (__USER_LABEL_PREFIX__, x) +#else +#define CNAME(x) x +#endif +#define ENTRY(x) .globl CNAME(x); .type CNAME(x),%function; CNAME(x): +#endif + +#ifdef __ELF__ +#define LSYM(x) .x +#else +#define LSYM(x) x +#endif + +/* We need a better way of testing for this, but for now, this is all + we can do. */ +@ This selects the minimum architecture level required. +#define __ARM_ARCH__ 3 + +#if defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__) +# undef __ARM_ARCH__ +# define __ARM_ARCH__ 4 +#endif + +#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \ + || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) +# undef __ARM_ARCH__ +# define __ARM_ARCH__ 5 +#endif + +#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ + || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \ + || defined(__ARM_ARCH_6M__) +# undef __ARM_ARCH__ +# define __ARM_ARCH__ 6 +#endif + +#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7EM__) +# undef __ARM_ARCH__ +# define __ARM_ARCH__ 7 +#endif + +#if __ARM_ARCH__ >= 5 +# define call_reg(x) blx x +#elif defined (__ARM_ARCH_4T__) +# define call_reg(x) mov lr, pc ; bx x +# if defined(__thumb__) || defined(__THUMB_INTERWORK__) +# define __INTERWORKING__ +# endif +#else +# define call_reg(x) mov lr, pc ; mov pc, x +#endif + +/* Conditionally compile unwinder directives. */ +#ifdef __ARM_EABI__ +#define UNWIND +#else +#define UNWIND @ +#endif + + +#if defined(__thumb__) && !defined(__THUMB_INTERWORK__) +.macro ARM_FUNC_START name + .text + .align 0 + .thumb + .thumb_func + ENTRY(\name) + bx pc + nop + .arm + UNWIND .fnstart +/* A hook to tell gdb that we've switched to ARM mode. Also used to call + directly from other local arm routines. */ +_L__\name: +.endm +#else +.macro ARM_FUNC_START name + .text + .align 0 + .arm + ENTRY(\name) + UNWIND .fnstart +.endm +#endif + +.macro RETLDM regs=, cond=, dirn=ia +#if defined (__INTERWORKING__) + .ifc "\regs","" + ldr\cond lr, [sp], #4 + .else + ldm\cond\dirn sp!, {\regs, lr} + .endif + bx\cond lr +#else + .ifc "\regs","" + ldr\cond pc, [sp], #4 + .else + ldm\cond\dirn sp!, {\regs, pc} + .endif +#endif +.endm + + + @ r0: fn + @ r1: &ecif + @ r2: cif->bytes + @ r3: fig->flags + @ sp+0: ecif.rvalue + + @ This assumes we are using gas. +ARM_FUNC_START ffi_call_SYSV + @ Save registers + stmfd sp!, {r0-r3, fp, lr} + UNWIND .save {r0-r3, fp, lr} + mov fp, sp + + UNWIND .setfp fp, sp + + @ Make room for all of the new args. + sub sp, fp, r2 + + @ Place all of the ffi_prep_args in position + mov r0, sp + @ r1 already set + + @ Call ffi_prep_args(stack, &ecif) + bl ffi_prep_args + + @ move first 4 parameters in registers + ldmia sp, {r0-r3} + + @ and adjust stack + sub lr, fp, sp @ cif->bytes == fp - sp + ldr ip, [fp] @ load fn() in advance + cmp lr, #16 + movhs lr, #16 + add sp, sp, lr + + @ call (fn) (...) + call_reg(ip) + + @ Remove the space we pushed for the args + mov sp, fp + + @ Load r2 with the pointer to storage for the return value + ldr r2, [sp, #24] + + @ Load r3 with the return type code + ldr r3, [sp, #12] + + @ If the return value pointer is NULL, assume no return value. + cmp r2, #0 + beq LSYM(Lepilogue) + +@ return INT + cmp r3, #FFI_TYPE_INT +#if defined(__SOFTFP__) || defined(__ARM_EABI__) + cmpne r3, #FFI_TYPE_FLOAT +#endif + streq r0, [r2] + beq LSYM(Lepilogue) + + @ return INT64 + cmp r3, #FFI_TYPE_SINT64 +#if defined(__SOFTFP__) || defined(__ARM_EABI__) + cmpne r3, #FFI_TYPE_DOUBLE +#endif + stmeqia r2, {r0, r1} + +#if !defined(__SOFTFP__) && !defined(__ARM_EABI__) + beq LSYM(Lepilogue) + +@ return FLOAT + cmp r3, #FFI_TYPE_FLOAT + stfeqs f0, [r2] + beq LSYM(Lepilogue) + +@ return DOUBLE or LONGDOUBLE + cmp r3, #FFI_TYPE_DOUBLE + stfeqd f0, [r2] +#endif + +LSYM(Lepilogue): + RETLDM "r0-r3,fp" + +.ffi_call_SYSV_end: + UNWIND .fnend + .size CNAME(ffi_call_SYSV),.ffi_call_SYSV_end-CNAME(ffi_call_SYSV) + + +/* + unsigned int FFI_HIDDEN + ffi_closure_SYSV_inner (closure, respp, args) + ffi_closure *closure; + void **respp; + void *args; +*/ + +ARM_FUNC_START ffi_closure_SYSV + UNWIND .pad #16 + add ip, sp, #16 + stmfd sp!, {ip, lr} + UNWIND .save {r0, lr} + add r2, sp, #8 + UNWIND .pad #16 + sub sp, sp, #16 + str sp, [sp, #8] + add r1, sp, #8 + bl ffi_closure_SYSV_inner + cmp r0, #FFI_TYPE_INT + beq .Lretint + + cmp r0, #FFI_TYPE_FLOAT +#if defined(__SOFTFP__) || defined(__ARM_EABI__) + beq .Lretint +#else + beq .Lretfloat +#endif + + cmp r0, #FFI_TYPE_DOUBLE +#if defined(__SOFTFP__) || defined(__ARM_EABI__) + beq .Lretlonglong +#else + beq .Lretdouble +#endif + + cmp r0, #FFI_TYPE_LONGDOUBLE +#if defined(__SOFTFP__) || defined(__ARM_EABI__) + beq .Lretlonglong +#else + beq .Lretlongdouble +#endif + + cmp r0, #FFI_TYPE_SINT64 + beq .Lretlonglong +.Lclosure_epilogue: + add sp, sp, #16 + ldmfd sp, {sp, pc} +.Lretint: + ldr r0, [sp] + b .Lclosure_epilogue +.Lretlonglong: + ldr r0, [sp] + ldr r1, [sp, #4] + b .Lclosure_epilogue + +#if !defined(__SOFTFP__) && !defined(__ARM_EABI__) +.Lretfloat: + ldfs f0, [sp] + b .Lclosure_epilogue +.Lretdouble: + ldfd f0, [sp] + b .Lclosure_epilogue +.Lretlongdouble: + ldfd f0, [sp] + b .Lclosure_epilogue +#endif + +.ffi_closure_SYSV_end: + UNWIND .fnend + .size CNAME(ffi_closure_SYSV),.ffi_closure_SYSV_end-CNAME(ffi_closure_SYSV) + + +/* Below are VFP hard-float ABI call and closure implementations. + Add VFP FPU directive here. */ + .fpu vfp + + @ r0: fn + @ r1: &ecif + @ r2: cif->bytes + @ r3: fig->flags + @ sp+0: ecif.rvalue + +ARM_FUNC_START ffi_call_VFP + @ Save registers + stmfd sp!, {r0-r3, fp, lr} + UNWIND .save {r0-r3, fp, lr} + mov fp, sp + UNWIND .setfp fp, sp + + @ Make room for all of the new args. + sub sp, sp, r2 + + @ Make room for loading VFP args + sub sp, sp, #64 + + @ Place all of the ffi_prep_args in position + mov r0, sp + @ r1 already set + sub r2, fp, #64 @ VFP scratch space + + @ Call ffi_prep_args(stack, &ecif, vfp_space) + bl ffi_prep_args + + @ Load VFP register args if needed + cmp r0, #0 + beq LSYM(Lbase_args) + + @ Load only d0 if possible + cmp r0, #3 + sub ip, fp, #64 + flddle d0, [ip] + fldmiadgt ip, {d0-d7} + +LSYM(Lbase_args): + @ move first 4 parameters in registers + ldmia sp, {r0-r3} + + @ and adjust stack + sub lr, ip, sp @ cif->bytes == (fp - 64) - sp + ldr ip, [fp] @ load fn() in advance + cmp lr, #16 + movhs lr, #16 + add sp, sp, lr + + @ call (fn) (...) + call_reg(ip) + + @ Remove the space we pushed for the args + mov sp, fp + + @ Load r2 with the pointer to storage for + @ the return value + ldr r2, [sp, #24] + + @ Load r3 with the return type code + ldr r3, [sp, #12] + + @ If the return value pointer is NULL, + @ assume no return value. + cmp r2, #0 + beq LSYM(Lepilogue_vfp) + + cmp r3, #FFI_TYPE_INT + streq r0, [r2] + beq LSYM(Lepilogue_vfp) + + cmp r3, #FFI_TYPE_SINT64 + stmeqia r2, {r0, r1} + beq LSYM(Lepilogue_vfp) + + cmp r3, #FFI_TYPE_FLOAT + fstseq s0, [r2] + beq LSYM(Lepilogue_vfp) + + cmp r3, #FFI_TYPE_DOUBLE + fstdeq d0, [r2] + beq LSYM(Lepilogue_vfp) + + cmp r3, #FFI_TYPE_STRUCT_VFP_FLOAT + cmpne r3, #FFI_TYPE_STRUCT_VFP_DOUBLE + fstmiadeq r2, {d0-d3} + +LSYM(Lepilogue_vfp): + RETLDM "r0-r3,fp" + +.ffi_call_VFP_end: + UNWIND .fnend + .size CNAME(ffi_call_VFP),.ffi_call_VFP_end-CNAME(ffi_call_VFP) + + +ARM_FUNC_START ffi_closure_VFP + fstmfdd sp!, {d0-d7} + @ r0-r3, then d0-d7 + UNWIND .pad #80 + add ip, sp, #80 + stmfd sp!, {ip, lr} + UNWIND .save {r0, lr} + add r2, sp, #72 + add r3, sp, #8 + UNWIND .pad #72 + sub sp, sp, #72 + str sp, [sp, #64] + add r1, sp, #64 + bl ffi_closure_SYSV_inner + + cmp r0, #FFI_TYPE_INT + beq .Lretint_vfp + + cmp r0, #FFI_TYPE_FLOAT + beq .Lretfloat_vfp + + cmp r0, #FFI_TYPE_DOUBLE + cmpne r0, #FFI_TYPE_LONGDOUBLE + beq .Lretdouble_vfp + + cmp r0, #FFI_TYPE_SINT64 + beq .Lretlonglong_vfp + + cmp r0, #FFI_TYPE_STRUCT_VFP_FLOAT + beq .Lretfloat_struct_vfp + + cmp r0, #FFI_TYPE_STRUCT_VFP_DOUBLE + beq .Lretdouble_struct_vfp + +.Lclosure_epilogue_vfp: + add sp, sp, #72 + ldmfd sp, {sp, pc} + +.Lretfloat_vfp: + flds s0, [sp] + b .Lclosure_epilogue_vfp +.Lretdouble_vfp: + fldd d0, [sp] + b .Lclosure_epilogue_vfp +.Lretint_vfp: + ldr r0, [sp] + b .Lclosure_epilogue_vfp +.Lretlonglong_vfp: + ldmia sp, {r0, r1} + b .Lclosure_epilogue_vfp +.Lretfloat_struct_vfp: + fldmiad sp, {d0-d1} + b .Lclosure_epilogue_vfp +.Lretdouble_struct_vfp: + fldmiad sp, {d0-d3} + b .Lclosure_epilogue_vfp + +.ffi_closure_VFP_end: + UNWIND .fnend + .size CNAME(ffi_closure_VFP),.ffi_closure_VFP_end-CNAME(ffi_closure_VFP) + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",%progbits +#endif diff --git a/libffi/src/avr32/ffi.c b/libffi/src/avr32/ffi.c new file mode 100644 index 000000000..39fba2b03 --- /dev/null +++ b/libffi/src/avr32/ffi.c @@ -0,0 +1,421 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 2009 Bradley Smith <brad@brad-smith.co.uk> + + AVR32 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <asm/unistd.h> + +/* #define DEBUG */ + +extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *, + unsigned int, unsigned int, unsigned int*, unsigned int, + void (*fn)(void)); +extern void ffi_closure_SYSV (ffi_closure *); + +unsigned int pass_struct_on_stack(ffi_type *type) +{ + if(type->type != FFI_TYPE_STRUCT) + return 0; + + if(type->alignment < type->size && + !(type->size == 4 || type->size == 8) && + !(type->size == 8 && type->alignment >= 4)) + return 1; + + if(type->size == 3 || type->size == 5 || type->size == 6 || + type->size == 7) + return 1; + + return 0; +} + +/* ffi_prep_args is called by the assembly routine once stack space + * has been allocated for the function's arguments + * + * This is annoyingly complex since we need to keep track of used + * registers. + */ + +void ffi_prep_args(char *stack, extended_cif *ecif) +{ + unsigned int i; + void **p_argv; + ffi_type **p_arg; + char *reg_base = stack; + char *stack_base = stack + 20; + unsigned int stack_offset = 0; + unsigned int reg_mask = 0; + + p_argv = ecif->avalue; + + /* If cif->flags is struct then we know it's not passed in registers */ + if(ecif->cif->flags == FFI_TYPE_STRUCT) + { + *(void**)reg_base = ecif->rvalue; + reg_mask |= 1; + } + + for(i = 0, p_arg = ecif->cif->arg_types; i < ecif->cif->nargs; + i++, p_arg++) + { + size_t z = (*p_arg)->size; + int alignment = (*p_arg)->alignment; + int type = (*p_arg)->type; + char *addr = 0; + + if(z % 4 != 0) + z += (4 - z % 4); + + if(reg_mask != 0x1f) + { + if(pass_struct_on_stack(*p_arg)) + { + addr = stack_base + stack_offset; + stack_offset += z; + } + else if(z == sizeof(int)) + { + char index = 0; + + while((reg_mask >> index) & 1) + index++; + + addr = reg_base + (index * 4); + reg_mask |= (1 << index); + } + else if(z == 2 * sizeof(int)) + { + if(!((reg_mask >> 1) & 1)) + { + addr = reg_base + 4; + reg_mask |= (3 << 1); + } + else if(!((reg_mask >> 3) & 1)) + { + addr = reg_base + 12; + reg_mask |= (3 << 3); + } + } + } + + if(!addr) + { + addr = stack_base + stack_offset; + stack_offset += z; + } + + if(type == FFI_TYPE_STRUCT && (*p_arg)->elements[1] == NULL) + type = (*p_arg)->elements[0]->type; + + switch(type) + { + case FFI_TYPE_UINT8: + *(unsigned int *)addr = (unsigned int)*(UINT8 *)(*p_argv); + break; + case FFI_TYPE_SINT8: + *(signed int *)addr = (signed int)*(SINT8 *)(*p_argv); + break; + case FFI_TYPE_UINT16: + *(unsigned int *)addr = (unsigned int)*(UINT16 *)(*p_argv); + break; + case FFI_TYPE_SINT16: + *(signed int *)addr = (signed int)*(SINT16 *)(*p_argv); + break; + default: + memcpy(addr, *p_argv, z); + } + + p_argv++; + } + +#ifdef DEBUG + /* Debugging */ + for(i = 0; i < 5; i++) + { + if((reg_mask & (1 << i)) == 0) + printf("r%d: (unused)\n", 12 - i); + else + printf("r%d: 0x%08x\n", 12 - i, ((unsigned int*)reg_base)[i]); + } + + for(i = 0; i < stack_offset / 4; i++) + { + printf("sp+%d: 0x%08x\n", i*4, ((unsigned int*)stack_base)[i]); + } +#endif +} + +/* Perform machine dependent cif processing */ +ffi_status ffi_prep_cif_machdep(ffi_cif *cif) +{ + /* Round the stack up to a multiple of 8 bytes. This isn't needed + * everywhere, but it is on some platforms, and it doesn't harm + * anything when it isn't needed. */ + cif->bytes = (cif->bytes + 7) & ~7; + + /* Flag to indicate that he return value is in fact a struct */ + cif->rstruct_flag = 0; + + /* Set the return type flag */ + switch(cif->rtype->type) + { + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + cif->flags = (unsigned)FFI_TYPE_UINT8; + break; + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + cif->flags = (unsigned)FFI_TYPE_UINT16; + break; + case FFI_TYPE_FLOAT: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_POINTER: + cif->flags = (unsigned)FFI_TYPE_UINT32; + break; + case FFI_TYPE_DOUBLE: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + cif->flags = (unsigned)FFI_TYPE_UINT64; + break; + case FFI_TYPE_STRUCT: + cif->rstruct_flag = 1; + if(!pass_struct_on_stack(cif->rtype)) + { + if(cif->rtype->size <= 1) + cif->flags = (unsigned)FFI_TYPE_UINT8; + else if(cif->rtype->size <= 2) + cif->flags = (unsigned)FFI_TYPE_UINT16; + else if(cif->rtype->size <= 4) + cif->flags = (unsigned)FFI_TYPE_UINT32; + else if(cif->rtype->size <= 8) + cif->flags = (unsigned)FFI_TYPE_UINT64; + else + cif->flags = (unsigned)cif->rtype->type; + } + else + cif->flags = (unsigned)cif->rtype->type; + break; + default: + cif->flags = (unsigned)cif->rtype->type; + break; + } + + return FFI_OK; +} + +void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + extended_cif ecif; + + unsigned int size = 0, i = 0; + ffi_type **p_arg; + + ecif.cif = cif; + ecif.avalue = avalue; + + for(i = 0, p_arg = cif->arg_types; i < cif->nargs; i++, p_arg++) + size += (*p_arg)->size + (4 - (*p_arg)->size % 4); + + /* If the return value is a struct and we don't have a return value + * address then we need to make one */ + + /* If cif->flags is struct then it's not suitable for registers */ + if((rvalue == NULL) && (cif->flags == FFI_TYPE_STRUCT)) + ecif.rvalue = alloca(cif->rtype->size); + else + ecif.rvalue = rvalue; + + switch(cif->abi) + { + case FFI_SYSV: + ffi_call_SYSV(ffi_prep_args, &ecif, size, cif->flags, + ecif.rvalue, cif->rstruct_flag, fn); + break; + default: + FFI_ASSERT(0); + break; + } +} + +static void ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, + void **avalue, ffi_cif *cif) +{ + register unsigned int i, reg_mask = 0; + register void **p_argv; + register ffi_type **p_arg; + register char *reg_base = stack; + register char *stack_base = stack + 20; + register unsigned int stack_offset = 0; + +#ifdef DEBUG + /* Debugging */ + for(i = 0; i < cif->nargs + 7; i++) + { + printf("sp+%d: 0x%08x\n", i*4, ((unsigned int*)stack)[i]); + } +#endif + + /* If cif->flags is struct then we know it's not passed in registers */ + if(cif->flags == FFI_TYPE_STRUCT) + { + *rvalue = *(void **)reg_base; + reg_mask |= 1; + } + + p_argv = avalue; + + for(i = 0, p_arg = cif->arg_types; i < cif->nargs; i++, p_arg++) + { + size_t z = (*p_arg)->size; + int alignment = (*p_arg)->alignment; + + *p_argv = 0; + + if(z % 4 != 0) + z += (4 - z % 4); + + if(reg_mask != 0x1f) + { + if(pass_struct_on_stack(*p_arg)) + { + *p_argv = (void*)stack_base + stack_offset; + stack_offset += z; + } + else if(z <= sizeof(int)) + { + char index = 0; + + while((reg_mask >> index) & 1) + index++; + + *p_argv = (void*)reg_base + (index * 4); + reg_mask |= (1 << index); + } + else if(z == 2 * sizeof(int)) + { + if(!((reg_mask >> 1) & 1)) + { + *p_argv = (void*)reg_base + 4; + reg_mask |= (3 << 1); + } + else if(!((reg_mask >> 3) & 1)) + { + *p_argv = (void*)reg_base + 12; + reg_mask |= (3 << 3); + } + } + } + + if(!*p_argv) + { + *p_argv = (void*)stack_base + stack_offset; + stack_offset += z; + } + + if((*p_arg)->type != FFI_TYPE_STRUCT || + (*p_arg)->elements[1] == NULL) + { + if(alignment == 1) + **(unsigned int**)p_argv <<= 24; + else if(alignment == 2) + **(unsigned int**)p_argv <<= 16; + } + + p_argv++; + } + +#ifdef DEBUG + /* Debugging */ + for(i = 0; i < cif->nargs; i++) + { + printf("sp+%d: 0x%08x\n", i*4, *(((unsigned int**)avalue)[i])); + } +#endif +} + +/* This function is jumped to by the trampoline */ + +unsigned int ffi_closure_SYSV_inner(ffi_closure *closure, void **respp, + void *args) +{ + ffi_cif *cif; + void **arg_area; + unsigned int i, size = 0; + ffi_type **p_arg; + + cif = closure->cif; + + for(i = 0, p_arg = cif->arg_types; i < cif->nargs; i++, p_arg++) + size += (*p_arg)->size + (4 - (*p_arg)->size % 4); + + arg_area = (void **)alloca(size); + + /* this call will initialize ARG_AREA, such that each element in that + * array points to the corresponding value on the stack; and if the + * function returns a structure, it will re-set RESP to point to the + * structure return address. */ + + ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif); + + (closure->fun)(cif, *respp, arg_area, closure->user_data); + + return cif->flags; +} + +ffi_status ffi_prep_closure_loc(ffi_closure* closure, ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), void *user_data, + void *codeloc) +{ + FFI_ASSERT(cif->abi == FFI_SYSV); + + unsigned char *__tramp = (unsigned char*)(&closure->tramp[0]); + unsigned int __fun = (unsigned int)(&ffi_closure_SYSV); + unsigned int __ctx = (unsigned int)(codeloc); + unsigned int __rstruct_flag = (unsigned int)(cif->rstruct_flag); + unsigned int __inner = (unsigned int)(&ffi_closure_SYSV_inner); + *(unsigned int*) &__tramp[0] = 0xebcd1f00; /* pushm r8-r12 */ + *(unsigned int*) &__tramp[4] = 0xfefc0010; /* ld.w r12, pc[16] */ + *(unsigned int*) &__tramp[8] = 0xfefb0010; /* ld.w r11, pc[16] */ + *(unsigned int*) &__tramp[12] = 0xfefa0010; /* ld.w r10, pc[16] */ + *(unsigned int*) &__tramp[16] = 0xfeff0010; /* ld.w pc, pc[16] */ + *(unsigned int*) &__tramp[20] = __ctx; + *(unsigned int*) &__tramp[24] = __rstruct_flag; + *(unsigned int*) &__tramp[28] = __inner; + *(unsigned int*) &__tramp[32] = __fun; + syscall(__NR_cacheflush, 0, (&__tramp[0]), 36); + + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + + return FFI_OK; +} + diff --git a/libffi/src/avr32/ffitarget.h b/libffi/src/avr32/ffitarget.h new file mode 100644 index 000000000..1c799b1de --- /dev/null +++ b/libffi/src/avr32/ffitarget.h @@ -0,0 +1,50 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 2009 Bradley Smith <brad@brad-smith.co.uk> + Target configuration macros for AVR32. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + FFI_SYSV, + FFI_DEFAULT_ABI = FFI_SYSV, + FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 +} ffi_abi; +#endif + +#define FFI_EXTRA_CIF_FIELDS unsigned int rstruct_flag + +/* Definitions for closures */ + +#define FFI_CLOSURES 1 +#define FFI_TRAMPOLINE_SIZE 36 +#define FFI_NATIVE_RAW_API 0 + +#endif diff --git a/libffi/src/avr32/sysv.S b/libffi/src/avr32/sysv.S new file mode 100644 index 000000000..a984b3c88 --- /dev/null +++ b/libffi/src/avr32/sysv.S @@ -0,0 +1,208 @@ +/* ----------------------------------------------------------------------- + sysv.S - Copyright (c) 2009 Bradley Smith <brad@brad-smith.co.uk> + + AVR32 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + --------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + + /* r12: ffi_prep_args + * r11: &ecif + * r10: size + * r9: cif->flags + * r8: ecif.rvalue + * sp+0: cif->rstruct_flag + * sp+4: fn */ + + .text + .align 1 + .globl ffi_call_SYSV + .type ffi_call_SYSV, @function +ffi_call_SYSV: + stm --sp, r0,r1,lr + stm --sp, r8-r12 + mov r0, sp + + /* Make room for all of the new args. */ + sub sp, r10 + /* Pad to make way for potential skipped registers */ + sub sp, 20 + + /* Call ffi_prep_args(stack, &ecif). */ + /* r11 already set */ + mov r1, r12 + mov r12, sp + icall r1 + + /* Save new argument size */ + mov r1, r12 + + /* Move first 5 parameters in registers. */ + ldm sp++, r8-r12 + + /* call (fn) (...). */ + ld.w r1, r0[36] + icall r1 + + /* Remove the space we pushed for the args. */ + mov sp, r0 + + /* Load r1 with the rstruct flag. */ + ld.w r1, sp[32] + + /* Load r9 with the return type code. */ + ld.w r9, sp[12] + + /* Load r8 with the return value pointer. */ + ld.w r8, sp[16] + + /* If the return value pointer is NULL, assume no return value. */ + cp.w r8, 0 + breq .Lend + + /* Check if return type is actually a struct */ + cp.w r1, 0 + breq 1f + + /* Return 8bit */ + cp.w r9, FFI_TYPE_UINT8 + breq .Lstore8 + + /* Return 16bit */ + cp.w r9, FFI_TYPE_UINT16 + breq .Lstore16 + +1: + /* Return 32bit */ + cp.w r9, FFI_TYPE_UINT32 + breq .Lstore32 + cp.w r9, FFI_TYPE_UINT16 + breq .Lstore32 + cp.w r9, FFI_TYPE_UINT8 + breq .Lstore32 + + /* Return 64bit */ + cp.w r9, FFI_TYPE_UINT64 + breq .Lstore64 + + /* Didn't match anything */ + bral .Lend + +.Lstore64: + st.w r8[0], r11 + st.w r8[4], r10 + bral .Lend + +.Lstore32: + st.w r8[0], r12 + bral .Lend + +.Lstore16: + st.h r8[0], r12 + bral .Lend + +.Lstore8: + st.b r8[0], r12 + bral .Lend + +.Lend: + sub sp, -20 + ldm sp++, r0,r1,pc + + .size ffi_call_SYSV, . - ffi_call_SYSV + + + /* r12: __ctx + * r11: __rstruct_flag + * r10: __inner */ + + .align 1 + .globl ffi_closure_SYSV + .type ffi_closure_SYSV, @function +ffi_closure_SYSV: + stm --sp, r0,lr + mov r0, r11 + mov r8, r10 + sub r10, sp, -8 + sub sp, 12 + st.w sp[8], sp + sub r11, sp, -8 + icall r8 + + /* Check if return type is actually a struct */ + cp.w r0, 0 + breq 1f + + /* Return 8bit */ + cp.w r12, FFI_TYPE_UINT8 + breq .Lget8 + + /* Return 16bit */ + cp.w r12, FFI_TYPE_UINT16 + breq .Lget16 + +1: + /* Return 32bit */ + cp.w r12, FFI_TYPE_UINT32 + breq .Lget32 + cp.w r12, FFI_TYPE_UINT16 + breq .Lget32 + cp.w r12, FFI_TYPE_UINT8 + breq .Lget32 + + /* Return 64bit */ + cp.w r12, FFI_TYPE_UINT64 + breq .Lget64 + + /* Didn't match anything */ + bral .Lclend + +.Lget64: + ld.w r11, sp[0] + ld.w r10, sp[4] + bral .Lclend + +.Lget32: + ld.w r12, sp[0] + bral .Lclend + +.Lget16: + ld.uh r12, sp[0] + bral .Lclend + +.Lget8: + ld.ub r12, sp[0] + bral .Lclend + +.Lclend: + sub sp, -12 + ldm sp++, r0,lr + sub sp, -20 + mov pc, lr + + .size ffi_closure_SYSV, . - ffi_closure_SYSV + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libffi/src/closures.c b/libffi/src/closures.c new file mode 100644 index 000000000..ff2b1bd21 --- /dev/null +++ b/libffi/src/closures.c @@ -0,0 +1,610 @@ +/* ----------------------------------------------------------------------- + closures.c - Copyright (c) 2007 Red Hat, Inc. + Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc + + Code to allocate and deallocate memory for closures. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#if defined __linux__ && !defined _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif + +#include <ffi.h> +#include <ffi_common.h> + +#ifndef FFI_MMAP_EXEC_WRIT +# if __gnu_linux__ +/* This macro indicates it may be forbidden to map anonymous memory + with both write and execute permission. Code compiled when this + option is defined will attempt to map such pages once, but if it + fails, it falls back to creating a temporary file in a writable and + executable filesystem and mapping pages from it into separate + locations in the virtual memory space, one location writable and + another executable. */ +# define FFI_MMAP_EXEC_WRIT 1 +# define HAVE_MNTENT 1 +# endif +# if defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__) +/* Windows systems may have Data Execution Protection (DEP) enabled, + which requires the use of VirtualMalloc/VirtualFree to alloc/free + executable memory. */ +# define FFI_MMAP_EXEC_WRIT 1 +# endif +#endif + +#if FFI_MMAP_EXEC_WRIT && !defined FFI_MMAP_EXEC_SELINUX +# ifdef __linux__ +/* When defined to 1 check for SELinux and if SELinux is active, + don't attempt PROT_EXEC|PROT_WRITE mapping at all, as that + might cause audit messages. */ +# define FFI_MMAP_EXEC_SELINUX 1 +# endif +#endif + +#if FFI_CLOSURES + +# if FFI_MMAP_EXEC_WRIT + +#define USE_LOCKS 1 +#define USE_DL_PREFIX 1 +#ifdef __GNUC__ +#ifndef USE_BUILTIN_FFS +#define USE_BUILTIN_FFS 1 +#endif +#endif + +/* We need to use mmap, not sbrk. */ +#define HAVE_MORECORE 0 + +/* We could, in theory, support mremap, but it wouldn't buy us anything. */ +#define HAVE_MREMAP 0 + +/* We have no use for this, so save some code and data. */ +#define NO_MALLINFO 1 + +/* We need all allocations to be in regular segments, otherwise we + lose track of the corresponding code address. */ +#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T + +/* Don't allocate more than a page unless needed. */ +#define DEFAULT_GRANULARITY ((size_t)malloc_getpagesize) + +#if FFI_CLOSURE_TEST +/* Don't release single pages, to avoid a worst-case scenario of + continuously allocating and releasing single pages, but release + pairs of pages, which should do just as well given that allocations + are likely to be small. */ +#define DEFAULT_TRIM_THRESHOLD ((size_t)malloc_getpagesize) +#endif + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#ifndef _MSC_VER +#include <unistd.h> +#endif +#include <string.h> +#include <stdio.h> +#if !defined(X86_WIN32) && !defined(X86_WIN64) +#ifdef HAVE_MNTENT +#include <mntent.h> +#endif /* HAVE_MNTENT */ +#include <sys/param.h> +#include <pthread.h> + +/* We don't want sys/mman.h to be included after we redefine mmap and + dlmunmap. */ +#include <sys/mman.h> +#define LACKS_SYS_MMAN_H 1 + +#if FFI_MMAP_EXEC_SELINUX +#include <sys/statfs.h> +#include <stdlib.h> + +static int selinux_enabled = -1; + +static int +selinux_enabled_check (void) +{ + struct statfs sfs; + FILE *f; + char *buf = NULL; + size_t len = 0; + + if (statfs ("/selinux", &sfs) >= 0 + && (unsigned int) sfs.f_type == 0xf97cff8cU) + return 1; + f = fopen ("/proc/mounts", "r"); + if (f == NULL) + return 0; + while (getline (&buf, &len, f) >= 0) + { + char *p = strchr (buf, ' '); + if (p == NULL) + break; + p = strchr (p + 1, ' '); + if (p == NULL) + break; + if (strncmp (p + 1, "selinuxfs ", 10) == 0) + { + free (buf); + fclose (f); + return 1; + } + } + free (buf); + fclose (f); + return 0; +} + +#define is_selinux_enabled() (selinux_enabled >= 0 ? selinux_enabled \ + : (selinux_enabled = selinux_enabled_check ())) + +#else + +#define is_selinux_enabled() 0 + +#endif /* !FFI_MMAP_EXEC_SELINUX */ + +#elif defined (__CYGWIN__) + +#include <sys/mman.h> + +/* Cygwin is Linux-like, but not quite that Linux-like. */ +#define is_selinux_enabled() 0 + +#endif /* !defined(X86_WIN32) && !defined(X86_WIN64) */ + +/* Declare all functions defined in dlmalloc.c as static. */ +static void *dlmalloc(size_t); +static void dlfree(void*); +static void *dlcalloc(size_t, size_t) MAYBE_UNUSED; +static void *dlrealloc(void *, size_t) MAYBE_UNUSED; +static void *dlmemalign(size_t, size_t) MAYBE_UNUSED; +static void *dlvalloc(size_t) MAYBE_UNUSED; +static int dlmallopt(int, int) MAYBE_UNUSED; +static size_t dlmalloc_footprint(void) MAYBE_UNUSED; +static size_t dlmalloc_max_footprint(void) MAYBE_UNUSED; +static void** dlindependent_calloc(size_t, size_t, void**) MAYBE_UNUSED; +static void** dlindependent_comalloc(size_t, size_t*, void**) MAYBE_UNUSED; +static void *dlpvalloc(size_t) MAYBE_UNUSED; +static int dlmalloc_trim(size_t) MAYBE_UNUSED; +static size_t dlmalloc_usable_size(void*) MAYBE_UNUSED; +static void dlmalloc_stats(void) MAYBE_UNUSED; + +#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) +/* Use these for mmap and munmap within dlmalloc.c. */ +static void *dlmmap(void *, size_t, int, int, int, off_t); +static int dlmunmap(void *, size_t); +#endif /* !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) */ + +#define mmap dlmmap +#define munmap dlmunmap + +#include "dlmalloc.c" + +#undef mmap +#undef munmap + +#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) + +/* A mutex used to synchronize access to *exec* variables in this file. */ +static pthread_mutex_t open_temp_exec_file_mutex = PTHREAD_MUTEX_INITIALIZER; + +/* A file descriptor of a temporary file from which we'll map + executable pages. */ +static int execfd = -1; + +/* The amount of space already allocated from the temporary file. */ +static size_t execsize = 0; + +/* Open a temporary file name, and immediately unlink it. */ +static int +open_temp_exec_file_name (char *name) +{ + int fd = mkstemp (name); + + if (fd != -1) + unlink (name); + + return fd; +} + +/* Open a temporary file in the named directory. */ +static int +open_temp_exec_file_dir (const char *dir) +{ + static const char suffix[] = "/ffiXXXXXX"; + int lendir = strlen (dir); + char *tempname = __builtin_alloca (lendir + sizeof (suffix)); + + if (!tempname) + return -1; + + memcpy (tempname, dir, lendir); + memcpy (tempname + lendir, suffix, sizeof (suffix)); + + return open_temp_exec_file_name (tempname); +} + +/* Open a temporary file in the directory in the named environment + variable. */ +static int +open_temp_exec_file_env (const char *envvar) +{ + const char *value = getenv (envvar); + + if (!value) + return -1; + + return open_temp_exec_file_dir (value); +} + +#ifdef HAVE_MNTENT +/* Open a temporary file in an executable and writable mount point + listed in the mounts file. Subsequent calls with the same mounts + keep searching for mount points in the same file. Providing NULL + as the mounts file closes the file. */ +static int +open_temp_exec_file_mnt (const char *mounts) +{ + static const char *last_mounts; + static FILE *last_mntent; + + if (mounts != last_mounts) + { + if (last_mntent) + endmntent (last_mntent); + + last_mounts = mounts; + + if (mounts) + last_mntent = setmntent (mounts, "r"); + else + last_mntent = NULL; + } + + if (!last_mntent) + return -1; + + for (;;) + { + int fd; + struct mntent mnt; + char buf[MAXPATHLEN * 3]; + + if (getmntent_r (last_mntent, &mnt, buf, sizeof (buf)) == NULL) + return -1; + + if (hasmntopt (&mnt, "ro") + || hasmntopt (&mnt, "noexec") + || access (mnt.mnt_dir, W_OK)) + continue; + + fd = open_temp_exec_file_dir (mnt.mnt_dir); + + if (fd != -1) + return fd; + } +} +#endif /* HAVE_MNTENT */ + +/* Instructions to look for a location to hold a temporary file that + can be mapped in for execution. */ +static struct +{ + int (*func)(const char *); + const char *arg; + int repeat; +} open_temp_exec_file_opts[] = { + { open_temp_exec_file_env, "TMPDIR", 0 }, + { open_temp_exec_file_dir, "/tmp", 0 }, + { open_temp_exec_file_dir, "/var/tmp", 0 }, + { open_temp_exec_file_dir, "/dev/shm", 0 }, + { open_temp_exec_file_env, "HOME", 0 }, +#ifdef HAVE_MNTENT + { open_temp_exec_file_mnt, "/etc/mtab", 1 }, + { open_temp_exec_file_mnt, "/proc/mounts", 1 }, +#endif /* HAVE_MNTENT */ +}; + +/* Current index into open_temp_exec_file_opts. */ +static int open_temp_exec_file_opts_idx = 0; + +/* Reset a current multi-call func, then advances to the next entry. + If we're at the last, go back to the first and return nonzero, + otherwise return zero. */ +static int +open_temp_exec_file_opts_next (void) +{ + if (open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat) + open_temp_exec_file_opts[open_temp_exec_file_opts_idx].func (NULL); + + open_temp_exec_file_opts_idx++; + if (open_temp_exec_file_opts_idx + == (sizeof (open_temp_exec_file_opts) + / sizeof (*open_temp_exec_file_opts))) + { + open_temp_exec_file_opts_idx = 0; + return 1; + } + + return 0; +} + +/* Return a file descriptor of a temporary zero-sized file in a + writable and exexutable filesystem. */ +static int +open_temp_exec_file (void) +{ + int fd; + + do + { + fd = open_temp_exec_file_opts[open_temp_exec_file_opts_idx].func + (open_temp_exec_file_opts[open_temp_exec_file_opts_idx].arg); + + if (!open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat + || fd == -1) + { + if (open_temp_exec_file_opts_next ()) + break; + } + } + while (fd == -1); + + return fd; +} + +/* Map in a chunk of memory from the temporary exec file into separate + locations in the virtual memory address space, one writable and one + executable. Returns the address of the writable portion, after + storing an offset to the corresponding executable portion at the + last word of the requested chunk. */ +static void * +dlmmap_locked (void *start, size_t length, int prot, int flags, off_t offset) +{ + void *ptr; + + if (execfd == -1) + { + open_temp_exec_file_opts_idx = 0; + retry_open: + execfd = open_temp_exec_file (); + if (execfd == -1) + return MFAIL; + } + + offset = execsize; + + if (ftruncate (execfd, offset + length)) + return MFAIL; + + flags &= ~(MAP_PRIVATE | MAP_ANONYMOUS); + flags |= MAP_SHARED; + + ptr = mmap (NULL, length, (prot & ~PROT_WRITE) | PROT_EXEC, + flags, execfd, offset); + if (ptr == MFAIL) + { + if (!offset) + { + close (execfd); + goto retry_open; + } + ftruncate (execfd, offset); + return MFAIL; + } + else if (!offset + && open_temp_exec_file_opts[open_temp_exec_file_opts_idx].repeat) + open_temp_exec_file_opts_next (); + + start = mmap (start, length, prot, flags, execfd, offset); + + if (start == MFAIL) + { + munmap (ptr, length); + ftruncate (execfd, offset); + return start; + } + + mmap_exec_offset ((char *)start, length) = (char*)ptr - (char*)start; + + execsize += length; + + return start; +} + +/* Map in a writable and executable chunk of memory if possible. + Failing that, fall back to dlmmap_locked. */ +static void * +dlmmap (void *start, size_t length, int prot, + int flags, int fd, off_t offset) +{ + void *ptr; + + assert (start == NULL && length % malloc_getpagesize == 0 + && prot == (PROT_READ | PROT_WRITE) + && flags == (MAP_PRIVATE | MAP_ANONYMOUS) + && fd == -1 && offset == 0); + +#if FFI_CLOSURE_TEST + printf ("mapping in %zi\n", length); +#endif + + if (execfd == -1 && !is_selinux_enabled ()) + { + ptr = mmap (start, length, prot | PROT_EXEC, flags, fd, offset); + + if (ptr != MFAIL || (errno != EPERM && errno != EACCES)) + /* Cool, no need to mess with separate segments. */ + return ptr; + + /* If MREMAP_DUP is ever introduced and implemented, try mmap + with ((prot & ~PROT_WRITE) | PROT_EXEC) and mremap with + MREMAP_DUP and prot at this point. */ + } + + if (execsize == 0 || execfd == -1) + { + pthread_mutex_lock (&open_temp_exec_file_mutex); + ptr = dlmmap_locked (start, length, prot, flags, offset); + pthread_mutex_unlock (&open_temp_exec_file_mutex); + + return ptr; + } + + return dlmmap_locked (start, length, prot, flags, offset); +} + +/* Release memory at the given address, as well as the corresponding + executable page if it's separate. */ +static int +dlmunmap (void *start, size_t length) +{ + /* We don't bother decreasing execsize or truncating the file, since + we can't quite tell whether we're unmapping the end of the file. + We don't expect frequent deallocation anyway. If we did, we + could locate pages in the file by writing to the pages being + deallocated and checking that the file contents change. + Yuck. */ + msegmentptr seg = segment_holding (gm, start); + void *code; + +#if FFI_CLOSURE_TEST + printf ("unmapping %zi\n", length); +#endif + + if (seg && (code = add_segment_exec_offset (start, seg)) != start) + { + int ret = munmap (code, length); + if (ret) + return ret; + } + + return munmap (start, length); +} + +#if FFI_CLOSURE_FREE_CODE +/* Return segment holding given code address. */ +static msegmentptr +segment_holding_code (mstate m, char* addr) +{ + msegmentptr sp = &m->seg; + for (;;) { + if (addr >= add_segment_exec_offset (sp->base, sp) + && addr < add_segment_exec_offset (sp->base, sp) + sp->size) + return sp; + if ((sp = sp->next) == 0) + return 0; + } +} +#endif + +#endif /* !(defined(X86_WIN32) || defined(X86_WIN64) || defined(__OS2__)) || defined (__CYGWIN__) */ + +/* Allocate a chunk of memory with the given size. Returns a pointer + to the writable address, and sets *CODE to the executable + corresponding virtual address. */ +void * +ffi_closure_alloc (size_t size, void **code) +{ + void *ptr; + + if (!code) + return NULL; + + ptr = dlmalloc (size); + + if (ptr) + { + msegmentptr seg = segment_holding (gm, ptr); + + *code = add_segment_exec_offset (ptr, seg); + } + + return ptr; +} + +/* Release a chunk of memory allocated with ffi_closure_alloc. If + FFI_CLOSURE_FREE_CODE is nonzero, the given address can be the + writable or the executable address given. Otherwise, only the + writable address can be provided here. */ +void +ffi_closure_free (void *ptr) +{ +#if FFI_CLOSURE_FREE_CODE + msegmentptr seg = segment_holding_code (gm, ptr); + + if (seg) + ptr = sub_segment_exec_offset (ptr, seg); +#endif + + dlfree (ptr); +} + + +#if FFI_CLOSURE_TEST +/* Do some internal sanity testing to make sure allocation and + deallocation of pages are working as intended. */ +int main () +{ + void *p[3]; +#define GET(idx, len) do { p[idx] = dlmalloc (len); printf ("allocated %zi for p[%i]\n", (len), (idx)); } while (0) +#define PUT(idx) do { printf ("freeing p[%i]\n", (idx)); dlfree (p[idx]); } while (0) + GET (0, malloc_getpagesize / 2); + GET (1, 2 * malloc_getpagesize - 64 * sizeof (void*)); + PUT (1); + GET (1, 2 * malloc_getpagesize); + GET (2, malloc_getpagesize / 2); + PUT (1); + PUT (0); + PUT (2); + return 0; +} +#endif /* FFI_CLOSURE_TEST */ +# else /* ! FFI_MMAP_EXEC_WRIT */ + +/* On many systems, memory returned by malloc is writable and + executable, so just use it. */ + +#include <stdlib.h> + +void * +ffi_closure_alloc (size_t size, void **code) +{ + if (!code) + return NULL; + + return *code = malloc (size); +} + +void +ffi_closure_free (void *ptr) +{ + free (ptr); +} + +# endif /* ! FFI_MMAP_EXEC_WRIT */ +#endif /* FFI_CLOSURES */ diff --git a/libffi/src/cris/ffi.c b/libffi/src/cris/ffi.c new file mode 100644 index 000000000..e9c39530c --- /dev/null +++ b/libffi/src/cris/ffi.c @@ -0,0 +1,383 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 1998 Cygnus Solutions + Copyright (c) 2004 Simon Posnjak + Copyright (c) 2005 Axis Communications AB + Copyright (C) 2007 Free Software Foundation, Inc. + + CRIS Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL SIMON POSNJAK BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#define STACK_ARG_SIZE(x) ALIGN(x, FFI_SIZEOF_ARG) + +static ffi_status +initialize_aggregate_packed_struct (ffi_type * arg) +{ + ffi_type **ptr; + + FFI_ASSERT (arg != NULL); + + FFI_ASSERT (arg->elements != NULL); + FFI_ASSERT (arg->size == 0); + FFI_ASSERT (arg->alignment == 0); + + ptr = &(arg->elements[0]); + + while ((*ptr) != NULL) + { + if (((*ptr)->size == 0) + && (initialize_aggregate_packed_struct ((*ptr)) != FFI_OK)) + return FFI_BAD_TYPEDEF; + + FFI_ASSERT (ffi_type_test ((*ptr))); + + arg->size += (*ptr)->size; + + arg->alignment = (arg->alignment > (*ptr)->alignment) ? + arg->alignment : (*ptr)->alignment; + + ptr++; + } + + if (arg->size == 0) + return FFI_BAD_TYPEDEF; + else + return FFI_OK; +} + +int +ffi_prep_args (char *stack, extended_cif * ecif) +{ + unsigned int i; + unsigned int struct_count = 0; + void **p_argv; + char *argp; + ffi_type **p_arg; + + argp = stack; + + p_argv = ecif->avalue; + + for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; + (i != 0); i--, p_arg++) + { + size_t z; + + switch ((*p_arg)->type) + { + case FFI_TYPE_STRUCT: + { + z = (*p_arg)->size; + if (z <= 4) + { + memcpy (argp, *p_argv, z); + z = 4; + } + else if (z <= 8) + { + memcpy (argp, *p_argv, z); + z = 8; + } + else + { + unsigned int uiLocOnStack; + z = sizeof (void *); + uiLocOnStack = 4 * ecif->cif->nargs + struct_count; + struct_count = struct_count + (*p_arg)->size; + *(unsigned int *) argp = + (unsigned int) (UINT32 *) (stack + uiLocOnStack); + memcpy ((stack + uiLocOnStack), *p_argv, (*p_arg)->size); + } + break; + } + default: + z = (*p_arg)->size; + if (z < sizeof (int)) + { + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + *(signed int *) argp = (signed int) *(SINT8 *) (*p_argv); + break; + + case FFI_TYPE_UINT8: + *(unsigned int *) argp = + (unsigned int) *(UINT8 *) (*p_argv); + break; + + case FFI_TYPE_SINT16: + *(signed int *) argp = (signed int) *(SINT16 *) (*p_argv); + break; + + case FFI_TYPE_UINT16: + *(unsigned int *) argp = + (unsigned int) *(UINT16 *) (*p_argv); + break; + + default: + FFI_ASSERT (0); + } + z = sizeof (int); + } + else if (z == sizeof (int)) + *(unsigned int *) argp = (unsigned int) *(UINT32 *) (*p_argv); + else + memcpy (argp, *p_argv, z); + break; + } + p_argv++; + argp += z; + } + + return (struct_count); +} + +ffi_status +ffi_prep_cif (ffi_cif * cif, + ffi_abi abi, unsigned int nargs, + ffi_type * rtype, ffi_type ** atypes) +{ + unsigned bytes = 0; + unsigned int i; + ffi_type **ptr; + + FFI_ASSERT (cif != NULL); + FFI_ASSERT ((abi > FFI_FIRST_ABI) && (abi <= FFI_DEFAULT_ABI)); + + cif->abi = abi; + cif->arg_types = atypes; + cif->nargs = nargs; + cif->rtype = rtype; + + cif->flags = 0; + + if ((cif->rtype->size == 0) + && (initialize_aggregate_packed_struct (cif->rtype) != FFI_OK)) + return FFI_BAD_TYPEDEF; + + FFI_ASSERT_VALID_TYPE (cif->rtype); + + for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) + { + if (((*ptr)->size == 0) + && (initialize_aggregate_packed_struct ((*ptr)) != FFI_OK)) + return FFI_BAD_TYPEDEF; + + FFI_ASSERT_VALID_TYPE (*ptr); + + if (((*ptr)->alignment - 1) & bytes) + bytes = ALIGN (bytes, (*ptr)->alignment); + if ((*ptr)->type == FFI_TYPE_STRUCT) + { + if ((*ptr)->size > 8) + { + bytes += (*ptr)->size; + bytes += sizeof (void *); + } + else + { + if ((*ptr)->size > 4) + bytes += 8; + else + bytes += 4; + } + } + else + bytes += STACK_ARG_SIZE ((*ptr)->size); + } + + cif->bytes = bytes; + + return ffi_prep_cif_machdep (cif); +} + +ffi_status +ffi_prep_cif_machdep (ffi_cif * cif) +{ + switch (cif->rtype->type) + { + case FFI_TYPE_VOID: + case FFI_TYPE_STRUCT: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + cif->flags = (unsigned) cif->rtype->type; + break; + + default: + cif->flags = FFI_TYPE_INT; + break; + } + + return FFI_OK; +} + +extern void ffi_call_SYSV (int (*)(char *, extended_cif *), + extended_cif *, + unsigned, unsigned, unsigned *, void (*fn) ()) + __attribute__ ((__visibility__ ("hidden"))); + +void +ffi_call (ffi_cif * cif, void (*fn) (), void *rvalue, void **avalue) +{ + extended_cif ecif; + + ecif.cif = cif; + ecif.avalue = avalue; + + if ((rvalue == NULL) && (cif->rtype->type == FFI_TYPE_STRUCT)) + { + ecif.rvalue = alloca (cif->rtype->size); + } + else + ecif.rvalue = rvalue; + + switch (cif->abi) + { + case FFI_SYSV: + ffi_call_SYSV (ffi_prep_args, &ecif, cif->bytes, + cif->flags, ecif.rvalue, fn); + break; + default: + FFI_ASSERT (0); + break; + } +} + +/* Because the following variables are not exported outside libffi, we + mark them hidden. */ + +/* Assembly code for the jump stub. */ +extern const char ffi_cris_trampoline_template[] + __attribute__ ((__visibility__ ("hidden"))); + +/* Offset into ffi_cris_trampoline_template of where to put the + ffi_prep_closure_inner function. */ +extern const int ffi_cris_trampoline_fn_offset + __attribute__ ((__visibility__ ("hidden"))); + +/* Offset into ffi_cris_trampoline_template of where to put the + closure data. */ +extern const int ffi_cris_trampoline_closure_offset + __attribute__ ((__visibility__ ("hidden"))); + +/* This function is sibling-called (jumped to) by the closure + trampoline. We get R10..R13 at PARAMS[0..3] and a copy of [SP] at + PARAMS[4] to simplify handling of a straddling parameter. A copy + of R9 is at PARAMS[5] and SP at PARAMS[6]. These parameters are + put at the appropriate place in CLOSURE which is then executed and + the return value is passed back to the caller. */ + +static unsigned long long +ffi_prep_closure_inner (void **params, ffi_closure* closure) +{ + char *register_args = (char *) params; + void *struct_ret = params[5]; + char *stack_args = params[6]; + char *ptr = register_args; + ffi_cif *cif = closure->cif; + ffi_type **arg_types = cif->arg_types; + + /* Max room needed is number of arguments as 64-bit values. */ + void **avalue = alloca (closure->cif->nargs * sizeof(void *)); + int i; + int doing_regs; + long long llret = 0; + + /* Find the address of each argument. */ + for (i = 0, doing_regs = 1; i < cif->nargs; i++) + { + /* Types up to and including 8 bytes go by-value. */ + if (arg_types[i]->size <= 4) + { + avalue[i] = ptr; + ptr += 4; + } + else if (arg_types[i]->size <= 8) + { + avalue[i] = ptr; + ptr += 8; + } + else + { + FFI_ASSERT (arg_types[i]->type == FFI_TYPE_STRUCT); + + /* Passed by-reference, so copy the pointer. */ + avalue[i] = *(void **) ptr; + ptr += 4; + } + + /* If we've handled more arguments than fit in registers, start + looking at the those passed on the stack. Step over the + first one if we had a straddling parameter. */ + if (doing_regs && ptr >= register_args + 4*4) + { + ptr = stack_args + ((ptr > register_args + 4*4) ? 4 : 0); + doing_regs = 0; + } + } + + /* Invoke the closure. */ + (closure->fun) (cif, + + cif->rtype->type == FFI_TYPE_STRUCT + /* The caller allocated space for the return + structure, and passed a pointer to this space in + R9. */ + ? struct_ret + + /* We take advantage of being able to ignore that + the high part isn't set if the return value is + not in R10:R11, but in R10 only. */ + : (void *) &llret, + + avalue, closure->user_data); + + return llret; +} + +/* API function: Prepare the trampoline. */ + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif *, void *, void **, void*), + void *user_data, + void *codeloc) +{ + void *innerfn = ffi_prep_closure_inner; + FFI_ASSERT (cif->abi == FFI_SYSV); + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + memcpy (closure->tramp, ffi_cris_trampoline_template, + FFI_CRIS_TRAMPOLINE_CODE_PART_SIZE); + memcpy (closure->tramp + ffi_cris_trampoline_fn_offset, + &innerfn, sizeof (void *)); + memcpy (closure->tramp + ffi_cris_trampoline_closure_offset, + &codeloc, sizeof (void *)); + + return FFI_OK; +} diff --git a/libffi/src/cris/ffitarget.h b/libffi/src/cris/ffitarget.h new file mode 100644 index 000000000..4257f10a7 --- /dev/null +++ b/libffi/src/cris/ffitarget.h @@ -0,0 +1,51 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 1996-2003 Red Hat, Inc. + Target configuration macros for CRIS. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + FFI_SYSV, + FFI_DEFAULT_ABI = FFI_SYSV, + FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 +} ffi_abi; +#endif + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_CRIS_TRAMPOLINE_CODE_PART_SIZE 36 +#define FFI_CRIS_TRAMPOLINE_DATA_PART_SIZE (7*4) +#define FFI_TRAMPOLINE_SIZE \ + (FFI_CRIS_TRAMPOLINE_CODE_PART_SIZE + FFI_CRIS_TRAMPOLINE_DATA_PART_SIZE) +#define FFI_NATIVE_RAW_API 0 + +#endif diff --git a/libffi/src/cris/sysv.S b/libffi/src/cris/sysv.S new file mode 100644 index 000000000..79abaee4d --- /dev/null +++ b/libffi/src/cris/sysv.S @@ -0,0 +1,215 @@ +/* ----------------------------------------------------------------------- + sysv.S - Copyright (c) 2004 Simon Posnjak + Copyright (c) 2005 Axis Communications AB + + CRIS Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL SIMON POSNJAK BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <ffi.h> +#define CONCAT(x,y) x ## y +#define XCONCAT(x,y) CONCAT (x, y) +#define L(x) XCONCAT (__USER_LABEL_PREFIX__, x) + + .text + + ;; OK, when we get called we should have this (according to + ;; AXIS ETRAX 100LX Programmer's Manual chapter 6.3). + ;; + ;; R10: ffi_prep_args (func. pointer) + ;; R11: &ecif + ;; R12: cif->bytes + ;; R13: fig->flags + ;; sp+0: ecif.rvalue + ;; sp+4: fn (function pointer to the function that we need to call) + + .globl L(ffi_call_SYSV) + .type L(ffi_call_SYSV),@function + .hidden L(ffi_call_SYSV) + +L(ffi_call_SYSV): + ;; Save the regs to the stack. + push $srp + ;; Used for stack pointer saving. + push $r6 + ;; Used for function address pointer. + push $r7 + ;; Used for stack pointer saving. + push $r8 + ;; We save fig->flags to stack we will need them after we + ;; call The Function. + push $r13 + + ;; Saving current stack pointer. + move.d $sp,$r8 + move.d $sp,$r6 + + ;; Move address of ffi_prep_args to r13. + move.d $r10,$r13 + + ;; Make room on the stack for the args of fn. + sub.d $r12,$sp + + ;; Function void ffi_prep_args(char *stack, extended_cif *ecif) parameters are: + ;; r10 <-- stack pointer + ;; r11 <-- &ecif (already there) + move.d $sp,$r10 + + ;; Call the function. + jsr $r13 + + ;; Save the size of the structures which are passed on stack. + move.d $r10,$r7 + + ;; Move first four args in to r10..r13. + move.d [$sp+0],$r10 + move.d [$sp+4],$r11 + move.d [$sp+8],$r12 + move.d [$sp+12],$r13 + + ;; Adjust the stack and check if any parameters are given on stack. + addq 16,$sp + sub.d $r7,$r6 + cmp.d $sp,$r6 + + bpl go_on + nop + +go_on_no_params_on_stack: + move.d $r6,$sp + +go_on: + ;; Discover if we need to put rval address in to r9. + move.d [$r8+0],$r7 + cmpq FFI_TYPE_STRUCT,$r7 + bne call_now + nop + + ;; Move rval address to $r9. + move.d [$r8+20],$r9 + +call_now: + ;; Move address of The Function in to r7. + move.d [$r8+24],$r7 + + ;; Call The Function. + jsr $r7 + + ;; Reset stack. + move.d $r8,$sp + + ;; Load rval type (fig->flags) in to r13. + pop $r13 + + ;; Detect rval type. + cmpq FFI_TYPE_VOID,$r13 + beq epilogue + + cmpq FFI_TYPE_STRUCT,$r13 + beq epilogue + + cmpq FFI_TYPE_DOUBLE,$r13 + beq return_double_or_longlong + + cmpq FFI_TYPE_UINT64,$r13 + beq return_double_or_longlong + + cmpq FFI_TYPE_SINT64,$r13 + beq return_double_or_longlong + nop + + ;; Just return the 32 bit value. + ba return + nop + +return_double_or_longlong: + ;; Load half of the rval to r10 and the other half to r11. + move.d [$sp+16],$r13 + move.d $r10,[$r13] + addq 4,$r13 + move.d $r11,[$r13] + ba epilogue + nop + +return: + ;; Load the rval to r10. + move.d [$sp+16],$r13 + move.d $r10,[$r13] + +epilogue: + pop $r8 + pop $r7 + pop $r6 + Jump [$sp+] + + .size ffi_call_SYSV,.-ffi_call_SYSV + +/* Save R10..R13 into an array, somewhat like varargs. Copy the next + argument too, to simplify handling of any straddling parameter. + Save R9 and SP after those. Jump to function handling the rest. + Since this is a template, copied and the main function filled in by + the user. */ + + .globl L(ffi_cris_trampoline_template) + .type L(ffi_cris_trampoline_template),@function + .hidden L(ffi_cris_trampoline_template) + +L(ffi_cris_trampoline_template): +0: + /* The value we get for "PC" is right after the prefix instruction, + two bytes from the beginning, i.e. 0b+2. */ + move.d $r10,[$pc+2f-(0b+2)] + move.d $pc,$r10 +1: + addq 2f-1b+4,$r10 + move.d $r11,[$r10+] + move.d $r12,[$r10+] + move.d $r13,[$r10+] + move.d [$sp],$r11 + move.d $r11,[$r10+] + move.d $r9,[$r10+] + move.d $sp,[$r10+] + subq FFI_CRIS_TRAMPOLINE_DATA_PART_SIZE,$r10 + move.d 0,$r11 +3: + jump 0 +2: + .size ffi_cris_trampoline_template,.-0b + +/* This macro create a constant usable as "extern const int \name" in + C from within libffi, when \name has no prefix decoration. */ + + .macro const name,value + .globl \name + .type \name,@object + .hidden \name +\name: + .dword \value + .size \name,4 + .endm + +/* Constants for offsets within the trampoline. We could do this with + just symbols, avoiding memory contents and memory accesses, but the + C usage code would look a bit stranger. */ + + const L(ffi_cris_trampoline_fn_offset),2b-4-0b + const L(ffi_cris_trampoline_closure_offset),3b-4-0b diff --git a/libffi/src/debug.c b/libffi/src/debug.c new file mode 100644 index 000000000..51dcfcf22 --- /dev/null +++ b/libffi/src/debug.c @@ -0,0 +1,59 @@ +/* ----------------------------------------------------------------------- + debug.c - Copyright (c) 1996 Red Hat, Inc. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> +#include <stdlib.h> +#include <stdio.h> + +/* General debugging routines */ + +void ffi_stop_here(void) +{ + /* This function is only useful for debugging purposes. + Place a breakpoint on ffi_stop_here to be notified of + significant events. */ +} + +/* This function should only be called via the FFI_ASSERT() macro */ + +void ffi_assert(char *expr, char *file, int line) +{ + fprintf(stderr, "ASSERTION FAILURE: %s at %s:%d\n", expr, file, line); + ffi_stop_here(); + abort(); +} + +/* Perform a sanity check on an ffi_type structure */ + +void ffi_type_test(ffi_type *a, char *file, int line) +{ + FFI_ASSERT_AT(a != NULL, file, line); + + FFI_ASSERT_AT(a->type <= FFI_TYPE_LAST, file, line); + FFI_ASSERT_AT(a->type == FFI_TYPE_VOID || a->size > 0, file, line); + FFI_ASSERT_AT(a->type == FFI_TYPE_VOID || a->alignment > 0, file, line); + FFI_ASSERT_AT(a->type != FFI_TYPE_STRUCT || a->elements != NULL, file, line); + +} diff --git a/libffi/src/dlmalloc.c b/libffi/src/dlmalloc.c new file mode 100644 index 000000000..0fa235af2 --- /dev/null +++ b/libffi/src/dlmalloc.c @@ -0,0 +1,5158 @@ +/* + This is a version (aka dlmalloc) of malloc/free/realloc written by + Doug Lea and released to the public domain, as explained at + http://creativecommons.org/licenses/publicdomain. Send questions, + comments, complaints, performance data, etc to dl@cs.oswego.edu + +* Version 2.8.3 Thu Sep 22 11:16:15 2005 Doug Lea (dl at gee) + + Note: There may be an updated version of this malloc obtainable at + ftp://gee.cs.oswego.edu/pub/misc/malloc.c + Check before installing! + +* Quickstart + + This library is all in one file to simplify the most common usage: + ftp it, compile it (-O3), and link it into another program. All of + the compile-time options default to reasonable values for use on + most platforms. You might later want to step through various + compile-time and dynamic tuning options. + + For convenience, an include file for code using this malloc is at: + ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.3.h + You don't really need this .h file unless you call functions not + defined in your system include files. The .h file contains only the + excerpts from this file needed for using this malloc on ANSI C/C++ + systems, so long as you haven't changed compile-time options about + naming and tuning parameters. If you do, then you can create your + own malloc.h that does include all settings by cutting at the point + indicated below. Note that you may already by default be using a C + library containing a malloc that is based on some version of this + malloc (for example in linux). You might still want to use the one + in this file to customize settings or to avoid overheads associated + with library versions. + +* Vital statistics: + + Supported pointer/size_t representation: 4 or 8 bytes + size_t MUST be an unsigned type of the same width as + pointers. (If you are using an ancient system that declares + size_t as a signed type, or need it to be a different width + than pointers, you can use a previous release of this malloc + (e.g. 2.7.2) supporting these.) + + Alignment: 8 bytes (default) + This suffices for nearly all current machines and C compilers. + However, you can define MALLOC_ALIGNMENT to be wider than this + if necessary (up to 128bytes), at the expense of using more space. + + Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes) + 8 or 16 bytes (if 8byte sizes) + Each malloced chunk has a hidden word of overhead holding size + and status information, and additional cross-check word + if FOOTERS is defined. + + Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead) + 8-byte ptrs: 32 bytes (including overhead) + + Even a request for zero bytes (i.e., malloc(0)) returns a + pointer to something of the minimum allocatable size. + The maximum overhead wastage (i.e., number of extra bytes + allocated than were requested in malloc) is less than or equal + to the minimum size, except for requests >= mmap_threshold that + are serviced via mmap(), where the worst case wastage is about + 32 bytes plus the remainder from a system page (the minimal + mmap unit); typically 4096 or 8192 bytes. + + Security: static-safe; optionally more or less + The "security" of malloc refers to the ability of malicious + code to accentuate the effects of errors (for example, freeing + space that is not currently malloc'ed or overwriting past the + ends of chunks) in code that calls malloc. This malloc + guarantees not to modify any memory locations below the base of + heap, i.e., static variables, even in the presence of usage + errors. The routines additionally detect most improper frees + and reallocs. All this holds as long as the static bookkeeping + for malloc itself is not corrupted by some other means. This + is only one aspect of security -- these checks do not, and + cannot, detect all possible programming errors. + + If FOOTERS is defined nonzero, then each allocated chunk + carries an additional check word to verify that it was malloced + from its space. These check words are the same within each + execution of a program using malloc, but differ across + executions, so externally crafted fake chunks cannot be + freed. This improves security by rejecting frees/reallocs that + could corrupt heap memory, in addition to the checks preventing + writes to statics that are always on. This may further improve + security at the expense of time and space overhead. (Note that + FOOTERS may also be worth using with MSPACES.) + + By default detected errors cause the program to abort (calling + "abort()"). You can override this to instead proceed past + errors by defining PROCEED_ON_ERROR. In this case, a bad free + has no effect, and a malloc that encounters a bad address + caused by user overwrites will ignore the bad address by + dropping pointers and indices to all known memory. This may + be appropriate for programs that should continue if at all + possible in the face of programming errors, although they may + run out of memory because dropped memory is never reclaimed. + + If you don't like either of these options, you can define + CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything + else. And if if you are sure that your program using malloc has + no errors or vulnerabilities, you can define INSECURE to 1, + which might (or might not) provide a small performance improvement. + + Thread-safety: NOT thread-safe unless USE_LOCKS defined + When USE_LOCKS is defined, each public call to malloc, free, + etc is surrounded with either a pthread mutex or a win32 + spinlock (depending on WIN32). This is not especially fast, and + can be a major bottleneck. It is designed only to provide + minimal protection in concurrent environments, and to provide a + basis for extensions. If you are using malloc in a concurrent + program, consider instead using ptmalloc, which is derived from + a version of this malloc. (See http://www.malloc.de). + + System requirements: Any combination of MORECORE and/or MMAP/MUNMAP + This malloc can use unix sbrk or any emulation (invoked using + the CALL_MORECORE macro) and/or mmap/munmap or any emulation + (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system + memory. On most unix systems, it tends to work best if both + MORECORE and MMAP are enabled. On Win32, it uses emulations + based on VirtualAlloc. It also uses common C library functions + like memset. + + Compliance: I believe it is compliant with the Single Unix Specification + (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably + others as well. + +* Overview of algorithms + + This is not the fastest, most space-conserving, most portable, or + most tunable malloc ever written. However it is among the fastest + while also being among the most space-conserving, portable and + tunable. Consistent balance across these factors results in a good + general-purpose allocator for malloc-intensive programs. + + In most ways, this malloc is a best-fit allocator. Generally, it + chooses the best-fitting existing chunk for a request, with ties + broken in approximately least-recently-used order. (This strategy + normally maintains low fragmentation.) However, for requests less + than 256bytes, it deviates from best-fit when there is not an + exactly fitting available chunk by preferring to use space adjacent + to that used for the previous small request, as well as by breaking + ties in approximately most-recently-used order. (These enhance + locality of series of small allocations.) And for very large requests + (>= 256Kb by default), it relies on system memory mapping + facilities, if supported. (This helps avoid carrying around and + possibly fragmenting memory used only for large chunks.) + + All operations (except malloc_stats and mallinfo) have execution + times that are bounded by a constant factor of the number of bits in + a size_t, not counting any clearing in calloc or copying in realloc, + or actions surrounding MORECORE and MMAP that have times + proportional to the number of non-contiguous regions returned by + system allocation routines, which is often just 1. + + The implementation is not very modular and seriously overuses + macros. Perhaps someday all C compilers will do as good a job + inlining modular code as can now be done by brute-force expansion, + but now, enough of them seem not to. + + Some compilers issue a lot of warnings about code that is + dead/unreachable only on some platforms, and also about intentional + uses of negation on unsigned types. All known cases of each can be + ignored. + + For a longer but out of date high-level description, see + http://gee.cs.oswego.edu/dl/html/malloc.html + +* MSPACES + If MSPACES is defined, then in addition to malloc, free, etc., + this file also defines mspace_malloc, mspace_free, etc. These + are versions of malloc routines that take an "mspace" argument + obtained using create_mspace, to control all internal bookkeeping. + If ONLY_MSPACES is defined, only these versions are compiled. + So if you would like to use this allocator for only some allocations, + and your system malloc for others, you can compile with + ONLY_MSPACES and then do something like... + static mspace mymspace = create_mspace(0,0); // for example + #define mymalloc(bytes) mspace_malloc(mymspace, bytes) + + (Note: If you only need one instance of an mspace, you can instead + use "USE_DL_PREFIX" to relabel the global malloc.) + + You can similarly create thread-local allocators by storing + mspaces as thread-locals. For example: + static __thread mspace tlms = 0; + void* tlmalloc(size_t bytes) { + if (tlms == 0) tlms = create_mspace(0, 0); + return mspace_malloc(tlms, bytes); + } + void tlfree(void* mem) { mspace_free(tlms, mem); } + + Unless FOOTERS is defined, each mspace is completely independent. + You cannot allocate from one and free to another (although + conformance is only weakly checked, so usage errors are not always + caught). If FOOTERS is defined, then each chunk carries around a tag + indicating its originating mspace, and frees are directed to their + originating spaces. + + ------------------------- Compile-time options --------------------------- + +Be careful in setting #define values for numerical constants of type +size_t. On some systems, literal values are not automatically extended +to size_t precision unless they are explicitly casted. + +WIN32 default: defined if _WIN32 defined + Defining WIN32 sets up defaults for MS environment and compilers. + Otherwise defaults are for unix. + +MALLOC_ALIGNMENT default: (size_t)8 + Controls the minimum alignment for malloc'ed chunks. It must be a + power of two and at least 8, even on machines for which smaller + alignments would suffice. It may be defined as larger than this + though. Note however that code and data structures are optimized for + the case of 8-byte alignment. + +MSPACES default: 0 (false) + If true, compile in support for independent allocation spaces. + This is only supported if HAVE_MMAP is true. + +ONLY_MSPACES default: 0 (false) + If true, only compile in mspace versions, not regular versions. + +USE_LOCKS default: 0 (false) + Causes each call to each public routine to be surrounded with + pthread or WIN32 mutex lock/unlock. (If set true, this can be + overridden on a per-mspace basis for mspace versions.) + +FOOTERS default: 0 + If true, provide extra checking and dispatching by placing + information in the footers of allocated chunks. This adds + space and time overhead. + +INSECURE default: 0 + If true, omit checks for usage errors and heap space overwrites. + +USE_DL_PREFIX default: NOT defined + Causes compiler to prefix all public routines with the string 'dl'. + This can be useful when you only want to use this malloc in one part + of a program, using your regular system malloc elsewhere. + +ABORT default: defined as abort() + Defines how to abort on failed checks. On most systems, a failed + check cannot die with an "assert" or even print an informative + message, because the underlying print routines in turn call malloc, + which will fail again. Generally, the best policy is to simply call + abort(). It's not very useful to do more than this because many + errors due to overwriting will show up as address faults (null, odd + addresses etc) rather than malloc-triggered checks, so will also + abort. Also, most compilers know that abort() does not return, so + can better optimize code conditionally calling it. + +PROCEED_ON_ERROR default: defined as 0 (false) + Controls whether detected bad addresses cause them to bypassed + rather than aborting. If set, detected bad arguments to free and + realloc are ignored. And all bookkeeping information is zeroed out + upon a detected overwrite of freed heap space, thus losing the + ability to ever return it from malloc again, but enabling the + application to proceed. If PROCEED_ON_ERROR is defined, the + static variable malloc_corruption_error_count is compiled in + and can be examined to see if errors have occurred. This option + generates slower code than the default abort policy. + +DEBUG default: NOT defined + The DEBUG setting is mainly intended for people trying to modify + this code or diagnose problems when porting to new platforms. + However, it may also be able to better isolate user errors than just + using runtime checks. The assertions in the check routines spell + out in more detail the assumptions and invariants underlying the + algorithms. The checking is fairly extensive, and will slow down + execution noticeably. Calling malloc_stats or mallinfo with DEBUG + set will attempt to check every non-mmapped allocated and free chunk + in the course of computing the summaries. + +ABORT_ON_ASSERT_FAILURE default: defined as 1 (true) + Debugging assertion failures can be nearly impossible if your + version of the assert macro causes malloc to be called, which will + lead to a cascade of further failures, blowing the runtime stack. + ABORT_ON_ASSERT_FAILURE cause assertions failures to call abort(), + which will usually make debugging easier. + +MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32 + The action to take before "return 0" when malloc fails to be able to + return memory because there is none available. + +HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES + True if this system supports sbrk or an emulation of it. + +MORECORE default: sbrk + The name of the sbrk-style system routine to call to obtain more + memory. See below for guidance on writing custom MORECORE + functions. The type of the argument to sbrk/MORECORE varies across + systems. It cannot be size_t, because it supports negative + arguments, so it is normally the signed type of the same width as + size_t (sometimes declared as "intptr_t"). It doesn't much matter + though. Internally, we only call it with arguments less than half + the max value of a size_t, which should work across all reasonable + possibilities, although sometimes generating compiler warnings. See + near the end of this file for guidelines for creating a custom + version of MORECORE. + +MORECORE_CONTIGUOUS default: 1 (true) + If true, take advantage of fact that consecutive calls to MORECORE + with positive arguments always return contiguous increasing + addresses. This is true of unix sbrk. It does not hurt too much to + set it true anyway, since malloc copes with non-contiguities. + Setting it false when definitely non-contiguous saves time + and possibly wasted space it would take to discover this though. + +MORECORE_CANNOT_TRIM default: NOT defined + True if MORECORE cannot release space back to the system when given + negative arguments. This is generally necessary only if you are + using a hand-crafted MORECORE function that cannot handle negative + arguments. + +HAVE_MMAP default: 1 (true) + True if this system supports mmap or an emulation of it. If so, and + HAVE_MORECORE is not true, MMAP is used for all system + allocation. If set and HAVE_MORECORE is true as well, MMAP is + primarily used to directly allocate very large blocks. It is also + used as a backup strategy in cases where MORECORE fails to provide + space from system. Note: A single call to MUNMAP is assumed to be + able to unmap memory that may have be allocated using multiple calls + to MMAP, so long as they are adjacent. + +HAVE_MREMAP default: 1 on linux, else 0 + If true realloc() uses mremap() to re-allocate large blocks and + extend or shrink allocation spaces. + +MMAP_CLEARS default: 1 on unix + True if mmap clears memory so calloc doesn't need to. This is true + for standard unix mmap using /dev/zero. + +USE_BUILTIN_FFS default: 0 (i.e., not used) + Causes malloc to use the builtin ffs() function to compute indices. + Some compilers may recognize and intrinsify ffs to be faster than the + supplied C version. Also, the case of x86 using gcc is special-cased + to an asm instruction, so is already as fast as it can be, and so + this setting has no effect. (On most x86s, the asm version is only + slightly faster than the C version.) + +malloc_getpagesize default: derive from system includes, or 4096. + The system page size. To the extent possible, this malloc manages + memory from the system in page-size units. This may be (and + usually is) a function rather than a constant. This is ignored + if WIN32, where page size is determined using getSystemInfo during + initialization. + +USE_DEV_RANDOM default: 0 (i.e., not used) + Causes malloc to use /dev/random to initialize secure magic seed for + stamping footers. Otherwise, the current time is used. + +NO_MALLINFO default: 0 + If defined, don't compile "mallinfo". This can be a simple way + of dealing with mismatches between system declarations and + those in this file. + +MALLINFO_FIELD_TYPE default: size_t + The type of the fields in the mallinfo struct. This was originally + defined as "int" in SVID etc, but is more usefully defined as + size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set + +REALLOC_ZERO_BYTES_FREES default: not defined + This should be set if a call to realloc with zero bytes should + be the same as a call to free. Some people think it should. Otherwise, + since this malloc returns a unique pointer for malloc(0), so does + realloc(p, 0). + +LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H +LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H +LACKS_STDLIB_H default: NOT defined unless on WIN32 + Define these if your system does not have these header files. + You might need to manually insert some of the declarations they provide. + +DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS, + system_info.dwAllocationGranularity in WIN32, + otherwise 64K. + Also settable using mallopt(M_GRANULARITY, x) + The unit for allocating and deallocating memory from the system. On + most systems with contiguous MORECORE, there is no reason to + make this more than a page. However, systems with MMAP tend to + either require or encourage larger granularities. You can increase + this value to prevent system allocation functions to be called so + often, especially if they are slow. The value must be at least one + page and must be a power of two. Setting to 0 causes initialization + to either page size or win32 region size. (Note: In previous + versions of malloc, the equivalent of this option was called + "TOP_PAD") + +DEFAULT_TRIM_THRESHOLD default: 2MB + Also settable using mallopt(M_TRIM_THRESHOLD, x) + The maximum amount of unused top-most memory to keep before + releasing via malloc_trim in free(). Automatic trimming is mainly + useful in long-lived programs using contiguous MORECORE. Because + trimming via sbrk can be slow on some systems, and can sometimes be + wasteful (in cases where programs immediately afterward allocate + more large chunks) the value should be high enough so that your + overall system performance would improve by releasing this much + memory. As a rough guide, you might set to a value close to the + average size of a process (program) running on your system. + Releasing this much memory would allow such a process to run in + memory. Generally, it is worth tuning trim thresholds when a + program undergoes phases where several large chunks are allocated + and released in ways that can reuse each other's storage, perhaps + mixed with phases where there are no such chunks at all. The trim + value must be greater than page size to have any useful effect. To + disable trimming completely, you can set to MAX_SIZE_T. Note that the trick + some people use of mallocing a huge space and then freeing it at + program startup, in an attempt to reserve system memory, doesn't + have the intended effect under automatic trimming, since that memory + will immediately be returned to the system. + +DEFAULT_MMAP_THRESHOLD default: 256K + Also settable using mallopt(M_MMAP_THRESHOLD, x) + The request size threshold for using MMAP to directly service a + request. Requests of at least this size that cannot be allocated + using already-existing space will be serviced via mmap. (If enough + normal freed space already exists it is used instead.) Using mmap + segregates relatively large chunks of memory so that they can be + individually obtained and released from the host system. A request + serviced through mmap is never reused by any other request (at least + not directly; the system may just so happen to remap successive + requests to the same locations). Segregating space in this way has + the benefits that: Mmapped space can always be individually released + back to the system, which helps keep the system level memory demands + of a long-lived program low. Also, mapped memory doesn't become + `locked' between other chunks, as can happen with normally allocated + chunks, which means that even trimming via malloc_trim would not + release them. However, it has the disadvantage that the space + cannot be reclaimed, consolidated, and then used to service later + requests, as happens with normal chunks. The advantages of mmap + nearly always outweigh disadvantages for "large" chunks, but the + value of "large" may vary across systems. The default is an + empirically derived value that works well in most systems. You can + disable mmap by setting to MAX_SIZE_T. + +*/ + +#ifndef WIN32 +#ifdef _WIN32 +#define WIN32 1 +#endif /* _WIN32 */ +#endif /* WIN32 */ +#ifdef WIN32 +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#define HAVE_MMAP 1 +#define HAVE_MORECORE 0 +#define LACKS_UNISTD_H +#define LACKS_SYS_PARAM_H +#define LACKS_SYS_MMAN_H +#define LACKS_STRING_H +#define LACKS_STRINGS_H +#define LACKS_SYS_TYPES_H +#define LACKS_ERRNO_H +#define MALLOC_FAILURE_ACTION +#define MMAP_CLEARS 0 /* WINCE and some others apparently don't clear */ +#endif /* WIN32 */ + +#ifdef __OS2__ +#define INCL_DOS +#include <os2.h> +#define HAVE_MMAP 1 +#define HAVE_MORECORE 0 +#define LACKS_SYS_MMAN_H +#endif /* __OS2__ */ + +#if defined(DARWIN) || defined(_DARWIN) +/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */ +#ifndef HAVE_MORECORE +#define HAVE_MORECORE 0 +#define HAVE_MMAP 1 +#endif /* HAVE_MORECORE */ +#endif /* DARWIN */ + +#ifndef LACKS_SYS_TYPES_H +#include <sys/types.h> /* For size_t */ +#endif /* LACKS_SYS_TYPES_H */ + +/* The maximum possible size_t value has all bits set */ +#define MAX_SIZE_T (~(size_t)0) + +#ifndef ONLY_MSPACES +#define ONLY_MSPACES 0 +#endif /* ONLY_MSPACES */ +#ifndef MSPACES +#if ONLY_MSPACES +#define MSPACES 1 +#else /* ONLY_MSPACES */ +#define MSPACES 0 +#endif /* ONLY_MSPACES */ +#endif /* MSPACES */ +#ifndef MALLOC_ALIGNMENT +#define MALLOC_ALIGNMENT ((size_t)8U) +#endif /* MALLOC_ALIGNMENT */ +#ifndef FOOTERS +#define FOOTERS 0 +#endif /* FOOTERS */ +#ifndef ABORT +#define ABORT abort() +#endif /* ABORT */ +#ifndef ABORT_ON_ASSERT_FAILURE +#define ABORT_ON_ASSERT_FAILURE 1 +#endif /* ABORT_ON_ASSERT_FAILURE */ +#ifndef PROCEED_ON_ERROR +#define PROCEED_ON_ERROR 0 +#endif /* PROCEED_ON_ERROR */ +#ifndef USE_LOCKS +#define USE_LOCKS 0 +#endif /* USE_LOCKS */ +#ifndef INSECURE +#define INSECURE 0 +#endif /* INSECURE */ +#ifndef HAVE_MMAP +#define HAVE_MMAP 1 +#endif /* HAVE_MMAP */ +#ifndef MMAP_CLEARS +#define MMAP_CLEARS 1 +#endif /* MMAP_CLEARS */ +#ifndef HAVE_MREMAP +#ifdef linux +#define HAVE_MREMAP 1 +#else /* linux */ +#define HAVE_MREMAP 0 +#endif /* linux */ +#endif /* HAVE_MREMAP */ +#ifndef MALLOC_FAILURE_ACTION +#define MALLOC_FAILURE_ACTION errno = ENOMEM; +#endif /* MALLOC_FAILURE_ACTION */ +#ifndef HAVE_MORECORE +#if ONLY_MSPACES +#define HAVE_MORECORE 0 +#else /* ONLY_MSPACES */ +#define HAVE_MORECORE 1 +#endif /* ONLY_MSPACES */ +#endif /* HAVE_MORECORE */ +#if !HAVE_MORECORE +#define MORECORE_CONTIGUOUS 0 +#else /* !HAVE_MORECORE */ +#ifndef MORECORE +#define MORECORE sbrk +#endif /* MORECORE */ +#ifndef MORECORE_CONTIGUOUS +#define MORECORE_CONTIGUOUS 1 +#endif /* MORECORE_CONTIGUOUS */ +#endif /* HAVE_MORECORE */ +#ifndef DEFAULT_GRANULARITY +#if MORECORE_CONTIGUOUS +#define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */ +#else /* MORECORE_CONTIGUOUS */ +#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U) +#endif /* MORECORE_CONTIGUOUS */ +#endif /* DEFAULT_GRANULARITY */ +#ifndef DEFAULT_TRIM_THRESHOLD +#ifndef MORECORE_CANNOT_TRIM +#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) +#else /* MORECORE_CANNOT_TRIM */ +#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T +#endif /* MORECORE_CANNOT_TRIM */ +#endif /* DEFAULT_TRIM_THRESHOLD */ +#ifndef DEFAULT_MMAP_THRESHOLD +#if HAVE_MMAP +#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U) +#else /* HAVE_MMAP */ +#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T +#endif /* HAVE_MMAP */ +#endif /* DEFAULT_MMAP_THRESHOLD */ +#ifndef USE_BUILTIN_FFS +#define USE_BUILTIN_FFS 0 +#endif /* USE_BUILTIN_FFS */ +#ifndef USE_DEV_RANDOM +#define USE_DEV_RANDOM 0 +#endif /* USE_DEV_RANDOM */ +#ifndef NO_MALLINFO +#define NO_MALLINFO 0 +#endif /* NO_MALLINFO */ +#ifndef MALLINFO_FIELD_TYPE +#define MALLINFO_FIELD_TYPE size_t +#endif /* MALLINFO_FIELD_TYPE */ + +/* + mallopt tuning options. SVID/XPG defines four standard parameter + numbers for mallopt, normally defined in malloc.h. None of these + are used in this malloc, so setting them has no effect. But this + malloc does support the following options. +*/ + +#define M_TRIM_THRESHOLD (-1) +#define M_GRANULARITY (-2) +#define M_MMAP_THRESHOLD (-3) + +/* ------------------------ Mallinfo declarations ------------------------ */ + +#if !NO_MALLINFO +/* + This version of malloc supports the standard SVID/XPG mallinfo + routine that returns a struct containing usage properties and + statistics. It should work on any system that has a + /usr/include/malloc.h defining struct mallinfo. The main + declaration needed is the mallinfo struct that is returned (by-copy) + by mallinfo(). The malloinfo struct contains a bunch of fields that + are not even meaningful in this version of malloc. These fields are + are instead filled by mallinfo() with other numbers that might be of + interest. + + HAVE_USR_INCLUDE_MALLOC_H should be set if you have a + /usr/include/malloc.h file that includes a declaration of struct + mallinfo. If so, it is included; else a compliant version is + declared below. These must be precisely the same for mallinfo() to + work. The original SVID version of this struct, defined on most + systems with mallinfo, declares all fields as ints. But some others + define as unsigned long. If your system defines the fields using a + type of different width than listed here, you MUST #include your + system version and #define HAVE_USR_INCLUDE_MALLOC_H. +*/ + +/* #define HAVE_USR_INCLUDE_MALLOC_H */ + +#ifdef HAVE_USR_INCLUDE_MALLOC_H +#include "/usr/include/malloc.h" +#else /* HAVE_USR_INCLUDE_MALLOC_H */ + +struct mallinfo { + MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ + MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */ + MALLINFO_FIELD_TYPE smblks; /* always 0 */ + MALLINFO_FIELD_TYPE hblks; /* always 0 */ + MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */ + MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */ + MALLINFO_FIELD_TYPE fsmblks; /* always 0 */ + MALLINFO_FIELD_TYPE uordblks; /* total allocated space */ + MALLINFO_FIELD_TYPE fordblks; /* total free space */ + MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */ +}; + +#endif /* HAVE_USR_INCLUDE_MALLOC_H */ +#endif /* NO_MALLINFO */ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#if !ONLY_MSPACES + +/* ------------------- Declarations of public routines ------------------- */ + +#ifndef USE_DL_PREFIX +#define dlcalloc calloc +#define dlfree free +#define dlmalloc malloc +#define dlmemalign memalign +#define dlrealloc realloc +#define dlvalloc valloc +#define dlpvalloc pvalloc +#define dlmallinfo mallinfo +#define dlmallopt mallopt +#define dlmalloc_trim malloc_trim +#define dlmalloc_stats malloc_stats +#define dlmalloc_usable_size malloc_usable_size +#define dlmalloc_footprint malloc_footprint +#define dlmalloc_max_footprint malloc_max_footprint +#define dlindependent_calloc independent_calloc +#define dlindependent_comalloc independent_comalloc +#endif /* USE_DL_PREFIX */ + + +/* + malloc(size_t n) + Returns a pointer to a newly allocated chunk of at least n bytes, or + null if no space is available, in which case errno is set to ENOMEM + on ANSI C systems. + + If n is zero, malloc returns a minimum-sized chunk. (The minimum + size is 16 bytes on most 32bit systems, and 32 bytes on 64bit + systems.) Note that size_t is an unsigned type, so calls with + arguments that would be negative if signed are interpreted as + requests for huge amounts of space, which will often fail. The + maximum supported value of n differs across systems, but is in all + cases less than the maximum representable value of a size_t. +*/ +void* dlmalloc(size_t); + +/* + free(void* p) + Releases the chunk of memory pointed to by p, that had been previously + allocated using malloc or a related routine such as realloc. + It has no effect if p is null. If p was not malloced or already + freed, free(p) will by default cause the current program to abort. +*/ +void dlfree(void*); + +/* + calloc(size_t n_elements, size_t element_size); + Returns a pointer to n_elements * element_size bytes, with all locations + set to zero. +*/ +void* dlcalloc(size_t, size_t); + +/* + realloc(void* p, size_t n) + Returns a pointer to a chunk of size n that contains the same data + as does chunk p up to the minimum of (n, p's size) bytes, or null + if no space is available. + + The returned pointer may or may not be the same as p. The algorithm + prefers extending p in most cases when possible, otherwise it + employs the equivalent of a malloc-copy-free sequence. + + If p is null, realloc is equivalent to malloc. + + If space is not available, realloc returns null, errno is set (if on + ANSI) and p is NOT freed. + + if n is for fewer bytes than already held by p, the newly unused + space is lopped off and freed if possible. realloc with a size + argument of zero (re)allocates a minimum-sized chunk. + + The old unix realloc convention of allowing the last-free'd chunk + to be used as an argument to realloc is not supported. +*/ + +void* dlrealloc(void*, size_t); + +/* + memalign(size_t alignment, size_t n); + Returns a pointer to a newly allocated chunk of n bytes, aligned + in accord with the alignment argument. + + The alignment argument should be a power of two. If the argument is + not a power of two, the nearest greater power is used. + 8-byte alignment is guaranteed by normal malloc calls, so don't + bother calling memalign with an argument of 8 or less. + + Overreliance on memalign is a sure way to fragment space. +*/ +void* dlmemalign(size_t, size_t); + +/* + valloc(size_t n); + Equivalent to memalign(pagesize, n), where pagesize is the page + size of the system. If the pagesize is unknown, 4096 is used. +*/ +void* dlvalloc(size_t); + +/* + mallopt(int parameter_number, int parameter_value) + Sets tunable parameters The format is to provide a + (parameter-number, parameter-value) pair. mallopt then sets the + corresponding parameter to the argument value if it can (i.e., so + long as the value is meaningful), and returns 1 if successful else + 0. SVID/XPG/ANSI defines four standard param numbers for mallopt, + normally defined in malloc.h. None of these are use in this malloc, + so setting them has no effect. But this malloc also supports other + options in mallopt. See below for details. Briefly, supported + parameters are as follows (listed defaults are for "typical" + configurations). + + Symbol param # default allowed param values + M_TRIM_THRESHOLD -1 2*1024*1024 any (MAX_SIZE_T disables) + M_GRANULARITY -2 page size any power of 2 >= page size + M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) +*/ +int dlmallopt(int, int); + +/* + malloc_footprint(); + Returns the number of bytes obtained from the system. The total + number of bytes allocated by malloc, realloc etc., is less than this + value. Unlike mallinfo, this function returns only a precomputed + result, so can be called frequently to monitor memory consumption. + Even if locks are otherwise defined, this function does not use them, + so results might not be up to date. +*/ +size_t dlmalloc_footprint(void); + +/* + malloc_max_footprint(); + Returns the maximum number of bytes obtained from the system. This + value will be greater than current footprint if deallocated space + has been reclaimed by the system. The peak number of bytes allocated + by malloc, realloc etc., is less than this value. Unlike mallinfo, + this function returns only a precomputed result, so can be called + frequently to monitor memory consumption. Even if locks are + otherwise defined, this function does not use them, so results might + not be up to date. +*/ +size_t dlmalloc_max_footprint(void); + +#if !NO_MALLINFO +/* + mallinfo() + Returns (by copy) a struct containing various summary statistics: + + arena: current total non-mmapped bytes allocated from system + ordblks: the number of free chunks + smblks: always zero. + hblks: current number of mmapped regions + hblkhd: total bytes held in mmapped regions + usmblks: the maximum total allocated space. This will be greater + than current total if trimming has occurred. + fsmblks: always zero + uordblks: current total allocated space (normal or mmapped) + fordblks: total free space + keepcost: the maximum number of bytes that could ideally be released + back to system via malloc_trim. ("ideally" means that + it ignores page restrictions etc.) + + Because these fields are ints, but internal bookkeeping may + be kept as longs, the reported values may wrap around zero and + thus be inaccurate. +*/ +struct mallinfo dlmallinfo(void); +#endif /* NO_MALLINFO */ + +/* + independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); + + independent_calloc is similar to calloc, but instead of returning a + single cleared space, it returns an array of pointers to n_elements + independent elements that can hold contents of size elem_size, each + of which starts out cleared, and can be independently freed, + realloc'ed etc. The elements are guaranteed to be adjacently + allocated (this is not guaranteed to occur with multiple callocs or + mallocs), which may also improve cache locality in some + applications. + + The "chunks" argument is optional (i.e., may be null, which is + probably the most typical usage). If it is null, the returned array + is itself dynamically allocated and should also be freed when it is + no longer needed. Otherwise, the chunks array must be of at least + n_elements in length. It is filled in with the pointers to the + chunks. + + In either case, independent_calloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and "chunks" + is null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be individually freed when it is no longer + needed. If you'd like to instead be able to free all at once, you + should instead use regular calloc and assign pointers into this + space to represent elements. (In this case though, you cannot + independently free elements.) + + independent_calloc simplifies and speeds up implementations of many + kinds of pools. It may also be useful when constructing large data + structures that initially have a fixed number of fixed-sized nodes, + but the number is not known at compile time, and some of the nodes + may later need to be freed. For example: + + struct Node { int item; struct Node* next; }; + + struct Node* build_list() { + struct Node** pool; + int n = read_number_of_nodes_needed(); + if (n <= 0) return 0; + pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); + if (pool == 0) die(); + // organize into a linked list... + struct Node* first = pool[0]; + for (i = 0; i < n-1; ++i) + pool[i]->next = pool[i+1]; + free(pool); // Can now free the array (or not, if it is needed later) + return first; + } +*/ +void** dlindependent_calloc(size_t, size_t, void**); + +/* + independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); + + independent_comalloc allocates, all at once, a set of n_elements + chunks with sizes indicated in the "sizes" array. It returns + an array of pointers to these elements, each of which can be + independently freed, realloc'ed etc. The elements are guaranteed to + be adjacently allocated (this is not guaranteed to occur with + multiple callocs or mallocs), which may also improve cache locality + in some applications. + + The "chunks" argument is optional (i.e., may be null). If it is null + the returned array is itself dynamically allocated and should also + be freed when it is no longer needed. Otherwise, the chunks array + must be of at least n_elements in length. It is filled in with the + pointers to the chunks. + + In either case, independent_comalloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and chunks is + null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be individually freed when it is no longer + needed. If you'd like to instead be able to free all at once, you + should instead use a single regular malloc, and assign pointers at + particular offsets in the aggregate space. (In this case though, you + cannot independently free elements.) + + independent_comallac differs from independent_calloc in that each + element may have a different size, and also that it does not + automatically clear elements. + + independent_comalloc can be used to speed up allocation in cases + where several structs or objects must always be allocated at the + same time. For example: + + struct Head { ... } + struct Foot { ... } + + void send_message(char* msg) { + int msglen = strlen(msg); + size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; + void* chunks[3]; + if (independent_comalloc(3, sizes, chunks) == 0) + die(); + struct Head* head = (struct Head*)(chunks[0]); + char* body = (char*)(chunks[1]); + struct Foot* foot = (struct Foot*)(chunks[2]); + // ... + } + + In general though, independent_comalloc is worth using only for + larger values of n_elements. For small values, you probably won't + detect enough difference from series of malloc calls to bother. + + Overuse of independent_comalloc can increase overall memory usage, + since it cannot reuse existing noncontiguous small chunks that + might be available for some of the elements. +*/ +void** dlindependent_comalloc(size_t, size_t*, void**); + + +/* + pvalloc(size_t n); + Equivalent to valloc(minimum-page-that-holds(n)), that is, + round up n to nearest pagesize. + */ +void* dlpvalloc(size_t); + +/* + malloc_trim(size_t pad); + + If possible, gives memory back to the system (via negative arguments + to sbrk) if there is unused memory at the `high' end of the malloc + pool or in unused MMAP segments. You can call this after freeing + large blocks of memory to potentially reduce the system-level memory + requirements of a program. However, it cannot guarantee to reduce + memory. Under some allocation patterns, some large free blocks of + memory will be locked between two used chunks, so they cannot be + given back to the system. + + The `pad' argument to malloc_trim represents the amount of free + trailing space to leave untrimmed. If this argument is zero, only + the minimum amount of memory to maintain internal data structures + will be left. Non-zero arguments can be supplied to maintain enough + trailing space to service future expected allocations without having + to re-obtain memory from the system. + + Malloc_trim returns 1 if it actually released any memory, else 0. +*/ +int dlmalloc_trim(size_t); + +/* + malloc_usable_size(void* p); + + Returns the number of bytes you can actually use in + an allocated chunk, which may be more than you requested (although + often not) due to alignment and minimum size constraints. + You can use this many bytes without worrying about + overwriting other allocated objects. This is not a particularly great + programming practice. malloc_usable_size can be more useful in + debugging and assertions, for example: + + p = malloc(n); + assert(malloc_usable_size(p) >= 256); +*/ +size_t dlmalloc_usable_size(void*); + +/* + malloc_stats(); + Prints on stderr the amount of space obtained from the system (both + via sbrk and mmap), the maximum amount (which may be more than + current if malloc_trim and/or munmap got called), and the current + number of bytes allocated via malloc (or realloc, etc) but not yet + freed. Note that this is the number of bytes allocated, not the + number requested. It will be larger than the number requested + because of alignment and bookkeeping overhead. Because it includes + alignment wastage as being in use, this figure may be greater than + zero even when no user-level chunks are allocated. + + The reported current and maximum system memory can be inaccurate if + a program makes other calls to system memory allocation functions + (normally sbrk) outside of malloc. + + malloc_stats prints only the most commonly interesting statistics. + More information can be obtained by calling mallinfo. +*/ +void dlmalloc_stats(void); + +#endif /* ONLY_MSPACES */ + +#if MSPACES + +/* + mspace is an opaque type representing an independent + region of space that supports mspace_malloc, etc. +*/ +typedef void* mspace; + +/* + create_mspace creates and returns a new independent space with the + given initial capacity, or, if 0, the default granularity size. It + returns null if there is no system memory available to create the + space. If argument locked is non-zero, the space uses a separate + lock to control access. The capacity of the space will grow + dynamically as needed to service mspace_malloc requests. You can + control the sizes of incremental increases of this space by + compiling with a different DEFAULT_GRANULARITY or dynamically + setting with mallopt(M_GRANULARITY, value). +*/ +mspace create_mspace(size_t capacity, int locked); + +/* + destroy_mspace destroys the given space, and attempts to return all + of its memory back to the system, returning the total number of + bytes freed. After destruction, the results of access to all memory + used by the space become undefined. +*/ +size_t destroy_mspace(mspace msp); + +/* + create_mspace_with_base uses the memory supplied as the initial base + of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this + space is used for bookkeeping, so the capacity must be at least this + large. (Otherwise 0 is returned.) When this initial space is + exhausted, additional memory will be obtained from the system. + Destroying this space will deallocate all additionally allocated + space (if possible) but not the initial base. +*/ +mspace create_mspace_with_base(void* base, size_t capacity, int locked); + +/* + mspace_malloc behaves as malloc, but operates within + the given space. +*/ +void* mspace_malloc(mspace msp, size_t bytes); + +/* + mspace_free behaves as free, but operates within + the given space. + + If compiled with FOOTERS==1, mspace_free is not actually needed. + free may be called instead of mspace_free because freed chunks from + any space are handled by their originating spaces. +*/ +void mspace_free(mspace msp, void* mem); + +/* + mspace_realloc behaves as realloc, but operates within + the given space. + + If compiled with FOOTERS==1, mspace_realloc is not actually + needed. realloc may be called instead of mspace_realloc because + realloced chunks from any space are handled by their originating + spaces. +*/ +void* mspace_realloc(mspace msp, void* mem, size_t newsize); + +/* + mspace_calloc behaves as calloc, but operates within + the given space. +*/ +void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size); + +/* + mspace_memalign behaves as memalign, but operates within + the given space. +*/ +void* mspace_memalign(mspace msp, size_t alignment, size_t bytes); + +/* + mspace_independent_calloc behaves as independent_calloc, but + operates within the given space. +*/ +void** mspace_independent_calloc(mspace msp, size_t n_elements, + size_t elem_size, void* chunks[]); + +/* + mspace_independent_comalloc behaves as independent_comalloc, but + operates within the given space. +*/ +void** mspace_independent_comalloc(mspace msp, size_t n_elements, + size_t sizes[], void* chunks[]); + +/* + mspace_footprint() returns the number of bytes obtained from the + system for this space. +*/ +size_t mspace_footprint(mspace msp); + +/* + mspace_max_footprint() returns the peak number of bytes obtained from the + system for this space. +*/ +size_t mspace_max_footprint(mspace msp); + + +#if !NO_MALLINFO +/* + mspace_mallinfo behaves as mallinfo, but reports properties of + the given space. +*/ +struct mallinfo mspace_mallinfo(mspace msp); +#endif /* NO_MALLINFO */ + +/* + mspace_malloc_stats behaves as malloc_stats, but reports + properties of the given space. +*/ +void mspace_malloc_stats(mspace msp); + +/* + mspace_trim behaves as malloc_trim, but + operates within the given space. +*/ +int mspace_trim(mspace msp, size_t pad); + +/* + An alias for mallopt. +*/ +int mspace_mallopt(int, int); + +#endif /* MSPACES */ + +#ifdef __cplusplus +}; /* end of extern "C" */ +#endif /* __cplusplus */ + +/* + ======================================================================== + To make a fully customizable malloc.h header file, cut everything + above this line, put into file malloc.h, edit to suit, and #include it + on the next line, as well as in programs that use this malloc. + ======================================================================== +*/ + +/* #include "malloc.h" */ + +/*------------------------------ internal #includes ---------------------- */ + +#ifdef _MSC_VER +#pragma warning( disable : 4146 ) /* no "unsigned" warnings */ +#endif /* _MSC_VER */ + +#include <stdio.h> /* for printing in malloc_stats */ + +#ifndef LACKS_ERRNO_H +#include <errno.h> /* for MALLOC_FAILURE_ACTION */ +#endif /* LACKS_ERRNO_H */ +#if FOOTERS +#include <time.h> /* for magic initialization */ +#endif /* FOOTERS */ +#ifndef LACKS_STDLIB_H +#include <stdlib.h> /* for abort() */ +#endif /* LACKS_STDLIB_H */ +#ifdef DEBUG +#if ABORT_ON_ASSERT_FAILURE +#define assert(x) if(!(x)) ABORT +#else /* ABORT_ON_ASSERT_FAILURE */ +#include <assert.h> +#endif /* ABORT_ON_ASSERT_FAILURE */ +#else /* DEBUG */ +#define assert(x) +#endif /* DEBUG */ +#ifndef LACKS_STRING_H +#include <string.h> /* for memset etc */ +#endif /* LACKS_STRING_H */ +#if USE_BUILTIN_FFS +#ifndef LACKS_STRINGS_H +#include <strings.h> /* for ffs */ +#endif /* LACKS_STRINGS_H */ +#endif /* USE_BUILTIN_FFS */ +#if HAVE_MMAP +#ifndef LACKS_SYS_MMAN_H +#include <sys/mman.h> /* for mmap */ +#endif /* LACKS_SYS_MMAN_H */ +#ifndef LACKS_FCNTL_H +#include <fcntl.h> +#endif /* LACKS_FCNTL_H */ +#endif /* HAVE_MMAP */ +#if HAVE_MORECORE +#ifndef LACKS_UNISTD_H +#include <unistd.h> /* for sbrk */ +#else /* LACKS_UNISTD_H */ +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) +extern void* sbrk(ptrdiff_t); +#endif /* FreeBSD etc */ +#endif /* LACKS_UNISTD_H */ +#endif /* HAVE_MMAP */ + +#ifndef WIN32 +#ifndef malloc_getpagesize +# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */ +# ifndef _SC_PAGE_SIZE +# define _SC_PAGE_SIZE _SC_PAGESIZE +# endif +# endif +# ifdef _SC_PAGE_SIZE +# define malloc_getpagesize sysconf(_SC_PAGE_SIZE) +# else +# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE) + extern size_t getpagesize(); +# define malloc_getpagesize getpagesize() +# else +# ifdef WIN32 /* use supplied emulation of getpagesize */ +# define malloc_getpagesize getpagesize() +# else +# ifndef LACKS_SYS_PARAM_H +# include <sys/param.h> +# endif +# ifdef EXEC_PAGESIZE +# define malloc_getpagesize EXEC_PAGESIZE +# else +# ifdef NBPG +# ifndef CLSIZE +# define malloc_getpagesize NBPG +# else +# define malloc_getpagesize (NBPG * CLSIZE) +# endif +# else +# ifdef NBPC +# define malloc_getpagesize NBPC +# else +# ifdef PAGESIZE +# define malloc_getpagesize PAGESIZE +# else /* just guess */ +# define malloc_getpagesize ((size_t)4096U) +# endif +# endif +# endif +# endif +# endif +# endif +# endif +#endif +#endif + +/* ------------------- size_t and alignment properties -------------------- */ + +/* The byte and bit size of a size_t */ +#define SIZE_T_SIZE (sizeof(size_t)) +#define SIZE_T_BITSIZE (sizeof(size_t) << 3) + +/* Some constants coerced to size_t */ +/* Annoying but necessary to avoid errors on some plaftorms */ +#define SIZE_T_ZERO ((size_t)0) +#define SIZE_T_ONE ((size_t)1) +#define SIZE_T_TWO ((size_t)2) +#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1) +#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2) +#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES) +#define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U) + +/* The bit mask value corresponding to MALLOC_ALIGNMENT */ +#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE) + +/* True if address a has acceptable alignment */ +#define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0) + +/* the number of bytes to offset an address to align it */ +#define align_offset(A)\ + ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\ + ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK)) + +/* -------------------------- MMAP preliminaries ------------------------- */ + +/* + If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and + checks to fail so compiler optimizer can delete code rather than + using so many "#if"s. +*/ + + +/* MORECORE and MMAP must return MFAIL on failure */ +#define MFAIL ((void*)(MAX_SIZE_T)) +#define CMFAIL ((char*)(MFAIL)) /* defined for convenience */ + +#if !HAVE_MMAP +#define IS_MMAPPED_BIT (SIZE_T_ZERO) +#define USE_MMAP_BIT (SIZE_T_ZERO) +#define CALL_MMAP(s) MFAIL +#define CALL_MUNMAP(a, s) (-1) +#define DIRECT_MMAP(s) MFAIL + +#else /* HAVE_MMAP */ +#define IS_MMAPPED_BIT (SIZE_T_ONE) +#define USE_MMAP_BIT (SIZE_T_ONE) + +#if !defined(WIN32) && !defined (__OS2__) +#define CALL_MUNMAP(a, s) munmap((a), (s)) +#define MMAP_PROT (PROT_READ|PROT_WRITE) +#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +#define MAP_ANONYMOUS MAP_ANON +#endif /* MAP_ANON */ +#ifdef MAP_ANONYMOUS +#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) +#define CALL_MMAP(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) +#else /* MAP_ANONYMOUS */ +/* + Nearly all versions of mmap support MAP_ANONYMOUS, so the following + is unlikely to be needed, but is supplied just in case. +*/ +#define MMAP_FLAGS (MAP_PRIVATE) +static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ +#define CALL_MMAP(s) ((dev_zero_fd < 0) ? \ + (dev_zero_fd = open("/dev/zero", O_RDWR), \ + mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ + mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) +#endif /* MAP_ANONYMOUS */ + +#define DIRECT_MMAP(s) CALL_MMAP(s) + +#elif defined(__OS2__) + +/* OS/2 MMAP via DosAllocMem */ +static void* os2mmap(size_t size) { + void* ptr; + if (DosAllocMem(&ptr, size, OBJ_ANY|PAG_COMMIT|PAG_READ|PAG_WRITE) && + DosAllocMem(&ptr, size, PAG_COMMIT|PAG_READ|PAG_WRITE)) + return MFAIL; + return ptr; +} + +#define os2direct_mmap(n) os2mmap(n) + +/* This function supports releasing coalesed segments */ +static int os2munmap(void* ptr, size_t size) { + while (size) { + ULONG ulSize = size; + ULONG ulFlags = 0; + if (DosQueryMem(ptr, &ulSize, &ulFlags) != 0) + return -1; + if ((ulFlags & PAG_BASE) == 0 ||(ulFlags & PAG_COMMIT) == 0 || + ulSize > size) + return -1; + if (DosFreeMem(ptr) != 0) + return -1; + ptr = ( void * ) ( ( char * ) ptr + ulSize ); + size -= ulSize; + } + return 0; +} + +#define CALL_MMAP(s) os2mmap(s) +#define CALL_MUNMAP(a, s) os2munmap((a), (s)) +#define DIRECT_MMAP(s) os2direct_mmap(s) + +#else /* WIN32 */ + +/* Win32 MMAP via VirtualAlloc */ +static void* win32mmap(size_t size) { + void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_EXECUTE_READWRITE); + return (ptr != 0)? ptr: MFAIL; +} + +/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ +static void* win32direct_mmap(size_t size) { + void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, + PAGE_EXECUTE_READWRITE); + return (ptr != 0)? ptr: MFAIL; +} + +/* This function supports releasing coalesed segments */ +static int win32munmap(void* ptr, size_t size) { + MEMORY_BASIC_INFORMATION minfo; + char* cptr = ptr; + while (size) { + if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0) + return -1; + if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr || + minfo.State != MEM_COMMIT || minfo.RegionSize > size) + return -1; + if (VirtualFree(cptr, 0, MEM_RELEASE) == 0) + return -1; + cptr += minfo.RegionSize; + size -= minfo.RegionSize; + } + return 0; +} + +#define CALL_MMAP(s) win32mmap(s) +#define CALL_MUNMAP(a, s) win32munmap((a), (s)) +#define DIRECT_MMAP(s) win32direct_mmap(s) +#endif /* WIN32 */ +#endif /* HAVE_MMAP */ + +#if HAVE_MMAP && HAVE_MREMAP +#define CALL_MREMAP(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv)) +#else /* HAVE_MMAP && HAVE_MREMAP */ +#define CALL_MREMAP(addr, osz, nsz, mv) MFAIL +#endif /* HAVE_MMAP && HAVE_MREMAP */ + +#if HAVE_MORECORE +#define CALL_MORECORE(S) MORECORE(S) +#else /* HAVE_MORECORE */ +#define CALL_MORECORE(S) MFAIL +#endif /* HAVE_MORECORE */ + +/* mstate bit set if continguous morecore disabled or failed */ +#define USE_NONCONTIGUOUS_BIT (4U) + +/* segment bit set in create_mspace_with_base */ +#define EXTERN_BIT (8U) + + +/* --------------------------- Lock preliminaries ------------------------ */ + +#if USE_LOCKS + +/* + When locks are defined, there are up to two global locks: + + * If HAVE_MORECORE, morecore_mutex protects sequences of calls to + MORECORE. In many cases sys_alloc requires two calls, that should + not be interleaved with calls by other threads. This does not + protect against direct calls to MORECORE by other threads not + using this lock, so there is still code to cope the best we can on + interference. + + * magic_init_mutex ensures that mparams.magic and other + unique mparams values are initialized only once. +*/ + +#if !defined(WIN32) && !defined(__OS2__) +/* By default use posix locks */ +#include <pthread.h> +#define MLOCK_T pthread_mutex_t +#define INITIAL_LOCK(l) pthread_mutex_init(l, NULL) +#define ACQUIRE_LOCK(l) pthread_mutex_lock(l) +#define RELEASE_LOCK(l) pthread_mutex_unlock(l) + +#if HAVE_MORECORE +static MLOCK_T morecore_mutex = PTHREAD_MUTEX_INITIALIZER; +#endif /* HAVE_MORECORE */ + +static MLOCK_T magic_init_mutex = PTHREAD_MUTEX_INITIALIZER; + +#elif defined(__OS2__) +#define MLOCK_T HMTX +#define INITIAL_LOCK(l) DosCreateMutexSem(0, l, 0, FALSE) +#define ACQUIRE_LOCK(l) DosRequestMutexSem(*l, SEM_INDEFINITE_WAIT) +#define RELEASE_LOCK(l) DosReleaseMutexSem(*l) +#if HAVE_MORECORE +static MLOCK_T morecore_mutex; +#endif /* HAVE_MORECORE */ +static MLOCK_T magic_init_mutex; + +#else /* WIN32 */ +/* + Because lock-protected regions have bounded times, and there + are no recursive lock calls, we can use simple spinlocks. +*/ + +#define MLOCK_T long +static int win32_acquire_lock (MLOCK_T *sl) { + for (;;) { +#ifdef InterlockedCompareExchangePointer + if (!InterlockedCompareExchange(sl, 1, 0)) + return 0; +#else /* Use older void* version */ + if (!InterlockedCompareExchange((void**)sl, (void*)1, (void*)0)) + return 0; +#endif /* InterlockedCompareExchangePointer */ + Sleep (0); + } +} + +static void win32_release_lock (MLOCK_T *sl) { + InterlockedExchange (sl, 0); +} + +#define INITIAL_LOCK(l) *(l)=0 +#define ACQUIRE_LOCK(l) win32_acquire_lock(l) +#define RELEASE_LOCK(l) win32_release_lock(l) +#if HAVE_MORECORE +static MLOCK_T morecore_mutex; +#endif /* HAVE_MORECORE */ +static MLOCK_T magic_init_mutex; +#endif /* WIN32 */ + +#define USE_LOCK_BIT (2U) +#else /* USE_LOCKS */ +#define USE_LOCK_BIT (0U) +#define INITIAL_LOCK(l) +#endif /* USE_LOCKS */ + +#if USE_LOCKS && HAVE_MORECORE +#define ACQUIRE_MORECORE_LOCK() ACQUIRE_LOCK(&morecore_mutex); +#define RELEASE_MORECORE_LOCK() RELEASE_LOCK(&morecore_mutex); +#else /* USE_LOCKS && HAVE_MORECORE */ +#define ACQUIRE_MORECORE_LOCK() +#define RELEASE_MORECORE_LOCK() +#endif /* USE_LOCKS && HAVE_MORECORE */ + +#if USE_LOCKS +#define ACQUIRE_MAGIC_INIT_LOCK() ACQUIRE_LOCK(&magic_init_mutex); +#define RELEASE_MAGIC_INIT_LOCK() RELEASE_LOCK(&magic_init_mutex); +#else /* USE_LOCKS */ +#define ACQUIRE_MAGIC_INIT_LOCK() +#define RELEASE_MAGIC_INIT_LOCK() +#endif /* USE_LOCKS */ + + +/* ----------------------- Chunk representations ------------------------ */ + +/* + (The following includes lightly edited explanations by Colin Plumb.) + + The malloc_chunk declaration below is misleading (but accurate and + necessary). It declares a "view" into memory allowing access to + necessary fields at known offsets from a given base. + + Chunks of memory are maintained using a `boundary tag' method as + originally described by Knuth. (See the paper by Paul Wilson + ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such + techniques.) Sizes of free chunks are stored both in the front of + each chunk and at the end. This makes consolidating fragmented + chunks into bigger chunks fast. The head fields also hold bits + representing whether chunks are free or in use. + + Here are some pictures to make it clearer. They are "exploded" to + show that the state of a chunk can be thought of as extending from + the high 31 bits of the head field of its header through the + prev_foot and PINUSE_BIT bit of the following chunk header. + + A chunk that's in use looks like: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk (if P = 1) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| + | Size of this chunk 1| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + +- -+ + | | + +- -+ + | : + +- size - sizeof(size_t) available payload bytes -+ + : | + chunk-> +- -+ + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1| + | Size of next chunk (may or may not be in use) | +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + And if it's free, it looks like this: + + chunk-> +- -+ + | User payload (must be in use, or we would have merged!) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| + | Size of this chunk 0| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Next pointer | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Prev pointer | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | : + +- size - sizeof(struct chunk) unused bytes -+ + : | + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of this chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0| + | Size of next chunk (must be in use, or we would have merged)| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | : + +- User payload -+ + : | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |0| + +-+ + Note that since we always merge adjacent free chunks, the chunks + adjacent to a free chunk must be in use. + + Given a pointer to a chunk (which can be derived trivially from the + payload pointer) we can, in O(1) time, find out whether the adjacent + chunks are free, and if so, unlink them from the lists that they + are on and merge them with the current chunk. + + Chunks always begin on even word boundaries, so the mem portion + (which is returned to the user) is also on an even word boundary, and + thus at least double-word aligned. + + The P (PINUSE_BIT) bit, stored in the unused low-order bit of the + chunk size (which is always a multiple of two words), is an in-use + bit for the *previous* chunk. If that bit is *clear*, then the + word before the current chunk size contains the previous chunk + size, and can be used to find the front of the previous chunk. + The very first chunk allocated always has this bit set, preventing + access to non-existent (or non-owned) memory. If pinuse is set for + any given chunk, then you CANNOT determine the size of the + previous chunk, and might even get a memory addressing fault when + trying to do so. + + The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of + the chunk size redundantly records whether the current chunk is + inuse. This redundancy enables usage checks within free and realloc, + and reduces indirection when freeing and consolidating chunks. + + Each freshly allocated chunk must have both cinuse and pinuse set. + That is, each allocated chunk borders either a previously allocated + and still in-use chunk, or the base of its memory arena. This is + ensured by making all allocations from the the `lowest' part of any + found chunk. Further, no free chunk physically borders another one, + so each free chunk is known to be preceded and followed by either + inuse chunks or the ends of memory. + + Note that the `foot' of the current chunk is actually represented + as the prev_foot of the NEXT chunk. This makes it easier to + deal with alignments etc but can be very confusing when trying + to extend or adapt this code. + + The exceptions to all this are + + 1. The special chunk `top' is the top-most available chunk (i.e., + the one bordering the end of available memory). It is treated + specially. Top is never included in any bin, is used only if + no other chunk is available, and is released back to the + system if it is very large (see M_TRIM_THRESHOLD). In effect, + the top chunk is treated as larger (and thus less well + fitting) than any other available chunk. The top chunk + doesn't update its trailing size field since there is no next + contiguous chunk that would have to index off it. However, + space is still allocated for it (TOP_FOOT_SIZE) to enable + separation or merging when space is extended. + + 3. Chunks allocated via mmap, which have the lowest-order bit + (IS_MMAPPED_BIT) set in their prev_foot fields, and do not set + PINUSE_BIT in their head fields. Because they are allocated + one-by-one, each must carry its own prev_foot field, which is + also used to hold the offset this chunk has within its mmapped + region, which is needed to preserve alignment. Each mmapped + chunk is trailed by the first two fields of a fake next-chunk + for sake of usage checks. + +*/ + +struct malloc_chunk { + size_t prev_foot; /* Size of previous chunk (if free). */ + size_t head; /* Size and inuse bits. */ + struct malloc_chunk* fd; /* double links -- used only if free. */ + struct malloc_chunk* bk; +}; + +typedef struct malloc_chunk mchunk; +typedef struct malloc_chunk* mchunkptr; +typedef struct malloc_chunk* sbinptr; /* The type of bins of chunks */ +typedef unsigned int bindex_t; /* Described below */ +typedef unsigned int binmap_t; /* Described below */ +typedef unsigned int flag_t; /* The type of various bit flag sets */ + +/* ------------------- Chunks sizes and alignments ----------------------- */ + +#define MCHUNK_SIZE (sizeof(mchunk)) + +#if FOOTERS +#define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) +#else /* FOOTERS */ +#define CHUNK_OVERHEAD (SIZE_T_SIZE) +#endif /* FOOTERS */ + +/* MMapped chunks need a second word of overhead ... */ +#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) +/* ... and additional padding for fake next-chunk at foot */ +#define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES) + +/* The smallest size we can malloc is an aligned minimal chunk */ +#define MIN_CHUNK_SIZE\ + ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) + +/* conversion from malloc headers to user pointers, and back */ +#define chunk2mem(p) ((void*)((char*)(p) + TWO_SIZE_T_SIZES)) +#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES)) +/* chunk associated with aligned address A */ +#define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A))) + +/* Bounds on request (not chunk) sizes. */ +#define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2) +#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE) + +/* pad request bytes into a usable size */ +#define pad_request(req) \ + (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) + +/* pad request, checking for minimum (but not maximum) */ +#define request2size(req) \ + (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req)) + + +/* ------------------ Operations on head and foot fields ----------------- */ + +/* + The head field of a chunk is or'ed with PINUSE_BIT when previous + adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in + use. If the chunk was obtained with mmap, the prev_foot field has + IS_MMAPPED_BIT set, otherwise holding the offset of the base of the + mmapped region to the base of the chunk. +*/ + +#define PINUSE_BIT (SIZE_T_ONE) +#define CINUSE_BIT (SIZE_T_TWO) +#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT) + +/* Head value for fenceposts */ +#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE) + +/* extraction of fields from head words */ +#define cinuse(p) ((p)->head & CINUSE_BIT) +#define pinuse(p) ((p)->head & PINUSE_BIT) +#define chunksize(p) ((p)->head & ~(INUSE_BITS)) + +#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT) +#define clear_cinuse(p) ((p)->head &= ~CINUSE_BIT) + +/* Treat space at ptr +/- offset as a chunk */ +#define chunk_plus_offset(p, s) ((mchunkptr)(((char*)(p)) + (s))) +#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s))) + +/* Ptr to next or previous physical malloc_chunk. */ +#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~INUSE_BITS))) +#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) )) + +/* extract next chunk's pinuse bit */ +#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT) + +/* Get/set size at footer */ +#define get_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot) +#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s)) + +/* Set size, pinuse bit, and foot */ +#define set_size_and_pinuse_of_free_chunk(p, s)\ + ((p)->head = (s|PINUSE_BIT), set_foot(p, s)) + +/* Set size, pinuse bit, foot, and clear next pinuse */ +#define set_free_with_pinuse(p, s, n)\ + (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s)) + +#define is_mmapped(p)\ + (!((p)->head & PINUSE_BIT) && ((p)->prev_foot & IS_MMAPPED_BIT)) + +/* Get the internal overhead associated with chunk p */ +#define overhead_for(p)\ + (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD) + +/* Return true if malloced space is not necessarily cleared */ +#if MMAP_CLEARS +#define calloc_must_clear(p) (!is_mmapped(p)) +#else /* MMAP_CLEARS */ +#define calloc_must_clear(p) (1) +#endif /* MMAP_CLEARS */ + +/* ---------------------- Overlaid data structures ----------------------- */ + +/* + When chunks are not in use, they are treated as nodes of either + lists or trees. + + "Small" chunks are stored in circular doubly-linked lists, and look + like this: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space (may be 0 bytes long) . + . . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Larger chunks are kept in a form of bitwise digital trees (aka + tries) keyed on chunksizes. Because malloc_tree_chunks are only for + free chunks greater than 256 bytes, their size doesn't impose any + constraints on user chunk sizes. Each node looks like: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk of same size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk of same size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to left child (child[0]) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to right child (child[1]) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to parent | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | bin index of this chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Each tree holding treenodes is a tree of unique chunk sizes. Chunks + of the same size are arranged in a circularly-linked list, with only + the oldest chunk (the next to be used, in our FIFO ordering) + actually in the tree. (Tree members are distinguished by a non-null + parent pointer.) If a chunk with the same size an an existing node + is inserted, it is linked off the existing node using pointers that + work in the same way as fd/bk pointers of small chunks. + + Each tree contains a power of 2 sized range of chunk sizes (the + smallest is 0x100 <= x < 0x180), which is is divided in half at each + tree level, with the chunks in the smaller half of the range (0x100 + <= x < 0x140 for the top nose) in the left subtree and the larger + half (0x140 <= x < 0x180) in the right subtree. This is, of course, + done by inspecting individual bits. + + Using these rules, each node's left subtree contains all smaller + sizes than its right subtree. However, the node at the root of each + subtree has no particular ordering relationship to either. (The + dividing line between the subtree sizes is based on trie relation.) + If we remove the last chunk of a given size from the interior of the + tree, we need to replace it with a leaf node. The tree ordering + rules permit a node to be replaced by any leaf below it. + + The smallest chunk in a tree (a common operation in a best-fit + allocator) can be found by walking a path to the leftmost leaf in + the tree. Unlike a usual binary tree, where we follow left child + pointers until we reach a null, here we follow the right child + pointer any time the left one is null, until we reach a leaf with + both child pointers null. The smallest chunk in the tree will be + somewhere along that path. + + The worst case number of steps to add, find, or remove a node is + bounded by the number of bits differentiating chunks within + bins. Under current bin calculations, this ranges from 6 up to 21 + (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case + is of course much better. +*/ + +struct malloc_tree_chunk { + /* The first four fields must be compatible with malloc_chunk */ + size_t prev_foot; + size_t head; + struct malloc_tree_chunk* fd; + struct malloc_tree_chunk* bk; + + struct malloc_tree_chunk* child[2]; + struct malloc_tree_chunk* parent; + bindex_t index; +}; + +typedef struct malloc_tree_chunk tchunk; +typedef struct malloc_tree_chunk* tchunkptr; +typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */ + +/* A little helper macro for trees */ +#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1]) + +/* ----------------------------- Segments -------------------------------- */ + +/* + Each malloc space may include non-contiguous segments, held in a + list headed by an embedded malloc_segment record representing the + top-most space. Segments also include flags holding properties of + the space. Large chunks that are directly allocated by mmap are not + included in this list. They are instead independently created and + destroyed without otherwise keeping track of them. + + Segment management mainly comes into play for spaces allocated by + MMAP. Any call to MMAP might or might not return memory that is + adjacent to an existing segment. MORECORE normally contiguously + extends the current space, so this space is almost always adjacent, + which is simpler and faster to deal with. (This is why MORECORE is + used preferentially to MMAP when both are available -- see + sys_alloc.) When allocating using MMAP, we don't use any of the + hinting mechanisms (inconsistently) supported in various + implementations of unix mmap, or distinguish reserving from + committing memory. Instead, we just ask for space, and exploit + contiguity when we get it. It is probably possible to do + better than this on some systems, but no general scheme seems + to be significantly better. + + Management entails a simpler variant of the consolidation scheme + used for chunks to reduce fragmentation -- new adjacent memory is + normally prepended or appended to an existing segment. However, + there are limitations compared to chunk consolidation that mostly + reflect the fact that segment processing is relatively infrequent + (occurring only when getting memory from system) and that we + don't expect to have huge numbers of segments: + + * Segments are not indexed, so traversal requires linear scans. (It + would be possible to index these, but is not worth the extra + overhead and complexity for most programs on most platforms.) + * New segments are only appended to old ones when holding top-most + memory; if they cannot be prepended to others, they are held in + different segments. + + Except for the top-most segment of an mstate, each segment record + is kept at the tail of its segment. Segments are added by pushing + segment records onto the list headed by &mstate.seg for the + containing mstate. + + Segment flags control allocation/merge/deallocation policies: + * If EXTERN_BIT set, then we did not allocate this segment, + and so should not try to deallocate or merge with others. + (This currently holds only for the initial segment passed + into create_mspace_with_base.) + * If IS_MMAPPED_BIT set, the segment may be merged with + other surrounding mmapped segments and trimmed/de-allocated + using munmap. + * If neither bit is set, then the segment was obtained using + MORECORE so can be merged with surrounding MORECORE'd segments + and deallocated/trimmed using MORECORE with negative arguments. +*/ + +struct malloc_segment { + char* base; /* base address */ + size_t size; /* allocated size */ + struct malloc_segment* next; /* ptr to next segment */ +#if FFI_MMAP_EXEC_WRIT + /* The mmap magic is supposed to store the address of the executable + segment at the very end of the requested block. */ + +# define mmap_exec_offset(b,s) (*(ptrdiff_t*)((b)+(s)-sizeof(ptrdiff_t))) + + /* We can only merge segments if their corresponding executable + segments are at identical offsets. */ +# define check_segment_merge(S,b,s) \ + (mmap_exec_offset((b),(s)) == (S)->exec_offset) + +# define add_segment_exec_offset(p,S) ((char*)(p) + (S)->exec_offset) +# define sub_segment_exec_offset(p,S) ((char*)(p) - (S)->exec_offset) + + /* The removal of sflags only works with HAVE_MORECORE == 0. */ + +# define get_segment_flags(S) (IS_MMAPPED_BIT) +# define set_segment_flags(S,v) \ + (((v) != IS_MMAPPED_BIT) ? (ABORT, (v)) : \ + (((S)->exec_offset = \ + mmap_exec_offset((S)->base, (S)->size)), \ + (mmap_exec_offset((S)->base + (S)->exec_offset, (S)->size) != \ + (S)->exec_offset) ? (ABORT, (v)) : \ + (mmap_exec_offset((S)->base, (S)->size) = 0), (v))) + + /* We use an offset here, instead of a pointer, because then, when + base changes, we don't have to modify this. On architectures + with segmented addresses, this might not work. */ + ptrdiff_t exec_offset; +#else + +# define get_segment_flags(S) ((S)->sflags) +# define set_segment_flags(S,v) ((S)->sflags = (v)) +# define check_segment_merge(S,b,s) (1) + + flag_t sflags; /* mmap and extern flag */ +#endif +}; + +#define is_mmapped_segment(S) (get_segment_flags(S) & IS_MMAPPED_BIT) +#define is_extern_segment(S) (get_segment_flags(S) & EXTERN_BIT) + +typedef struct malloc_segment msegment; +typedef struct malloc_segment* msegmentptr; + +/* ---------------------------- malloc_state ----------------------------- */ + +/* + A malloc_state holds all of the bookkeeping for a space. + The main fields are: + + Top + The topmost chunk of the currently active segment. Its size is + cached in topsize. The actual size of topmost space is + topsize+TOP_FOOT_SIZE, which includes space reserved for adding + fenceposts and segment records if necessary when getting more + space from the system. The size at which to autotrim top is + cached from mparams in trim_check, except that it is disabled if + an autotrim fails. + + Designated victim (dv) + This is the preferred chunk for servicing small requests that + don't have exact fits. It is normally the chunk split off most + recently to service another small request. Its size is cached in + dvsize. The link fields of this chunk are not maintained since it + is not kept in a bin. + + SmallBins + An array of bin headers for free chunks. These bins hold chunks + with sizes less than MIN_LARGE_SIZE bytes. Each bin contains + chunks of all the same size, spaced 8 bytes apart. To simplify + use in double-linked lists, each bin header acts as a malloc_chunk + pointing to the real first node, if it exists (else pointing to + itself). This avoids special-casing for headers. But to avoid + waste, we allocate only the fd/bk pointers of bins, and then use + repositioning tricks to treat these as the fields of a chunk. + + TreeBins + Treebins are pointers to the roots of trees holding a range of + sizes. There are 2 equally spaced treebins for each power of two + from TREE_SHIFT to TREE_SHIFT+16. The last bin holds anything + larger. + + Bin maps + There is one bit map for small bins ("smallmap") and one for + treebins ("treemap). Each bin sets its bit when non-empty, and + clears the bit when empty. Bit operations are then used to avoid + bin-by-bin searching -- nearly all "search" is done without ever + looking at bins that won't be selected. The bit maps + conservatively use 32 bits per map word, even if on 64bit system. + For a good description of some of the bit-based techniques used + here, see Henry S. Warren Jr's book "Hacker's Delight" (and + supplement at http://hackersdelight.org/). Many of these are + intended to reduce the branchiness of paths through malloc etc, as + well as to reduce the number of memory locations read or written. + + Segments + A list of segments headed by an embedded malloc_segment record + representing the initial space. + + Address check support + The least_addr field is the least address ever obtained from + MORECORE or MMAP. Attempted frees and reallocs of any address less + than this are trapped (unless INSECURE is defined). + + Magic tag + A cross-check field that should always hold same value as mparams.magic. + + Flags + Bits recording whether to use MMAP, locks, or contiguous MORECORE + + Statistics + Each space keeps track of current and maximum system memory + obtained via MORECORE or MMAP. + + Locking + If USE_LOCKS is defined, the "mutex" lock is acquired and released + around every public call using this mspace. +*/ + +/* Bin types, widths and sizes */ +#define NSMALLBINS (32U) +#define NTREEBINS (32U) +#define SMALLBIN_SHIFT (3U) +#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT) +#define TREEBIN_SHIFT (8U) +#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT) +#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE) +#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD) + +struct malloc_state { + binmap_t smallmap; + binmap_t treemap; + size_t dvsize; + size_t topsize; + char* least_addr; + mchunkptr dv; + mchunkptr top; + size_t trim_check; + size_t magic; + mchunkptr smallbins[(NSMALLBINS+1)*2]; + tbinptr treebins[NTREEBINS]; + size_t footprint; + size_t max_footprint; + flag_t mflags; +#if USE_LOCKS + MLOCK_T mutex; /* locate lock among fields that rarely change */ +#endif /* USE_LOCKS */ + msegment seg; +}; + +typedef struct malloc_state* mstate; + +/* ------------- Global malloc_state and malloc_params ------------------- */ + +/* + malloc_params holds global properties, including those that can be + dynamically set using mallopt. There is a single instance, mparams, + initialized in init_mparams. +*/ + +struct malloc_params { + size_t magic; + size_t page_size; + size_t granularity; + size_t mmap_threshold; + size_t trim_threshold; + flag_t default_mflags; +}; + +static struct malloc_params mparams; + +/* The global malloc_state used for all non-"mspace" calls */ +static struct malloc_state _gm_; +#define gm (&_gm_) +#define is_global(M) ((M) == &_gm_) +#define is_initialized(M) ((M)->top != 0) + +/* -------------------------- system alloc setup ------------------------- */ + +/* Operations on mflags */ + +#define use_lock(M) ((M)->mflags & USE_LOCK_BIT) +#define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT) +#define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT) + +#define use_mmap(M) ((M)->mflags & USE_MMAP_BIT) +#define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT) +#define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT) + +#define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT) +#define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT) + +#define set_lock(M,L)\ + ((M)->mflags = (L)?\ + ((M)->mflags | USE_LOCK_BIT) :\ + ((M)->mflags & ~USE_LOCK_BIT)) + +/* page-align a size */ +#define page_align(S)\ + (((S) + (mparams.page_size)) & ~(mparams.page_size - SIZE_T_ONE)) + +/* granularity-align a size */ +#define granularity_align(S)\ + (((S) + (mparams.granularity)) & ~(mparams.granularity - SIZE_T_ONE)) + +#define is_page_aligned(S)\ + (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0) +#define is_granularity_aligned(S)\ + (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0) + +/* True if segment S holds address A */ +#define segment_holds(S, A)\ + ((char*)(A) >= S->base && (char*)(A) < S->base + S->size) + +/* Return segment holding given address */ +static msegmentptr segment_holding(mstate m, char* addr) { + msegmentptr sp = &m->seg; + for (;;) { + if (addr >= sp->base && addr < sp->base + sp->size) + return sp; + if ((sp = sp->next) == 0) + return 0; + } +} + +/* Return true if segment contains a segment link */ +static int has_segment_link(mstate m, msegmentptr ss) { + msegmentptr sp = &m->seg; + for (;;) { + if ((char*)sp >= ss->base && (char*)sp < ss->base + ss->size) + return 1; + if ((sp = sp->next) == 0) + return 0; + } +} + +#ifndef MORECORE_CANNOT_TRIM +#define should_trim(M,s) ((s) > (M)->trim_check) +#else /* MORECORE_CANNOT_TRIM */ +#define should_trim(M,s) (0) +#endif /* MORECORE_CANNOT_TRIM */ + +/* + TOP_FOOT_SIZE is padding at the end of a segment, including space + that may be needed to place segment records and fenceposts when new + noncontiguous segments are added. +*/ +#define TOP_FOOT_SIZE\ + (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) + + +/* ------------------------------- Hooks -------------------------------- */ + +/* + PREACTION should be defined to return 0 on success, and nonzero on + failure. If you are not using locking, you can redefine these to do + anything you like. +*/ + +#if USE_LOCKS + +/* Ensure locks are initialized */ +#define GLOBALLY_INITIALIZE() (mparams.page_size == 0 && init_mparams()) + +#define PREACTION(M) ((GLOBALLY_INITIALIZE() || use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0) +#define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); } +#else /* USE_LOCKS */ + +#ifndef PREACTION +#define PREACTION(M) (0) +#endif /* PREACTION */ + +#ifndef POSTACTION +#define POSTACTION(M) +#endif /* POSTACTION */ + +#endif /* USE_LOCKS */ + +/* + CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses. + USAGE_ERROR_ACTION is triggered on detected bad frees and + reallocs. The argument p is an address that might have triggered the + fault. It is ignored by the two predefined actions, but might be + useful in custom actions that try to help diagnose errors. +*/ + +#if PROCEED_ON_ERROR + +/* A count of the number of corruption errors causing resets */ +int malloc_corruption_error_count; + +/* default corruption action */ +static void reset_on_error(mstate m); + +#define CORRUPTION_ERROR_ACTION(m) reset_on_error(m) +#define USAGE_ERROR_ACTION(m, p) + +#else /* PROCEED_ON_ERROR */ + +#ifndef CORRUPTION_ERROR_ACTION +#define CORRUPTION_ERROR_ACTION(m) ABORT +#endif /* CORRUPTION_ERROR_ACTION */ + +#ifndef USAGE_ERROR_ACTION +#define USAGE_ERROR_ACTION(m,p) ABORT +#endif /* USAGE_ERROR_ACTION */ + +#endif /* PROCEED_ON_ERROR */ + +/* -------------------------- Debugging setup ---------------------------- */ + +#if ! DEBUG + +#define check_free_chunk(M,P) +#define check_inuse_chunk(M,P) +#define check_malloced_chunk(M,P,N) +#define check_mmapped_chunk(M,P) +#define check_malloc_state(M) +#define check_top_chunk(M,P) + +#else /* DEBUG */ +#define check_free_chunk(M,P) do_check_free_chunk(M,P) +#define check_inuse_chunk(M,P) do_check_inuse_chunk(M,P) +#define check_top_chunk(M,P) do_check_top_chunk(M,P) +#define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N) +#define check_mmapped_chunk(M,P) do_check_mmapped_chunk(M,P) +#define check_malloc_state(M) do_check_malloc_state(M) + +static void do_check_any_chunk(mstate m, mchunkptr p); +static void do_check_top_chunk(mstate m, mchunkptr p); +static void do_check_mmapped_chunk(mstate m, mchunkptr p); +static void do_check_inuse_chunk(mstate m, mchunkptr p); +static void do_check_free_chunk(mstate m, mchunkptr p); +static void do_check_malloced_chunk(mstate m, void* mem, size_t s); +static void do_check_tree(mstate m, tchunkptr t); +static void do_check_treebin(mstate m, bindex_t i); +static void do_check_smallbin(mstate m, bindex_t i); +static void do_check_malloc_state(mstate m); +static int bin_find(mstate m, mchunkptr x); +static size_t traverse_and_check(mstate m); +#endif /* DEBUG */ + +/* ---------------------------- Indexing Bins ---------------------------- */ + +#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS) +#define small_index(s) ((s) >> SMALLBIN_SHIFT) +#define small_index2size(i) ((i) << SMALLBIN_SHIFT) +#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE)) + +/* addressing by index. See above about smallbin repositioning */ +#define smallbin_at(M, i) ((sbinptr)((char*)&((M)->smallbins[(i)<<1]))) +#define treebin_at(M,i) (&((M)->treebins[i])) + +/* assign tree index for size S to variable I */ +#if defined(__GNUC__) && defined(i386) +#define compute_tree_index(S, I)\ +{\ + size_t X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int K;\ + __asm__("bsrl %1,%0\n\t" : "=r" (K) : "rm" (X));\ + I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} +#else /* GNUC */ +#define compute_tree_index(S, I)\ +{\ + size_t X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int Y = (unsigned int)X;\ + unsigned int N = ((Y - 0x100) >> 16) & 8;\ + unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\ + N += K;\ + N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\ + K = 14 - N + ((Y <<= K) >> 15);\ + I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1));\ + }\ +} +#endif /* GNUC */ + +/* Bit representing maximum resolved size in a treebin at i */ +#define bit_for_tree_index(i) \ + (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2) + +/* Shift placing maximum resolved bit in a treebin at i as sign bit */ +#define leftshift_for_tree_index(i) \ + ((i == NTREEBINS-1)? 0 : \ + ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2))) + +/* The size of the smallest chunk held in bin with index i */ +#define minsize_for_tree_index(i) \ + ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \ + (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1))) + + +/* ------------------------ Operations on bin maps ----------------------- */ + +/* bit corresponding to given index */ +#define idx2bit(i) ((binmap_t)(1) << (i)) + +/* Mark/Clear bits with given index */ +#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i)) +#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i)) +#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i)) + +#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i)) +#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i)) +#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i)) + +/* index corresponding to given bit */ + +#if defined(__GNUC__) && defined(i386) +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + __asm__("bsfl %1,%0\n\t" : "=r" (J) : "rm" (X));\ + I = (bindex_t)J;\ +} + +#else /* GNUC */ +#if USE_BUILTIN_FFS +#define compute_bit2idx(X, I) I = ffs(X)-1 + +#else /* USE_BUILTIN_FFS */ +#define compute_bit2idx(X, I)\ +{\ + unsigned int Y = X - 1;\ + unsigned int K = Y >> (16-4) & 16;\ + unsigned int N = K; Y >>= K;\ + N += K = Y >> (8-3) & 8; Y >>= K;\ + N += K = Y >> (4-2) & 4; Y >>= K;\ + N += K = Y >> (2-1) & 2; Y >>= K;\ + N += K = Y >> (1-0) & 1; Y >>= K;\ + I = (bindex_t)(N + Y);\ +} +#endif /* USE_BUILTIN_FFS */ +#endif /* GNUC */ + +/* isolate the least set bit of a bitmap */ +#define least_bit(x) ((x) & -(x)) + +/* mask with all bits to left of least bit of x on */ +#define left_bits(x) ((x<<1) | -(x<<1)) + +/* mask with all bits to left of or equal to least bit of x on */ +#define same_or_left_bits(x) ((x) | -(x)) + + +/* ----------------------- Runtime Check Support ------------------------- */ + +/* + For security, the main invariant is that malloc/free/etc never + writes to a static address other than malloc_state, unless static + malloc_state itself has been corrupted, which cannot occur via + malloc (because of these checks). In essence this means that we + believe all pointers, sizes, maps etc held in malloc_state, but + check all of those linked or offsetted from other embedded data + structures. These checks are interspersed with main code in a way + that tends to minimize their run-time cost. + + When FOOTERS is defined, in addition to range checking, we also + verify footer fields of inuse chunks, which can be used guarantee + that the mstate controlling malloc/free is intact. This is a + streamlined version of the approach described by William Robertson + et al in "Run-time Detection of Heap-based Overflows" LISA'03 + http://www.usenix.org/events/lisa03/tech/robertson.html The footer + of an inuse chunk holds the xor of its mstate and a random seed, + that is checked upon calls to free() and realloc(). This is + (probablistically) unguessable from outside the program, but can be + computed by any code successfully malloc'ing any chunk, so does not + itself provide protection against code that has already broken + security through some other means. Unlike Robertson et al, we + always dynamically check addresses of all offset chunks (previous, + next, etc). This turns out to be cheaper than relying on hashes. +*/ + +#if !INSECURE +/* Check if address a is at least as high as any from MORECORE or MMAP */ +#define ok_address(M, a) ((char*)(a) >= (M)->least_addr) +/* Check if address of next chunk n is higher than base chunk p */ +#define ok_next(p, n) ((char*)(p) < (char*)(n)) +/* Check if p has its cinuse bit on */ +#define ok_cinuse(p) cinuse(p) +/* Check if p has its pinuse bit on */ +#define ok_pinuse(p) pinuse(p) + +#else /* !INSECURE */ +#define ok_address(M, a) (1) +#define ok_next(b, n) (1) +#define ok_cinuse(p) (1) +#define ok_pinuse(p) (1) +#endif /* !INSECURE */ + +#if (FOOTERS && !INSECURE) +/* Check if (alleged) mstate m has expected magic field */ +#define ok_magic(M) ((M)->magic == mparams.magic) +#else /* (FOOTERS && !INSECURE) */ +#define ok_magic(M) (1) +#endif /* (FOOTERS && !INSECURE) */ + + +/* In gcc, use __builtin_expect to minimize impact of checks */ +#if !INSECURE +#if defined(__GNUC__) && __GNUC__ >= 3 +#define RTCHECK(e) __builtin_expect(e, 1) +#else /* GNUC */ +#define RTCHECK(e) (e) +#endif /* GNUC */ +#else /* !INSECURE */ +#define RTCHECK(e) (1) +#endif /* !INSECURE */ + +/* macros to set up inuse chunks with or without footers */ + +#if !FOOTERS + +#define mark_inuse_foot(M,p,s) + +/* Set cinuse bit and pinuse bit of next chunk */ +#define set_inuse(M,p,s)\ + ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ + ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) + +/* Set cinuse and pinuse of this chunk and pinuse of next chunk */ +#define set_inuse_and_pinuse(M,p,s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) + +/* Set size, cinuse and pinuse bit of this chunk */ +#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT)) + +#else /* FOOTERS */ + +/* Set foot of inuse chunk to be xor of mstate and seed */ +#define mark_inuse_foot(M,p,s)\ + (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic)) + +#define get_mstate_for(p)\ + ((mstate)(((mchunkptr)((char*)(p) +\ + (chunksize(p))))->prev_foot ^ mparams.magic)) + +#define set_inuse(M,p,s)\ + ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ + (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \ + mark_inuse_foot(M,p,s)) + +#define set_inuse_and_pinuse(M,p,s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\ + mark_inuse_foot(M,p,s)) + +#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + mark_inuse_foot(M, p, s)) + +#endif /* !FOOTERS */ + +/* ---------------------------- setting mparams -------------------------- */ + +/* Initialize mparams */ +static int init_mparams(void) { + if (mparams.page_size == 0) { + size_t s; + + mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD; + mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD; +#if MORECORE_CONTIGUOUS + mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT; +#else /* MORECORE_CONTIGUOUS */ + mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT; +#endif /* MORECORE_CONTIGUOUS */ + +#if (FOOTERS && !INSECURE) + { +#if USE_DEV_RANDOM + int fd; + unsigned char buf[sizeof(size_t)]; + /* Try to use /dev/urandom, else fall back on using time */ + if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 && + read(fd, buf, sizeof(buf)) == sizeof(buf)) { + s = *((size_t *) buf); + close(fd); + } + else +#endif /* USE_DEV_RANDOM */ + s = (size_t)(time(0) ^ (size_t)0x55555555U); + + s |= (size_t)8U; /* ensure nonzero */ + s &= ~(size_t)7U; /* improve chances of fault for bad values */ + + } +#else /* (FOOTERS && !INSECURE) */ + s = (size_t)0x58585858U; +#endif /* (FOOTERS && !INSECURE) */ + ACQUIRE_MAGIC_INIT_LOCK(); + if (mparams.magic == 0) { + mparams.magic = s; + /* Set up lock for main malloc area */ + INITIAL_LOCK(&gm->mutex); + gm->mflags = mparams.default_mflags; + } + RELEASE_MAGIC_INIT_LOCK(); + +#if !defined(WIN32) && !defined(__OS2__) + mparams.page_size = malloc_getpagesize; + mparams.granularity = ((DEFAULT_GRANULARITY != 0)? + DEFAULT_GRANULARITY : mparams.page_size); +#elif defined (__OS2__) + /* if low-memory is used, os2munmap() would break + if it were anything other than 64k */ + mparams.page_size = 4096u; + mparams.granularity = 65536u; +#else /* WIN32 */ + { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + mparams.page_size = system_info.dwPageSize; + mparams.granularity = system_info.dwAllocationGranularity; + } +#endif /* WIN32 */ + + /* Sanity-check configuration: + size_t must be unsigned and as wide as pointer type. + ints must be at least 4 bytes. + alignment must be at least 8. + Alignment, min chunk size, and page size must all be powers of 2. + */ + if ((sizeof(size_t) != sizeof(char*)) || + (MAX_SIZE_T < MIN_CHUNK_SIZE) || + (sizeof(int) < 4) || + (MALLOC_ALIGNMENT < (size_t)8U) || + ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) || + ((MCHUNK_SIZE & (MCHUNK_SIZE-SIZE_T_ONE)) != 0) || + ((mparams.granularity & (mparams.granularity-SIZE_T_ONE)) != 0) || + ((mparams.page_size & (mparams.page_size-SIZE_T_ONE)) != 0)) + ABORT; + } + return 0; +} + +/* support for mallopt */ +static int change_mparam(int param_number, int value) { + size_t val = (size_t)value; + init_mparams(); + switch(param_number) { + case M_TRIM_THRESHOLD: + mparams.trim_threshold = val; + return 1; + case M_GRANULARITY: + if (val >= mparams.page_size && ((val & (val-1)) == 0)) { + mparams.granularity = val; + return 1; + } + else + return 0; + case M_MMAP_THRESHOLD: + mparams.mmap_threshold = val; + return 1; + default: + return 0; + } +} + +#if DEBUG +/* ------------------------- Debugging Support --------------------------- */ + +/* Check properties of any chunk, whether free, inuse, mmapped etc */ +static void do_check_any_chunk(mstate m, mchunkptr p) { + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); +} + +/* Check properties of top chunk */ +static void do_check_top_chunk(mstate m, mchunkptr p) { + msegmentptr sp = segment_holding(m, (char*)p); + size_t sz = chunksize(p); + assert(sp != 0); + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); + assert(sz == m->topsize); + assert(sz > 0); + assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE); + assert(pinuse(p)); + assert(!next_pinuse(p)); +} + +/* Check properties of (inuse) mmapped chunks */ +static void do_check_mmapped_chunk(mstate m, mchunkptr p) { + size_t sz = chunksize(p); + size_t len = (sz + (p->prev_foot & ~IS_MMAPPED_BIT) + MMAP_FOOT_PAD); + assert(is_mmapped(p)); + assert(use_mmap(m)); + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); + assert(!is_small(sz)); + assert((len & (mparams.page_size-SIZE_T_ONE)) == 0); + assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD); + assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0); +} + +/* Check properties of inuse chunks */ +static void do_check_inuse_chunk(mstate m, mchunkptr p) { + do_check_any_chunk(m, p); + assert(cinuse(p)); + assert(next_pinuse(p)); + /* If not pinuse and not mmapped, previous chunk has OK offset */ + assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p); + if (is_mmapped(p)) + do_check_mmapped_chunk(m, p); +} + +/* Check properties of free chunks */ +static void do_check_free_chunk(mstate m, mchunkptr p) { + size_t sz = p->head & ~(PINUSE_BIT|CINUSE_BIT); + mchunkptr next = chunk_plus_offset(p, sz); + do_check_any_chunk(m, p); + assert(!cinuse(p)); + assert(!next_pinuse(p)); + assert (!is_mmapped(p)); + if (p != m->dv && p != m->top) { + if (sz >= MIN_CHUNK_SIZE) { + assert((sz & CHUNK_ALIGN_MASK) == 0); + assert(is_aligned(chunk2mem(p))); + assert(next->prev_foot == sz); + assert(pinuse(p)); + assert (next == m->top || cinuse(next)); + assert(p->fd->bk == p); + assert(p->bk->fd == p); + } + else /* markers are always of size SIZE_T_SIZE */ + assert(sz == SIZE_T_SIZE); + } +} + +/* Check properties of malloced chunks at the point they are malloced */ +static void do_check_malloced_chunk(mstate m, void* mem, size_t s) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + size_t sz = p->head & ~(PINUSE_BIT|CINUSE_BIT); + do_check_inuse_chunk(m, p); + assert((sz & CHUNK_ALIGN_MASK) == 0); + assert(sz >= MIN_CHUNK_SIZE); + assert(sz >= s); + /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */ + assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE)); + } +} + +/* Check a tree and its subtrees. */ +static void do_check_tree(mstate m, tchunkptr t) { + tchunkptr head = 0; + tchunkptr u = t; + bindex_t tindex = t->index; + size_t tsize = chunksize(t); + bindex_t idx; + compute_tree_index(tsize, idx); + assert(tindex == idx); + assert(tsize >= MIN_LARGE_SIZE); + assert(tsize >= minsize_for_tree_index(idx)); + assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1)))); + + do { /* traverse through chain of same-sized nodes */ + do_check_any_chunk(m, ((mchunkptr)u)); + assert(u->index == tindex); + assert(chunksize(u) == tsize); + assert(!cinuse(u)); + assert(!next_pinuse(u)); + assert(u->fd->bk == u); + assert(u->bk->fd == u); + if (u->parent == 0) { + assert(u->child[0] == 0); + assert(u->child[1] == 0); + } + else { + assert(head == 0); /* only one node on chain has parent */ + head = u; + assert(u->parent != u); + assert (u->parent->child[0] == u || + u->parent->child[1] == u || + *((tbinptr*)(u->parent)) == u); + if (u->child[0] != 0) { + assert(u->child[0]->parent == u); + assert(u->child[0] != u); + do_check_tree(m, u->child[0]); + } + if (u->child[1] != 0) { + assert(u->child[1]->parent == u); + assert(u->child[1] != u); + do_check_tree(m, u->child[1]); + } + if (u->child[0] != 0 && u->child[1] != 0) { + assert(chunksize(u->child[0]) < chunksize(u->child[1])); + } + } + u = u->fd; + } while (u != t); + assert(head != 0); +} + +/* Check all the chunks in a treebin. */ +static void do_check_treebin(mstate m, bindex_t i) { + tbinptr* tb = treebin_at(m, i); + tchunkptr t = *tb; + int empty = (m->treemap & (1U << i)) == 0; + if (t == 0) + assert(empty); + if (!empty) + do_check_tree(m, t); +} + +/* Check all the chunks in a smallbin. */ +static void do_check_smallbin(mstate m, bindex_t i) { + sbinptr b = smallbin_at(m, i); + mchunkptr p = b->bk; + unsigned int empty = (m->smallmap & (1U << i)) == 0; + if (p == b) + assert(empty); + if (!empty) { + for (; p != b; p = p->bk) { + size_t size = chunksize(p); + mchunkptr q; + /* each chunk claims to be free */ + do_check_free_chunk(m, p); + /* chunk belongs in bin */ + assert(small_index(size) == i); + assert(p->bk == b || chunksize(p->bk) == chunksize(p)); + /* chunk is followed by an inuse chunk */ + q = next_chunk(p); + if (q->head != FENCEPOST_HEAD) + do_check_inuse_chunk(m, q); + } + } +} + +/* Find x in a bin. Used in other check functions. */ +static int bin_find(mstate m, mchunkptr x) { + size_t size = chunksize(x); + if (is_small(size)) { + bindex_t sidx = small_index(size); + sbinptr b = smallbin_at(m, sidx); + if (smallmap_is_marked(m, sidx)) { + mchunkptr p = b; + do { + if (p == x) + return 1; + } while ((p = p->fd) != b); + } + } + else { + bindex_t tidx; + compute_tree_index(size, tidx); + if (treemap_is_marked(m, tidx)) { + tchunkptr t = *treebin_at(m, tidx); + size_t sizebits = size << leftshift_for_tree_index(tidx); + while (t != 0 && chunksize(t) != size) { + t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; + sizebits <<= 1; + } + if (t != 0) { + tchunkptr u = t; + do { + if (u == (tchunkptr)x) + return 1; + } while ((u = u->fd) != t); + } + } + } + return 0; +} + +/* Traverse each chunk and check it; return total */ +static size_t traverse_and_check(mstate m) { + size_t sum = 0; + if (is_initialized(m)) { + msegmentptr s = &m->seg; + sum += m->topsize + TOP_FOOT_SIZE; + while (s != 0) { + mchunkptr q = align_as_chunk(s->base); + mchunkptr lastq = 0; + assert(pinuse(q)); + while (segment_holds(s, q) && + q != m->top && q->head != FENCEPOST_HEAD) { + sum += chunksize(q); + if (cinuse(q)) { + assert(!bin_find(m, q)); + do_check_inuse_chunk(m, q); + } + else { + assert(q == m->dv || bin_find(m, q)); + assert(lastq == 0 || cinuse(lastq)); /* Not 2 consecutive free */ + do_check_free_chunk(m, q); + } + lastq = q; + q = next_chunk(q); + } + s = s->next; + } + } + return sum; +} + +/* Check all properties of malloc_state. */ +static void do_check_malloc_state(mstate m) { + bindex_t i; + size_t total; + /* check bins */ + for (i = 0; i < NSMALLBINS; ++i) + do_check_smallbin(m, i); + for (i = 0; i < NTREEBINS; ++i) + do_check_treebin(m, i); + + if (m->dvsize != 0) { /* check dv chunk */ + do_check_any_chunk(m, m->dv); + assert(m->dvsize == chunksize(m->dv)); + assert(m->dvsize >= MIN_CHUNK_SIZE); + assert(bin_find(m, m->dv) == 0); + } + + if (m->top != 0) { /* check top chunk */ + do_check_top_chunk(m, m->top); + assert(m->topsize == chunksize(m->top)); + assert(m->topsize > 0); + assert(bin_find(m, m->top) == 0); + } + + total = traverse_and_check(m); + assert(total <= m->footprint); + assert(m->footprint <= m->max_footprint); +} +#endif /* DEBUG */ + +/* ----------------------------- statistics ------------------------------ */ + +#if !NO_MALLINFO +static struct mallinfo internal_mallinfo(mstate m) { + struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + if (!PREACTION(m)) { + check_malloc_state(m); + if (is_initialized(m)) { + size_t nfree = SIZE_T_ONE; /* top always free */ + size_t mfree = m->topsize + TOP_FOOT_SIZE; + size_t sum = mfree; + msegmentptr s = &m->seg; + while (s != 0) { + mchunkptr q = align_as_chunk(s->base); + while (segment_holds(s, q) && + q != m->top && q->head != FENCEPOST_HEAD) { + size_t sz = chunksize(q); + sum += sz; + if (!cinuse(q)) { + mfree += sz; + ++nfree; + } + q = next_chunk(q); + } + s = s->next; + } + + nm.arena = sum; + nm.ordblks = nfree; + nm.hblkhd = m->footprint - sum; + nm.usmblks = m->max_footprint; + nm.uordblks = m->footprint - mfree; + nm.fordblks = mfree; + nm.keepcost = m->topsize; + } + + POSTACTION(m); + } + return nm; +} +#endif /* !NO_MALLINFO */ + +static void internal_malloc_stats(mstate m) { + if (!PREACTION(m)) { + size_t maxfp = 0; + size_t fp = 0; + size_t used = 0; + check_malloc_state(m); + if (is_initialized(m)) { + msegmentptr s = &m->seg; + maxfp = m->max_footprint; + fp = m->footprint; + used = fp - (m->topsize + TOP_FOOT_SIZE); + + while (s != 0) { + mchunkptr q = align_as_chunk(s->base); + while (segment_holds(s, q) && + q != m->top && q->head != FENCEPOST_HEAD) { + if (!cinuse(q)) + used -= chunksize(q); + q = next_chunk(q); + } + s = s->next; + } + } + + fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(maxfp)); + fprintf(stderr, "system bytes = %10lu\n", (unsigned long)(fp)); + fprintf(stderr, "in use bytes = %10lu\n", (unsigned long)(used)); + + POSTACTION(m); + } +} + +/* ----------------------- Operations on smallbins ----------------------- */ + +/* + Various forms of linking and unlinking are defined as macros. Even + the ones for trees, which are very long but have very short typical + paths. This is ugly but reduces reliance on inlining support of + compilers. +*/ + +/* Link a free chunk into a smallbin */ +#define insert_small_chunk(M, P, S) {\ + bindex_t I = small_index(S);\ + mchunkptr B = smallbin_at(M, I);\ + mchunkptr F = B;\ + assert(S >= MIN_CHUNK_SIZE);\ + if (!smallmap_is_marked(M, I))\ + mark_smallmap(M, I);\ + else if (RTCHECK(ok_address(M, B->fd)))\ + F = B->fd;\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + B->fd = P;\ + F->bk = P;\ + P->fd = F;\ + P->bk = B;\ +} + +/* Unlink a chunk from a smallbin */ +#define unlink_small_chunk(M, P, S) {\ + mchunkptr F = P->fd;\ + mchunkptr B = P->bk;\ + bindex_t I = small_index(S);\ + assert(P != B);\ + assert(P != F);\ + assert(chunksize(P) == small_index2size(I));\ + if (F == B)\ + clear_smallmap(M, I);\ + else if (RTCHECK((F == smallbin_at(M,I) || ok_address(M, F)) &&\ + (B == smallbin_at(M,I) || ok_address(M, B)))) {\ + F->bk = B;\ + B->fd = F;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ +} + +/* Unlink the first chunk from a smallbin */ +#define unlink_first_small_chunk(M, B, P, I) {\ + mchunkptr F = P->fd;\ + assert(P != B);\ + assert(P != F);\ + assert(chunksize(P) == small_index2size(I));\ + if (B == F)\ + clear_smallmap(M, I);\ + else if (RTCHECK(ok_address(M, F))) {\ + B->fd = F;\ + F->bk = B;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ +} + +/* Replace dv node, binning the old one */ +/* Used only when dvsize known to be small */ +#define replace_dv(M, P, S) {\ + size_t DVS = M->dvsize;\ + if (DVS != 0) {\ + mchunkptr DV = M->dv;\ + assert(is_small(DVS));\ + insert_small_chunk(M, DV, DVS);\ + }\ + M->dvsize = S;\ + M->dv = P;\ +} + +/* ------------------------- Operations on trees ------------------------- */ + +/* Insert chunk into tree */ +#define insert_large_chunk(M, X, S) {\ + tbinptr* H;\ + bindex_t I;\ + compute_tree_index(S, I);\ + H = treebin_at(M, I);\ + X->index = I;\ + X->child[0] = X->child[1] = 0;\ + if (!treemap_is_marked(M, I)) {\ + mark_treemap(M, I);\ + *H = X;\ + X->parent = (tchunkptr)H;\ + X->fd = X->bk = X;\ + }\ + else {\ + tchunkptr T = *H;\ + size_t K = S << leftshift_for_tree_index(I);\ + for (;;) {\ + if (chunksize(T) != S) {\ + tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\ + K <<= 1;\ + if (*C != 0)\ + T = *C;\ + else if (RTCHECK(ok_address(M, C))) {\ + *C = X;\ + X->parent = T;\ + X->fd = X->bk = X;\ + break;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + break;\ + }\ + }\ + else {\ + tchunkptr F = T->fd;\ + if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\ + T->fd = F->bk = X;\ + X->fd = F;\ + X->bk = T;\ + X->parent = 0;\ + break;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + break;\ + }\ + }\ + }\ + }\ +} + +/* + Unlink steps: + + 1. If x is a chained node, unlink it from its same-sized fd/bk links + and choose its bk node as its replacement. + 2. If x was the last node of its size, but not a leaf node, it must + be replaced with a leaf node (not merely one with an open left or + right), to make sure that lefts and rights of descendents + correspond properly to bit masks. We use the rightmost descendent + of x. We could use any other leaf, but this is easy to locate and + tends to counteract removal of leftmosts elsewhere, and so keeps + paths shorter than minimally guaranteed. This doesn't loop much + because on average a node in a tree is near the bottom. + 3. If x is the base of a chain (i.e., has parent links) relink + x's parent and children to x's replacement (or null if none). +*/ + +#define unlink_large_chunk(M, X) {\ + tchunkptr XP = X->parent;\ + tchunkptr R;\ + if (X->bk != X) {\ + tchunkptr F = X->fd;\ + R = X->bk;\ + if (RTCHECK(ok_address(M, F))) {\ + F->bk = R;\ + R->fd = F;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + else {\ + tchunkptr* RP;\ + if (((R = *(RP = &(X->child[1]))) != 0) ||\ + ((R = *(RP = &(X->child[0]))) != 0)) {\ + tchunkptr* CP;\ + while ((*(CP = &(R->child[1])) != 0) ||\ + (*(CP = &(R->child[0])) != 0)) {\ + R = *(RP = CP);\ + }\ + if (RTCHECK(ok_address(M, RP)))\ + *RP = 0;\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + }\ + if (XP != 0) {\ + tbinptr* H = treebin_at(M, X->index);\ + if (X == *H) {\ + if ((*H = R) == 0) \ + clear_treemap(M, X->index);\ + }\ + else if (RTCHECK(ok_address(M, XP))) {\ + if (XP->child[0] == X) \ + XP->child[0] = R;\ + else \ + XP->child[1] = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + if (R != 0) {\ + if (RTCHECK(ok_address(M, R))) {\ + tchunkptr C0, C1;\ + R->parent = XP;\ + if ((C0 = X->child[0]) != 0) {\ + if (RTCHECK(ok_address(M, C0))) {\ + R->child[0] = C0;\ + C0->parent = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + if ((C1 = X->child[1]) != 0) {\ + if (RTCHECK(ok_address(M, C1))) {\ + R->child[1] = C1;\ + C1->parent = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ +} + +/* Relays to large vs small bin operations */ + +#define insert_chunk(M, P, S)\ + if (is_small(S)) insert_small_chunk(M, P, S)\ + else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); } + +#define unlink_chunk(M, P, S)\ + if (is_small(S)) unlink_small_chunk(M, P, S)\ + else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); } + + +/* Relays to internal calls to malloc/free from realloc, memalign etc */ + +#if ONLY_MSPACES +#define internal_malloc(m, b) mspace_malloc(m, b) +#define internal_free(m, mem) mspace_free(m,mem); +#else /* ONLY_MSPACES */ +#if MSPACES +#define internal_malloc(m, b)\ + (m == gm)? dlmalloc(b) : mspace_malloc(m, b) +#define internal_free(m, mem)\ + if (m == gm) dlfree(mem); else mspace_free(m,mem); +#else /* MSPACES */ +#define internal_malloc(m, b) dlmalloc(b) +#define internal_free(m, mem) dlfree(mem) +#endif /* MSPACES */ +#endif /* ONLY_MSPACES */ + +/* ----------------------- Direct-mmapping chunks ----------------------- */ + +/* + Directly mmapped chunks are set up with an offset to the start of + the mmapped region stored in the prev_foot field of the chunk. This + allows reconstruction of the required argument to MUNMAP when freed, + and also allows adjustment of the returned chunk to meet alignment + requirements (especially in memalign). There is also enough space + allocated to hold a fake next chunk of size SIZE_T_SIZE to maintain + the PINUSE bit so frees can be checked. +*/ + +/* Malloc using mmap */ +static void* mmap_alloc(mstate m, size_t nb) { + size_t mmsize = granularity_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + if (mmsize > nb) { /* Check for wrap around 0 */ + char* mm = (char*)(DIRECT_MMAP(mmsize)); + if (mm != CMFAIL) { + size_t offset = align_offset(chunk2mem(mm)); + size_t psize = mmsize - offset - MMAP_FOOT_PAD; + mchunkptr p = (mchunkptr)(mm + offset); + p->prev_foot = offset | IS_MMAPPED_BIT; + (p)->head = (psize|CINUSE_BIT); + mark_inuse_foot(m, p, psize); + chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD; + chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0; + + if (mm < m->least_addr) + m->least_addr = mm; + if ((m->footprint += mmsize) > m->max_footprint) + m->max_footprint = m->footprint; + assert(is_aligned(chunk2mem(p))); + check_mmapped_chunk(m, p); + return chunk2mem(p); + } + } + return 0; +} + +/* Realloc using mmap */ +static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb) { + size_t oldsize = chunksize(oldp); + if (is_small(nb)) /* Can't shrink mmap regions below small size */ + return 0; + /* Keep old chunk if big enough but not too big */ + if (oldsize >= nb + SIZE_T_SIZE && + (oldsize - nb) <= (mparams.granularity << 1)) + return oldp; + else { + size_t offset = oldp->prev_foot & ~IS_MMAPPED_BIT; + size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD; + size_t newmmsize = granularity_align(nb + SIX_SIZE_T_SIZES + + CHUNK_ALIGN_MASK); + char* cp = (char*)CALL_MREMAP((char*)oldp - offset, + oldmmsize, newmmsize, 1); + if (cp != CMFAIL) { + mchunkptr newp = (mchunkptr)(cp + offset); + size_t psize = newmmsize - offset - MMAP_FOOT_PAD; + newp->head = (psize|CINUSE_BIT); + mark_inuse_foot(m, newp, psize); + chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD; + chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0; + + if (cp < m->least_addr) + m->least_addr = cp; + if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint) + m->max_footprint = m->footprint; + check_mmapped_chunk(m, newp); + return newp; + } + } + return 0; +} + +/* -------------------------- mspace management -------------------------- */ + +/* Initialize top chunk and its size */ +static void init_top(mstate m, mchunkptr p, size_t psize) { + /* Ensure alignment */ + size_t offset = align_offset(chunk2mem(p)); + p = (mchunkptr)((char*)p + offset); + psize -= offset; + + m->top = p; + m->topsize = psize; + p->head = psize | PINUSE_BIT; + /* set size of fake trailing chunk holding overhead space only once */ + chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE; + m->trim_check = mparams.trim_threshold; /* reset on each update */ +} + +/* Initialize bins for a new mstate that is otherwise zeroed out */ +static void init_bins(mstate m) { + /* Establish circular links for smallbins */ + bindex_t i; + for (i = 0; i < NSMALLBINS; ++i) { + sbinptr bin = smallbin_at(m,i); + bin->fd = bin->bk = bin; + } +} + +#if PROCEED_ON_ERROR + +/* default corruption action */ +static void reset_on_error(mstate m) { + int i; + ++malloc_corruption_error_count; + /* Reinitialize fields to forget about all memory */ + m->smallbins = m->treebins = 0; + m->dvsize = m->topsize = 0; + m->seg.base = 0; + m->seg.size = 0; + m->seg.next = 0; + m->top = m->dv = 0; + for (i = 0; i < NTREEBINS; ++i) + *treebin_at(m, i) = 0; + init_bins(m); +} +#endif /* PROCEED_ON_ERROR */ + +/* Allocate chunk and prepend remainder with chunk in successor base. */ +static void* prepend_alloc(mstate m, char* newbase, char* oldbase, + size_t nb) { + mchunkptr p = align_as_chunk(newbase); + mchunkptr oldfirst = align_as_chunk(oldbase); + size_t psize = (char*)oldfirst - (char*)p; + mchunkptr q = chunk_plus_offset(p, nb); + size_t qsize = psize - nb; + set_size_and_pinuse_of_inuse_chunk(m, p, nb); + + assert((char*)oldfirst > (char*)q); + assert(pinuse(oldfirst)); + assert(qsize >= MIN_CHUNK_SIZE); + + /* consolidate remainder with first chunk of old base */ + if (oldfirst == m->top) { + size_t tsize = m->topsize += qsize; + m->top = q; + q->head = tsize | PINUSE_BIT; + check_top_chunk(m, q); + } + else if (oldfirst == m->dv) { + size_t dsize = m->dvsize += qsize; + m->dv = q; + set_size_and_pinuse_of_free_chunk(q, dsize); + } + else { + if (!cinuse(oldfirst)) { + size_t nsize = chunksize(oldfirst); + unlink_chunk(m, oldfirst, nsize); + oldfirst = chunk_plus_offset(oldfirst, nsize); + qsize += nsize; + } + set_free_with_pinuse(q, qsize, oldfirst); + insert_chunk(m, q, qsize); + check_free_chunk(m, q); + } + + check_malloced_chunk(m, chunk2mem(p), nb); + return chunk2mem(p); +} + + +/* Add a segment to hold a new noncontiguous region */ +static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) { + /* Determine locations and sizes of segment, fenceposts, old top */ + char* old_top = (char*)m->top; + msegmentptr oldsp = segment_holding(m, old_top); + char* old_end = oldsp->base + oldsp->size; + size_t ssize = pad_request(sizeof(struct malloc_segment)); + char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + size_t offset = align_offset(chunk2mem(rawsp)); + char* asp = rawsp + offset; + char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp; + mchunkptr sp = (mchunkptr)csp; + msegmentptr ss = (msegmentptr)(chunk2mem(sp)); + mchunkptr tnext = chunk_plus_offset(sp, ssize); + mchunkptr p = tnext; + int nfences = 0; + + /* reset top to new space */ + init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); + + /* Set up segment record */ + assert(is_aligned(ss)); + set_size_and_pinuse_of_inuse_chunk(m, sp, ssize); + *ss = m->seg; /* Push current record */ + m->seg.base = tbase; + m->seg.size = tsize; + set_segment_flags(&m->seg, mmapped); + m->seg.next = ss; + + /* Insert trailing fenceposts */ + for (;;) { + mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE); + p->head = FENCEPOST_HEAD; + ++nfences; + if ((char*)(&(nextp->head)) < old_end) + p = nextp; + else + break; + } + assert(nfences >= 2); + + /* Insert the rest of old top into a bin as an ordinary free chunk */ + if (csp != old_top) { + mchunkptr q = (mchunkptr)old_top; + size_t psize = csp - old_top; + mchunkptr tn = chunk_plus_offset(q, psize); + set_free_with_pinuse(q, psize, tn); + insert_chunk(m, q, psize); + } + + check_top_chunk(m, m->top); +} + +/* -------------------------- System allocation -------------------------- */ + +/* Get memory from system using MORECORE or MMAP */ +static void* sys_alloc(mstate m, size_t nb) { + char* tbase = CMFAIL; + size_t tsize = 0; + flag_t mmap_flag = 0; + + init_mparams(); + + /* Directly map large chunks */ + if (use_mmap(m) && nb >= mparams.mmap_threshold) { + void* mem = mmap_alloc(m, nb); + if (mem != 0) + return mem; + } + + /* + Try getting memory in any of three ways (in most-preferred to + least-preferred order): + 1. A call to MORECORE that can normally contiguously extend memory. + (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or + or main space is mmapped or a previous contiguous call failed) + 2. A call to MMAP new space (disabled if not HAVE_MMAP). + Note that under the default settings, if MORECORE is unable to + fulfill a request, and HAVE_MMAP is true, then mmap is + used as a noncontiguous system allocator. This is a useful backup + strategy for systems with holes in address spaces -- in this case + sbrk cannot contiguously expand the heap, but mmap may be able to + find space. + 3. A call to MORECORE that cannot usually contiguously extend memory. + (disabled if not HAVE_MORECORE) + */ + + if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) { + char* br = CMFAIL; + msegmentptr ss = (m->top == 0)? 0 : segment_holding(m, (char*)m->top); + size_t asize = 0; + ACQUIRE_MORECORE_LOCK(); + + if (ss == 0) { /* First time through or recovery */ + char* base = (char*)CALL_MORECORE(0); + if (base != CMFAIL) { + asize = granularity_align(nb + TOP_FOOT_SIZE + SIZE_T_ONE); + /* Adjust to end on a page boundary */ + if (!is_page_aligned(base)) + asize += (page_align((size_t)base) - (size_t)base); + /* Can't call MORECORE if size is negative when treated as signed */ + if (asize < HALF_MAX_SIZE_T && + (br = (char*)(CALL_MORECORE(asize))) == base) { + tbase = base; + tsize = asize; + } + } + } + else { + /* Subtract out existing available top space from MORECORE request. */ + asize = granularity_align(nb - m->topsize + TOP_FOOT_SIZE + SIZE_T_ONE); + /* Use mem here only if it did continuously extend old space */ + if (asize < HALF_MAX_SIZE_T && + (br = (char*)(CALL_MORECORE(asize))) == ss->base+ss->size) { + tbase = br; + tsize = asize; + } + } + + if (tbase == CMFAIL) { /* Cope with partial failure */ + if (br != CMFAIL) { /* Try to use/extend the space we did get */ + if (asize < HALF_MAX_SIZE_T && + asize < nb + TOP_FOOT_SIZE + SIZE_T_ONE) { + size_t esize = granularity_align(nb + TOP_FOOT_SIZE + SIZE_T_ONE - asize); + if (esize < HALF_MAX_SIZE_T) { + char* end = (char*)CALL_MORECORE(esize); + if (end != CMFAIL) + asize += esize; + else { /* Can't use; try to release */ + (void)CALL_MORECORE(-asize); + br = CMFAIL; + } + } + } + } + if (br != CMFAIL) { /* Use the space we did get */ + tbase = br; + tsize = asize; + } + else + disable_contiguous(m); /* Don't try contiguous path in the future */ + } + + RELEASE_MORECORE_LOCK(); + } + + if (HAVE_MMAP && tbase == CMFAIL) { /* Try MMAP */ + size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE; + size_t rsize = granularity_align(req); + if (rsize > nb) { /* Fail if wraps around zero */ + char* mp = (char*)(CALL_MMAP(rsize)); + if (mp != CMFAIL) { + tbase = mp; + tsize = rsize; + mmap_flag = IS_MMAPPED_BIT; + } + } + } + + if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */ + size_t asize = granularity_align(nb + TOP_FOOT_SIZE + SIZE_T_ONE); + if (asize < HALF_MAX_SIZE_T) { + char* br = CMFAIL; + char* end = CMFAIL; + ACQUIRE_MORECORE_LOCK(); + br = (char*)(CALL_MORECORE(asize)); + end = (char*)(CALL_MORECORE(0)); + RELEASE_MORECORE_LOCK(); + if (br != CMFAIL && end != CMFAIL && br < end) { + size_t ssize = end - br; + if (ssize > nb + TOP_FOOT_SIZE) { + tbase = br; + tsize = ssize; + } + } + } + } + + if (tbase != CMFAIL) { + + if ((m->footprint += tsize) > m->max_footprint) + m->max_footprint = m->footprint; + + if (!is_initialized(m)) { /* first-time initialization */ + m->seg.base = m->least_addr = tbase; + m->seg.size = tsize; + set_segment_flags(&m->seg, mmap_flag); + m->magic = mparams.magic; + init_bins(m); + if (is_global(m)) + init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); + else { + /* Offset top by embedded malloc_state */ + mchunkptr mn = next_chunk(mem2chunk(m)); + init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE); + } + } + + else { + /* Try to merge with an existing segment */ + msegmentptr sp = &m->seg; + while (sp != 0 && tbase != sp->base + sp->size) + sp = sp->next; + if (sp != 0 && + !is_extern_segment(sp) && + check_segment_merge(sp, tbase, tsize) && + (get_segment_flags(sp) & IS_MMAPPED_BIT) == mmap_flag && + segment_holds(sp, m->top)) { /* append */ + sp->size += tsize; + init_top(m, m->top, m->topsize + tsize); + } + else { + if (tbase < m->least_addr) + m->least_addr = tbase; + sp = &m->seg; + while (sp != 0 && sp->base != tbase + tsize) + sp = sp->next; + if (sp != 0 && + !is_extern_segment(sp) && + check_segment_merge(sp, tbase, tsize) && + (get_segment_flags(sp) & IS_MMAPPED_BIT) == mmap_flag) { + char* oldbase = sp->base; + sp->base = tbase; + sp->size += tsize; + return prepend_alloc(m, tbase, oldbase, nb); + } + else + add_segment(m, tbase, tsize, mmap_flag); + } + } + + if (nb < m->topsize) { /* Allocate from new or extended top space */ + size_t rsize = m->topsize -= nb; + mchunkptr p = m->top; + mchunkptr r = m->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(m, p, nb); + check_top_chunk(m, m->top); + check_malloced_chunk(m, chunk2mem(p), nb); + return chunk2mem(p); + } + } + + MALLOC_FAILURE_ACTION; + return 0; +} + +/* ----------------------- system deallocation -------------------------- */ + +/* Unmap and unlink any mmapped segments that don't contain used chunks */ +static size_t release_unused_segments(mstate m) { + size_t released = 0; + msegmentptr pred = &m->seg; + msegmentptr sp = pred->next; + while (sp != 0) { + char* base = sp->base; + size_t size = sp->size; + msegmentptr next = sp->next; + if (is_mmapped_segment(sp) && !is_extern_segment(sp)) { + mchunkptr p = align_as_chunk(base); + size_t psize = chunksize(p); + /* Can unmap if first chunk holds entire segment and not pinned */ + if (!cinuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) { + tchunkptr tp = (tchunkptr)p; + assert(segment_holds(sp, (char*)sp)); + if (p == m->dv) { + m->dv = 0; + m->dvsize = 0; + } + else { + unlink_large_chunk(m, tp); + } + if (CALL_MUNMAP(base, size) == 0) { + released += size; + m->footprint -= size; + /* unlink obsoleted record */ + sp = pred; + sp->next = next; + } + else { /* back out if cannot unmap */ + insert_large_chunk(m, tp, psize); + } + } + } + pred = sp; + sp = next; + } + return released; +} + +static int sys_trim(mstate m, size_t pad) { + size_t released = 0; + if (pad < MAX_REQUEST && is_initialized(m)) { + pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */ + + if (m->topsize > pad) { + /* Shrink top space in granularity-size units, keeping at least one */ + size_t unit = mparams.granularity; + size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit - + SIZE_T_ONE) * unit; + msegmentptr sp = segment_holding(m, (char*)m->top); + + if (!is_extern_segment(sp)) { + if (is_mmapped_segment(sp)) { + if (HAVE_MMAP && + sp->size >= extra && + !has_segment_link(m, sp)) { /* can't shrink if pinned */ + size_t newsize = sp->size - extra; + /* Prefer mremap, fall back to munmap */ + if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) || + (CALL_MUNMAP(sp->base + newsize, extra) == 0)) { + released = extra; + } + } + } + else if (HAVE_MORECORE) { + if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */ + extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit; + ACQUIRE_MORECORE_LOCK(); + { + /* Make sure end of memory is where we last set it. */ + char* old_br = (char*)(CALL_MORECORE(0)); + if (old_br == sp->base + sp->size) { + char* rel_br = (char*)(CALL_MORECORE(-extra)); + char* new_br = (char*)(CALL_MORECORE(0)); + if (rel_br != CMFAIL && new_br < old_br) + released = old_br - new_br; + } + } + RELEASE_MORECORE_LOCK(); + } + } + + if (released != 0) { + sp->size -= released; + m->footprint -= released; + init_top(m, m->top, m->topsize - released); + check_top_chunk(m, m->top); + } + } + + /* Unmap any unused mmapped segments */ + if (HAVE_MMAP) + released += release_unused_segments(m); + + /* On failure, disable autotrim to avoid repeated failed future calls */ + if (released == 0) + m->trim_check = MAX_SIZE_T; + } + + return (released != 0)? 1 : 0; +} + +/* ---------------------------- malloc support --------------------------- */ + +/* allocate a large request from the best fitting chunk in a treebin */ +static void* tmalloc_large(mstate m, size_t nb) { + tchunkptr v = 0; + size_t rsize = -nb; /* Unsigned negation */ + tchunkptr t; + bindex_t idx; + compute_tree_index(nb, idx); + + if ((t = *treebin_at(m, idx)) != 0) { + /* Traverse tree for this bin looking for node with size == nb */ + size_t sizebits = nb << leftshift_for_tree_index(idx); + tchunkptr rst = 0; /* The deepest untaken right subtree */ + for (;;) { + tchunkptr rt; + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + v = t; + if ((rsize = trem) == 0) + break; + } + rt = t->child[1]; + t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; + if (rt != 0 && rt != t) + rst = rt; + if (t == 0) { + t = rst; /* set t to least subtree holding sizes > nb */ + break; + } + sizebits <<= 1; + } + } + + if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */ + binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap; + if (leftbits != 0) { + bindex_t i; + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + t = *treebin_at(m, i); + } + } + + while (t != 0) { /* find smallest of tree or subtree */ + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + t = leftmost_child(t); + } + + /* If dv is a better fit, return 0 so malloc will use it */ + if (v != 0 && rsize < (size_t)(m->dvsize - nb)) { + if (RTCHECK(ok_address(m, v))) { /* split */ + mchunkptr r = chunk_plus_offset(v, nb); + assert(chunksize(v) == rsize + nb); + if (RTCHECK(ok_next(v, r))) { + unlink_large_chunk(m, v); + if (rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(m, v, (rsize + nb)); + else { + set_size_and_pinuse_of_inuse_chunk(m, v, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + insert_chunk(m, r, rsize); + } + return chunk2mem(v); + } + } + CORRUPTION_ERROR_ACTION(m); + } + return 0; +} + +/* allocate a small request from the best fitting chunk in a treebin */ +static void* tmalloc_small(mstate m, size_t nb) { + tchunkptr t, v; + size_t rsize; + bindex_t i; + binmap_t leastbit = least_bit(m->treemap); + compute_bit2idx(leastbit, i); + + v = t = *treebin_at(m, i); + rsize = chunksize(t) - nb; + + while ((t = leftmost_child(t)) != 0) { + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + } + + if (RTCHECK(ok_address(m, v))) { + mchunkptr r = chunk_plus_offset(v, nb); + assert(chunksize(v) == rsize + nb); + if (RTCHECK(ok_next(v, r))) { + unlink_large_chunk(m, v); + if (rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(m, v, (rsize + nb)); + else { + set_size_and_pinuse_of_inuse_chunk(m, v, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(m, r, rsize); + } + return chunk2mem(v); + } + } + + CORRUPTION_ERROR_ACTION(m); + return 0; +} + +/* --------------------------- realloc support --------------------------- */ + +static void* internal_realloc(mstate m, void* oldmem, size_t bytes) { + if (bytes >= MAX_REQUEST) { + MALLOC_FAILURE_ACTION; + return 0; + } + if (!PREACTION(m)) { + mchunkptr oldp = mem2chunk(oldmem); + size_t oldsize = chunksize(oldp); + mchunkptr next = chunk_plus_offset(oldp, oldsize); + mchunkptr newp = 0; + void* extra = 0; + + /* Try to either shrink or extend into top. Else malloc-copy-free */ + + if (RTCHECK(ok_address(m, oldp) && ok_cinuse(oldp) && + ok_next(oldp, next) && ok_pinuse(next))) { + size_t nb = request2size(bytes); + if (is_mmapped(oldp)) + newp = mmap_resize(m, oldp, nb); + else if (oldsize >= nb) { /* already big enough */ + size_t rsize = oldsize - nb; + newp = oldp; + if (rsize >= MIN_CHUNK_SIZE) { + mchunkptr remainder = chunk_plus_offset(newp, nb); + set_inuse(m, newp, nb); + set_inuse(m, remainder, rsize); + extra = chunk2mem(remainder); + } + } + else if (next == m->top && oldsize + m->topsize > nb) { + /* Expand into top */ + size_t newsize = oldsize + m->topsize; + size_t newtopsize = newsize - nb; + mchunkptr newtop = chunk_plus_offset(oldp, nb); + set_inuse(m, oldp, nb); + newtop->head = newtopsize |PINUSE_BIT; + m->top = newtop; + m->topsize = newtopsize; + newp = oldp; + } + } + else { + USAGE_ERROR_ACTION(m, oldmem); + POSTACTION(m); + return 0; + } + + POSTACTION(m); + + if (newp != 0) { + if (extra != 0) { + internal_free(m, extra); + } + check_inuse_chunk(m, newp); + return chunk2mem(newp); + } + else { + void* newmem = internal_malloc(m, bytes); + if (newmem != 0) { + size_t oc = oldsize - overhead_for(oldp); + memcpy(newmem, oldmem, (oc < bytes)? oc : bytes); + internal_free(m, oldmem); + } + return newmem; + } + } + return 0; +} + +/* --------------------------- memalign support -------------------------- */ + +static void* internal_memalign(mstate m, size_t alignment, size_t bytes) { + if (alignment <= MALLOC_ALIGNMENT) /* Can just use malloc */ + return internal_malloc(m, bytes); + if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */ + alignment = MIN_CHUNK_SIZE; + if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */ + size_t a = MALLOC_ALIGNMENT << 1; + while (a < alignment) a <<= 1; + alignment = a; + } + + if (bytes >= MAX_REQUEST - alignment) { + if (m != 0) { /* Test isn't needed but avoids compiler warning */ + MALLOC_FAILURE_ACTION; + } + } + else { + size_t nb = request2size(bytes); + size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD; + char* mem = (char*)internal_malloc(m, req); + if (mem != 0) { + void* leader = 0; + void* trailer = 0; + mchunkptr p = mem2chunk(mem); + + if (PREACTION(m)) return 0; + if ((((size_t)(mem)) % alignment) != 0) { /* misaligned */ + /* + Find an aligned spot inside chunk. Since we need to give + back leading space in a chunk of at least MIN_CHUNK_SIZE, if + the first calculation places us at a spot with less than + MIN_CHUNK_SIZE leader, we can move to the next aligned spot. + We've allocated enough total room so that this is always + possible. + */ + char* br = (char*)mem2chunk((size_t)(((size_t)(mem + + alignment - + SIZE_T_ONE)) & + -alignment)); + char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)? + br : br+alignment; + mchunkptr newp = (mchunkptr)pos; + size_t leadsize = pos - (char*)(p); + size_t newsize = chunksize(p) - leadsize; + + if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */ + newp->prev_foot = p->prev_foot + leadsize; + newp->head = (newsize|CINUSE_BIT); + } + else { /* Otherwise, give back leader, use the rest */ + set_inuse(m, newp, newsize); + set_inuse(m, p, leadsize); + leader = chunk2mem(p); + } + p = newp; + } + + /* Give back spare room at the end */ + if (!is_mmapped(p)) { + size_t size = chunksize(p); + if (size > nb + MIN_CHUNK_SIZE) { + size_t remainder_size = size - nb; + mchunkptr remainder = chunk_plus_offset(p, nb); + set_inuse(m, p, nb); + set_inuse(m, remainder, remainder_size); + trailer = chunk2mem(remainder); + } + } + + assert (chunksize(p) >= nb); + assert((((size_t)(chunk2mem(p))) % alignment) == 0); + check_inuse_chunk(m, p); + POSTACTION(m); + if (leader != 0) { + internal_free(m, leader); + } + if (trailer != 0) { + internal_free(m, trailer); + } + return chunk2mem(p); + } + } + return 0; +} + +/* ------------------------ comalloc/coalloc support --------------------- */ + +static void** ialloc(mstate m, + size_t n_elements, + size_t* sizes, + int opts, + void* chunks[]) { + /* + This provides common support for independent_X routines, handling + all of the combinations that can result. + + The opts arg has: + bit 0 set if all elements are same size (using sizes[0]) + bit 1 set if elements should be zeroed + */ + + size_t element_size; /* chunksize of each element, if all same */ + size_t contents_size; /* total size of elements */ + size_t array_size; /* request size of pointer array */ + void* mem; /* malloced aggregate space */ + mchunkptr p; /* corresponding chunk */ + size_t remainder_size; /* remaining bytes while splitting */ + void** marray; /* either "chunks" or malloced ptr array */ + mchunkptr array_chunk; /* chunk for malloced ptr array */ + flag_t was_enabled; /* to disable mmap */ + size_t size; + size_t i; + + /* compute array length, if needed */ + if (chunks != 0) { + if (n_elements == 0) + return chunks; /* nothing to do */ + marray = chunks; + array_size = 0; + } + else { + /* if empty req, must still return chunk representing empty array */ + if (n_elements == 0) + return (void**)internal_malloc(m, 0); + marray = 0; + array_size = request2size(n_elements * (sizeof(void*))); + } + + /* compute total element size */ + if (opts & 0x1) { /* all-same-size */ + element_size = request2size(*sizes); + contents_size = n_elements * element_size; + } + else { /* add up all the sizes */ + element_size = 0; + contents_size = 0; + for (i = 0; i != n_elements; ++i) + contents_size += request2size(sizes[i]); + } + + size = contents_size + array_size; + + /* + Allocate the aggregate chunk. First disable direct-mmapping so + malloc won't use it, since we would not be able to later + free/realloc space internal to a segregated mmap region. + */ + was_enabled = use_mmap(m); + disable_mmap(m); + mem = internal_malloc(m, size - CHUNK_OVERHEAD); + if (was_enabled) + enable_mmap(m); + if (mem == 0) + return 0; + + if (PREACTION(m)) return 0; + p = mem2chunk(mem); + remainder_size = chunksize(p); + + assert(!is_mmapped(p)); + + if (opts & 0x2) { /* optionally clear the elements */ + memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size); + } + + /* If not provided, allocate the pointer array as final part of chunk */ + if (marray == 0) { + size_t array_chunk_size; + array_chunk = chunk_plus_offset(p, contents_size); + array_chunk_size = remainder_size - contents_size; + marray = (void**) (chunk2mem(array_chunk)); + set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size); + remainder_size = contents_size; + } + + /* split out elements */ + for (i = 0; ; ++i) { + marray[i] = chunk2mem(p); + if (i != n_elements-1) { + if (element_size != 0) + size = element_size; + else + size = request2size(sizes[i]); + remainder_size -= size; + set_size_and_pinuse_of_inuse_chunk(m, p, size); + p = chunk_plus_offset(p, size); + } + else { /* the final element absorbs any overallocation slop */ + set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size); + break; + } + } + +#if DEBUG + if (marray != chunks) { + /* final element must have exactly exhausted chunk */ + if (element_size != 0) { + assert(remainder_size == element_size); + } + else { + assert(remainder_size == request2size(sizes[i])); + } + check_inuse_chunk(m, mem2chunk(marray)); + } + for (i = 0; i != n_elements; ++i) + check_inuse_chunk(m, mem2chunk(marray[i])); + +#endif /* DEBUG */ + + POSTACTION(m); + return marray; +} + + +/* -------------------------- public routines ---------------------------- */ + +#if !ONLY_MSPACES + +void* dlmalloc(size_t bytes) { + /* + Basic algorithm: + If a small request (< 256 bytes minus per-chunk overhead): + 1. If one exists, use a remainderless chunk in associated smallbin. + (Remainderless means that there are too few excess bytes to + represent as a chunk.) + 2. If it is big enough, use the dv chunk, which is normally the + chunk adjacent to the one used for the most recent small request. + 3. If one exists, split the smallest available chunk in a bin, + saving remainder in dv. + 4. If it is big enough, use the top chunk. + 5. If available, get memory from system and use it + Otherwise, for a large request: + 1. Find the smallest available binned chunk that fits, and use it + if it is better fitting than dv chunk, splitting if necessary. + 2. If better fitting than any binned chunk, use the dv chunk. + 3. If it is big enough, use the top chunk. + 4. If request size >= mmap threshold, try to directly mmap this chunk. + 5. If available, get memory from system and use it + + The ugly goto's here ensure that postaction occurs along all paths. + */ + + if (!PREACTION(gm)) { + void* mem; + size_t nb; + if (bytes <= MAX_SMALL_REQUEST) { + bindex_t idx; + binmap_t smallbits; + nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes); + idx = small_index(nb); + smallbits = gm->smallmap >> idx; + + if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */ + mchunkptr b, p; + idx += ~smallbits & 1; /* Uses next bin if idx empty */ + b = smallbin_at(gm, idx); + p = b->fd; + assert(chunksize(p) == small_index2size(idx)); + unlink_first_small_chunk(gm, b, p, idx); + set_inuse_and_pinuse(gm, p, small_index2size(idx)); + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (nb > gm->dvsize) { + if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ + mchunkptr b, p, r; + size_t rsize; + bindex_t i; + binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + b = smallbin_at(gm, i); + p = b->fd; + assert(chunksize(p) == small_index2size(i)); + unlink_first_small_chunk(gm, b, p, i); + rsize = small_index2size(i) - nb; + /* Fit here cannot be remainderless if 4byte sizes */ + if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(gm, p, small_index2size(i)); + else { + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + r = chunk_plus_offset(p, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(gm, r, rsize); + } + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) { + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + } + } + else if (bytes >= MAX_REQUEST) + nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */ + else { + nb = pad_request(bytes); + if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) { + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + } + + if (nb <= gm->dvsize) { + size_t rsize = gm->dvsize - nb; + mchunkptr p = gm->dv; + if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ + mchunkptr r = gm->dv = chunk_plus_offset(p, nb); + gm->dvsize = rsize; + set_size_and_pinuse_of_free_chunk(r, rsize); + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + } + else { /* exhaust dv */ + size_t dvs = gm->dvsize; + gm->dvsize = 0; + gm->dv = 0; + set_inuse_and_pinuse(gm, p, dvs); + } + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (nb < gm->topsize) { /* Split top */ + size_t rsize = gm->topsize -= nb; + mchunkptr p = gm->top; + mchunkptr r = gm->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + mem = chunk2mem(p); + check_top_chunk(gm, gm->top); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + mem = sys_alloc(gm, nb); + + postaction: + POSTACTION(gm); + return mem; + } + + return 0; +} + +void dlfree(void* mem) { + /* + Consolidate freed chunks with preceding or succeeding bordering + free chunks, if they exist, and then place in a bin. Intermixed + with special cases for top, dv, mmapped chunks, and usage errors. + */ + + if (mem != 0) { + mchunkptr p = mem2chunk(mem); +#if FOOTERS + mstate fm = get_mstate_for(p); + if (!ok_magic(fm)) { + USAGE_ERROR_ACTION(fm, p); + return; + } +#else /* FOOTERS */ +#define fm gm +#endif /* FOOTERS */ + if (!PREACTION(fm)) { + check_inuse_chunk(fm, p); + if (RTCHECK(ok_address(fm, p) && ok_cinuse(p))) { + size_t psize = chunksize(p); + mchunkptr next = chunk_plus_offset(p, psize); + if (!pinuse(p)) { + size_t prevsize = p->prev_foot; + if ((prevsize & IS_MMAPPED_BIT) != 0) { + prevsize &= ~IS_MMAPPED_BIT; + psize += prevsize + MMAP_FOOT_PAD; + if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) + fm->footprint -= psize; + goto postaction; + } + else { + mchunkptr prev = chunk_minus_offset(p, prevsize); + psize += prevsize; + p = prev; + if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ + if (p != fm->dv) { + unlink_chunk(fm, p, prevsize); + } + else if ((next->head & INUSE_BITS) == INUSE_BITS) { + fm->dvsize = psize; + set_free_with_pinuse(p, psize, next); + goto postaction; + } + } + else + goto erroraction; + } + } + + if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { + if (!cinuse(next)) { /* consolidate forward */ + if (next == fm->top) { + size_t tsize = fm->topsize += psize; + fm->top = p; + p->head = tsize | PINUSE_BIT; + if (p == fm->dv) { + fm->dv = 0; + fm->dvsize = 0; + } + if (should_trim(fm, tsize)) + sys_trim(fm, 0); + goto postaction; + } + else if (next == fm->dv) { + size_t dsize = fm->dvsize += psize; + fm->dv = p; + set_size_and_pinuse_of_free_chunk(p, dsize); + goto postaction; + } + else { + size_t nsize = chunksize(next); + psize += nsize; + unlink_chunk(fm, next, nsize); + set_size_and_pinuse_of_free_chunk(p, psize); + if (p == fm->dv) { + fm->dvsize = psize; + goto postaction; + } + } + } + else + set_free_with_pinuse(p, psize, next); + insert_chunk(fm, p, psize); + check_free_chunk(fm, p); + goto postaction; + } + } + erroraction: + USAGE_ERROR_ACTION(fm, p); + postaction: + POSTACTION(fm); + } + } +#if !FOOTERS +#undef fm +#endif /* FOOTERS */ +} + +void* dlcalloc(size_t n_elements, size_t elem_size) { + void* mem; + size_t req = 0; + if (n_elements != 0) { + req = n_elements * elem_size; + if (((n_elements | elem_size) & ~(size_t)0xffff) && + (req / n_elements != elem_size)) + req = MAX_SIZE_T; /* force downstream failure on overflow */ + } + mem = dlmalloc(req); + if (mem != 0 && calloc_must_clear(mem2chunk(mem))) + memset(mem, 0, req); + return mem; +} + +void* dlrealloc(void* oldmem, size_t bytes) { + if (oldmem == 0) + return dlmalloc(bytes); +#ifdef REALLOC_ZERO_BYTES_FREES + if (bytes == 0) { + dlfree(oldmem); + return 0; + } +#endif /* REALLOC_ZERO_BYTES_FREES */ + else { +#if ! FOOTERS + mstate m = gm; +#else /* FOOTERS */ + mstate m = get_mstate_for(mem2chunk(oldmem)); + if (!ok_magic(m)) { + USAGE_ERROR_ACTION(m, oldmem); + return 0; + } +#endif /* FOOTERS */ + return internal_realloc(m, oldmem, bytes); + } +} + +void* dlmemalign(size_t alignment, size_t bytes) { + return internal_memalign(gm, alignment, bytes); +} + +void** dlindependent_calloc(size_t n_elements, size_t elem_size, + void* chunks[]) { + size_t sz = elem_size; /* serves as 1-element array */ + return ialloc(gm, n_elements, &sz, 3, chunks); +} + +void** dlindependent_comalloc(size_t n_elements, size_t sizes[], + void* chunks[]) { + return ialloc(gm, n_elements, sizes, 0, chunks); +} + +void* dlvalloc(size_t bytes) { + size_t pagesz; + init_mparams(); + pagesz = mparams.page_size; + return dlmemalign(pagesz, bytes); +} + +void* dlpvalloc(size_t bytes) { + size_t pagesz; + init_mparams(); + pagesz = mparams.page_size; + return dlmemalign(pagesz, (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE)); +} + +int dlmalloc_trim(size_t pad) { + int result = 0; + if (!PREACTION(gm)) { + result = sys_trim(gm, pad); + POSTACTION(gm); + } + return result; +} + +size_t dlmalloc_footprint(void) { + return gm->footprint; +} + +size_t dlmalloc_max_footprint(void) { + return gm->max_footprint; +} + +#if !NO_MALLINFO +struct mallinfo dlmallinfo(void) { + return internal_mallinfo(gm); +} +#endif /* NO_MALLINFO */ + +void dlmalloc_stats() { + internal_malloc_stats(gm); +} + +size_t dlmalloc_usable_size(void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + if (cinuse(p)) + return chunksize(p) - overhead_for(p); + } + return 0; +} + +int dlmallopt(int param_number, int value) { + return change_mparam(param_number, value); +} + +#endif /* !ONLY_MSPACES */ + +/* ----------------------------- user mspaces ---------------------------- */ + +#if MSPACES + +static mstate init_user_mstate(char* tbase, size_t tsize) { + size_t msize = pad_request(sizeof(struct malloc_state)); + mchunkptr mn; + mchunkptr msp = align_as_chunk(tbase); + mstate m = (mstate)(chunk2mem(msp)); + memset(m, 0, msize); + INITIAL_LOCK(&m->mutex); + msp->head = (msize|PINUSE_BIT|CINUSE_BIT); + m->seg.base = m->least_addr = tbase; + m->seg.size = m->footprint = m->max_footprint = tsize; + m->magic = mparams.magic; + m->mflags = mparams.default_mflags; + disable_contiguous(m); + init_bins(m); + mn = next_chunk(mem2chunk(m)); + init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE); + check_top_chunk(m, m->top); + return m; +} + +mspace create_mspace(size_t capacity, int locked) { + mstate m = 0; + size_t msize = pad_request(sizeof(struct malloc_state)); + init_mparams(); /* Ensure pagesize etc initialized */ + + if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { + size_t rs = ((capacity == 0)? mparams.granularity : + (capacity + TOP_FOOT_SIZE + msize)); + size_t tsize = granularity_align(rs); + char* tbase = (char*)(CALL_MMAP(tsize)); + if (tbase != CMFAIL) { + m = init_user_mstate(tbase, tsize); + set_segment_flags(&m->seg, IS_MMAPPED_BIT); + set_lock(m, locked); + } + } + return (mspace)m; +} + +mspace create_mspace_with_base(void* base, size_t capacity, int locked) { + mstate m = 0; + size_t msize = pad_request(sizeof(struct malloc_state)); + init_mparams(); /* Ensure pagesize etc initialized */ + + if (capacity > msize + TOP_FOOT_SIZE && + capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { + m = init_user_mstate((char*)base, capacity); + set_segment_flags(&m->seg, EXTERN_BIT); + set_lock(m, locked); + } + return (mspace)m; +} + +size_t destroy_mspace(mspace msp) { + size_t freed = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + msegmentptr sp = &ms->seg; + while (sp != 0) { + char* base = sp->base; + size_t size = sp->size; + flag_t flag = get_segment_flags(sp); + sp = sp->next; + if ((flag & IS_MMAPPED_BIT) && !(flag & EXTERN_BIT) && + CALL_MUNMAP(base, size) == 0) + freed += size; + } + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return freed; +} + +/* + mspace versions of routines are near-clones of the global + versions. This is not so nice but better than the alternatives. +*/ + + +void* mspace_malloc(mspace msp, size_t bytes) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + if (!PREACTION(ms)) { + void* mem; + size_t nb; + if (bytes <= MAX_SMALL_REQUEST) { + bindex_t idx; + binmap_t smallbits; + nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes); + idx = small_index(nb); + smallbits = ms->smallmap >> idx; + + if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */ + mchunkptr b, p; + idx += ~smallbits & 1; /* Uses next bin if idx empty */ + b = smallbin_at(ms, idx); + p = b->fd; + assert(chunksize(p) == small_index2size(idx)); + unlink_first_small_chunk(ms, b, p, idx); + set_inuse_and_pinuse(ms, p, small_index2size(idx)); + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (nb > ms->dvsize) { + if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ + mchunkptr b, p, r; + size_t rsize; + bindex_t i; + binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + b = smallbin_at(ms, i); + p = b->fd; + assert(chunksize(p) == small_index2size(i)); + unlink_first_small_chunk(ms, b, p, i); + rsize = small_index2size(i) - nb; + /* Fit here cannot be remainderless if 4byte sizes */ + if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(ms, p, small_index2size(i)); + else { + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + r = chunk_plus_offset(p, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(ms, r, rsize); + } + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) { + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + } + } + else if (bytes >= MAX_REQUEST) + nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */ + else { + nb = pad_request(bytes); + if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) { + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + } + + if (nb <= ms->dvsize) { + size_t rsize = ms->dvsize - nb; + mchunkptr p = ms->dv; + if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ + mchunkptr r = ms->dv = chunk_plus_offset(p, nb); + ms->dvsize = rsize; + set_size_and_pinuse_of_free_chunk(r, rsize); + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + } + else { /* exhaust dv */ + size_t dvs = ms->dvsize; + ms->dvsize = 0; + ms->dv = 0; + set_inuse_and_pinuse(ms, p, dvs); + } + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (nb < ms->topsize) { /* Split top */ + size_t rsize = ms->topsize -= nb; + mchunkptr p = ms->top; + mchunkptr r = ms->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + mem = chunk2mem(p); + check_top_chunk(ms, ms->top); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + mem = sys_alloc(ms, nb); + + postaction: + POSTACTION(ms); + return mem; + } + + return 0; +} + +void mspace_free(mspace msp, void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); +#if FOOTERS + mstate fm = get_mstate_for(p); +#else /* FOOTERS */ + mstate fm = (mstate)msp; +#endif /* FOOTERS */ + if (!ok_magic(fm)) { + USAGE_ERROR_ACTION(fm, p); + return; + } + if (!PREACTION(fm)) { + check_inuse_chunk(fm, p); + if (RTCHECK(ok_address(fm, p) && ok_cinuse(p))) { + size_t psize = chunksize(p); + mchunkptr next = chunk_plus_offset(p, psize); + if (!pinuse(p)) { + size_t prevsize = p->prev_foot; + if ((prevsize & IS_MMAPPED_BIT) != 0) { + prevsize &= ~IS_MMAPPED_BIT; + psize += prevsize + MMAP_FOOT_PAD; + if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) + fm->footprint -= psize; + goto postaction; + } + else { + mchunkptr prev = chunk_minus_offset(p, prevsize); + psize += prevsize; + p = prev; + if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ + if (p != fm->dv) { + unlink_chunk(fm, p, prevsize); + } + else if ((next->head & INUSE_BITS) == INUSE_BITS) { + fm->dvsize = psize; + set_free_with_pinuse(p, psize, next); + goto postaction; + } + } + else + goto erroraction; + } + } + + if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { + if (!cinuse(next)) { /* consolidate forward */ + if (next == fm->top) { + size_t tsize = fm->topsize += psize; + fm->top = p; + p->head = tsize | PINUSE_BIT; + if (p == fm->dv) { + fm->dv = 0; + fm->dvsize = 0; + } + if (should_trim(fm, tsize)) + sys_trim(fm, 0); + goto postaction; + } + else if (next == fm->dv) { + size_t dsize = fm->dvsize += psize; + fm->dv = p; + set_size_and_pinuse_of_free_chunk(p, dsize); + goto postaction; + } + else { + size_t nsize = chunksize(next); + psize += nsize; + unlink_chunk(fm, next, nsize); + set_size_and_pinuse_of_free_chunk(p, psize); + if (p == fm->dv) { + fm->dvsize = psize; + goto postaction; + } + } + } + else + set_free_with_pinuse(p, psize, next); + insert_chunk(fm, p, psize); + check_free_chunk(fm, p); + goto postaction; + } + } + erroraction: + USAGE_ERROR_ACTION(fm, p); + postaction: + POSTACTION(fm); + } + } +} + +void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) { + void* mem; + size_t req = 0; + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + if (n_elements != 0) { + req = n_elements * elem_size; + if (((n_elements | elem_size) & ~(size_t)0xffff) && + (req / n_elements != elem_size)) + req = MAX_SIZE_T; /* force downstream failure on overflow */ + } + mem = internal_malloc(ms, req); + if (mem != 0 && calloc_must_clear(mem2chunk(mem))) + memset(mem, 0, req); + return mem; +} + +void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) { + if (oldmem == 0) + return mspace_malloc(msp, bytes); +#ifdef REALLOC_ZERO_BYTES_FREES + if (bytes == 0) { + mspace_free(msp, oldmem); + return 0; + } +#endif /* REALLOC_ZERO_BYTES_FREES */ + else { +#if FOOTERS + mchunkptr p = mem2chunk(oldmem); + mstate ms = get_mstate_for(p); +#else /* FOOTERS */ + mstate ms = (mstate)msp; +#endif /* FOOTERS */ + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return internal_realloc(ms, oldmem, bytes); + } +} + +void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return internal_memalign(ms, alignment, bytes); +} + +void** mspace_independent_calloc(mspace msp, size_t n_elements, + size_t elem_size, void* chunks[]) { + size_t sz = elem_size; /* serves as 1-element array */ + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return ialloc(ms, n_elements, &sz, 3, chunks); +} + +void** mspace_independent_comalloc(mspace msp, size_t n_elements, + size_t sizes[], void* chunks[]) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return ialloc(ms, n_elements, sizes, 0, chunks); +} + +int mspace_trim(mspace msp, size_t pad) { + int result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + if (!PREACTION(ms)) { + result = sys_trim(ms, pad); + POSTACTION(ms); + } + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +void mspace_malloc_stats(mspace msp) { + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + internal_malloc_stats(ms); + } + else { + USAGE_ERROR_ACTION(ms,ms); + } +} + +size_t mspace_footprint(mspace msp) { + size_t result; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + result = ms->footprint; + } + USAGE_ERROR_ACTION(ms,ms); + return result; +} + + +size_t mspace_max_footprint(mspace msp) { + size_t result; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + result = ms->max_footprint; + } + USAGE_ERROR_ACTION(ms,ms); + return result; +} + + +#if !NO_MALLINFO +struct mallinfo mspace_mallinfo(mspace msp) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + } + return internal_mallinfo(ms); +} +#endif /* NO_MALLINFO */ + +int mspace_mallopt(int param_number, int value) { + return change_mparam(param_number, value); +} + +#endif /* MSPACES */ + +/* -------------------- Alternative MORECORE functions ------------------- */ + +/* + Guidelines for creating a custom version of MORECORE: + + * For best performance, MORECORE should allocate in multiples of pagesize. + * MORECORE may allocate more memory than requested. (Or even less, + but this will usually result in a malloc failure.) + * MORECORE must not allocate memory when given argument zero, but + instead return one past the end address of memory from previous + nonzero call. + * For best performance, consecutive calls to MORECORE with positive + arguments should return increasing addresses, indicating that + space has been contiguously extended. + * Even though consecutive calls to MORECORE need not return contiguous + addresses, it must be OK for malloc'ed chunks to span multiple + regions in those cases where they do happen to be contiguous. + * MORECORE need not handle negative arguments -- it may instead + just return MFAIL when given negative arguments. + Negative arguments are always multiples of pagesize. MORECORE + must not misinterpret negative args as large positive unsigned + args. You can suppress all such calls from even occurring by defining + MORECORE_CANNOT_TRIM, + + As an example alternative MORECORE, here is a custom allocator + kindly contributed for pre-OSX macOS. It uses virtually but not + necessarily physically contiguous non-paged memory (locked in, + present and won't get swapped out). You can use it by uncommenting + this section, adding some #includes, and setting up the appropriate + defines above: + + #define MORECORE osMoreCore + + There is also a shutdown routine that should somehow be called for + cleanup upon program exit. + + #define MAX_POOL_ENTRIES 100 + #define MINIMUM_MORECORE_SIZE (64 * 1024U) + static int next_os_pool; + void *our_os_pools[MAX_POOL_ENTRIES]; + + void *osMoreCore(int size) + { + void *ptr = 0; + static void *sbrk_top = 0; + + if (size > 0) + { + if (size < MINIMUM_MORECORE_SIZE) + size = MINIMUM_MORECORE_SIZE; + if (CurrentExecutionLevel() == kTaskLevel) + ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); + if (ptr == 0) + { + return (void *) MFAIL; + } + // save ptrs so they can be freed during cleanup + our_os_pools[next_os_pool] = ptr; + next_os_pool++; + ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); + sbrk_top = (char *) ptr + size; + return ptr; + } + else if (size < 0) + { + // we don't currently support shrink behavior + return (void *) MFAIL; + } + else + { + return sbrk_top; + } + } + + // cleanup any allocated memory pools + // called as last thing before shutting down driver + + void osCleanupMem(void) + { + void **ptr; + + for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) + if (*ptr) + { + PoolDeallocate(*ptr); + *ptr = 0; + } + } + +*/ + + +/* ----------------------------------------------------------------------- +History: + V2.8.3 Thu Sep 22 11:16:32 2005 Doug Lea (dl at gee) + * Add max_footprint functions + * Ensure all appropriate literals are size_t + * Fix conditional compilation problem for some #define settings + * Avoid concatenating segments with the one provided + in create_mspace_with_base + * Rename some variables to avoid compiler shadowing warnings + * Use explicit lock initialization. + * Better handling of sbrk interference. + * Simplify and fix segment insertion, trimming and mspace_destroy + * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x + * Thanks especially to Dennis Flanagan for help on these. + + V2.8.2 Sun Jun 12 16:01:10 2005 Doug Lea (dl at gee) + * Fix memalign brace error. + + V2.8.1 Wed Jun 8 16:11:46 2005 Doug Lea (dl at gee) + * Fix improper #endif nesting in C++ + * Add explicit casts needed for C++ + + V2.8.0 Mon May 30 14:09:02 2005 Doug Lea (dl at gee) + * Use trees for large bins + * Support mspaces + * Use segments to unify sbrk-based and mmap-based system allocation, + removing need for emulation on most platforms without sbrk. + * Default safety checks + * Optional footer checks. Thanks to William Robertson for the idea. + * Internal code refactoring + * Incorporate suggestions and platform-specific changes. + Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas, + Aaron Bachmann, Emery Berger, and others. + * Speed up non-fastbin processing enough to remove fastbins. + * Remove useless cfree() to avoid conflicts with other apps. + * Remove internal memcpy, memset. Compilers handle builtins better. + * Remove some options that no one ever used and rename others. + + V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee) + * Fix malloc_state bitmap array misdeclaration + + V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee) + * Allow tuning of FIRST_SORTED_BIN_SIZE + * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte. + * Better detection and support for non-contiguousness of MORECORE. + Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger + * Bypass most of malloc if no frees. Thanks To Emery Berger. + * Fix freeing of old top non-contiguous chunk im sysmalloc. + * Raised default trim and map thresholds to 256K. + * Fix mmap-related #defines. Thanks to Lubos Lunak. + * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield. + * Branch-free bin calculation + * Default trim and mmap thresholds now 256K. + + V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee) + * Introduce independent_comalloc and independent_calloc. + Thanks to Michael Pachos for motivation and help. + * Make optional .h file available + * Allow > 2GB requests on 32bit systems. + * new WIN32 sbrk, mmap, munmap, lock code from <Walter@GeNeSys-e.de>. + Thanks also to Andreas Mueller <a.mueller at paradatec.de>, + and Anonymous. + * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for + helping test this.) + * memalign: check alignment arg + * realloc: don't try to shift chunks backwards, since this + leads to more fragmentation in some programs and doesn't + seem to help in any others. + * Collect all cases in malloc requiring system memory into sysmalloc + * Use mmap as backup to sbrk + * Place all internal state in malloc_state + * Introduce fastbins (although similar to 2.5.1) + * Many minor tunings and cosmetic improvements + * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK + * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS + Thanks to Tony E. Bennett <tbennett@nvidia.com> and others. + * Include errno.h to support default failure action. + + V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee) + * return null for negative arguments + * Added Several WIN32 cleanups from Martin C. Fong <mcfong at yahoo.com> + * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h' + (e.g. WIN32 platforms) + * Cleanup header file inclusion for WIN32 platforms + * Cleanup code to avoid Microsoft Visual C++ compiler complaints + * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing + memory allocation routines + * Set 'malloc_getpagesize' for WIN32 platforms (needs more work) + * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to + usage of 'assert' in non-WIN32 code + * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to + avoid infinite loop + * Always call 'fREe()' rather than 'free()' + + V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee) + * Fixed ordering problem with boundary-stamping + + V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee) + * Added pvalloc, as recommended by H.J. Liu + * Added 64bit pointer support mainly from Wolfram Gloger + * Added anonymously donated WIN32 sbrk emulation + * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen + * malloc_extend_top: fix mask error that caused wastage after + foreign sbrks + * Add linux mremap support code from HJ Liu + + V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee) + * Integrated most documentation with the code. + * Add support for mmap, with help from + Wolfram Gloger (Gloger@lrz.uni-muenchen.de). + * Use last_remainder in more cases. + * Pack bins using idea from colin@nyx10.cs.du.edu + * Use ordered bins instead of best-fit threshhold + * Eliminate block-local decls to simplify tracing and debugging. + * Support another case of realloc via move into top + * Fix error occuring when initial sbrk_base not word-aligned. + * Rely on page size for units instead of SBRK_UNIT to + avoid surprises about sbrk alignment conventions. + * Add mallinfo, mallopt. Thanks to Raymond Nijssen + (raymond@es.ele.tue.nl) for the suggestion. + * Add `pad' argument to malloc_trim and top_pad mallopt parameter. + * More precautions for cases where other routines call sbrk, + courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de). + * Added macros etc., allowing use in linux libc from + H.J. Lu (hjl@gnu.ai.mit.edu) + * Inverted this history list + + V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee) + * Re-tuned and fixed to behave more nicely with V2.6.0 changes. + * Removed all preallocation code since under current scheme + the work required to undo bad preallocations exceeds + the work saved in good cases for most test programs. + * No longer use return list or unconsolidated bins since + no scheme using them consistently outperforms those that don't + given above changes. + * Use best fit for very large chunks to prevent some worst-cases. + * Added some support for debugging + + V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee) + * Removed footers when chunks are in use. Thanks to + Paul Wilson (wilson@cs.texas.edu) for the suggestion. + + V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee) + * Added malloc_trim, with help from Wolfram Gloger + (wmglo@Dent.MED.Uni-Muenchen.DE). + + V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g) + + V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g) + * realloc: try to expand in both directions + * malloc: swap order of clean-bin strategy; + * realloc: only conditionally expand backwards + * Try not to scavenge used bins + * Use bin counts as a guide to preallocation + * Occasionally bin return list chunks in first scan + * Add a few optimizations from colin@nyx10.cs.du.edu + + V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g) + * faster bin computation & slightly different binning + * merged all consolidations to one part of malloc proper + (eliminating old malloc_find_space & malloc_clean_bin) + * Scan 2 returns chunks (not just 1) + * Propagate failure in realloc if malloc returns 0 + * Add stuff to allow compilation on non-ANSI compilers + from kpv@research.att.com + + V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu) + * removed potential for odd address access in prev_chunk + * removed dependency on getpagesize.h + * misc cosmetics and a bit more internal documentation + * anticosmetics: mangled names in macros to evade debugger strangeness + * tested on sparc, hp-700, dec-mips, rs6000 + with gcc & native cc (hp, dec only) allowing + Detlefs & Zorn comparison study (in SIGPLAN Notices.) + + Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu) + * Based loosely on libg++-1.2X malloc. (It retains some of the overall + structure of old version, but most details differ.) + +*/ diff --git a/libffi/src/frv/eabi.S b/libffi/src/frv/eabi.S new file mode 100644 index 000000000..379ea4bb0 --- /dev/null +++ b/libffi/src/frv/eabi.S @@ -0,0 +1,128 @@ +/* ----------------------------------------------------------------------- + eabi.S - Copyright (c) 2004 Anthony Green + + FR-V Assembly glue. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + + .globl ffi_prep_args_EABI + + .text + .p2align 4 + .globl ffi_call_EABI + .type ffi_call_EABI, @function + + # gr8 : ffi_prep_args + # gr9 : &ecif + # gr10: cif->bytes + # gr11: fig->flags + # gr12: ecif.rvalue + # gr13: fn + +ffi_call_EABI: + addi sp, #-80, sp + sti fp, @(sp, #24) + addi sp, #24, fp + movsg lr, gr5 + + /* Make room for the new arguments. */ + /* subi sp, fp, gr10 */ + + /* Store return address and incoming args on stack. */ + sti gr5, @(fp, #8) + sti gr8, @(fp, #-4) + sti gr9, @(fp, #-8) + sti gr10, @(fp, #-12) + sti gr11, @(fp, #-16) + sti gr12, @(fp, #-20) + sti gr13, @(fp, #-24) + + sub sp, gr10, sp + + /* Call ffi_prep_args. */ + ldi @(fp, #-4), gr4 + addi sp, #0, gr8 + ldi @(fp, #-8), gr9 +#ifdef __FRV_FDPIC__ + ldd @(gr4, gr0), gr14 + calll @(gr14, gr0) +#else + calll @(gr4, gr0) +#endif + + /* ffi_prep_args returns the new stack pointer. */ + mov gr8, gr4 + + ldi @(sp, #0), gr8 + ldi @(sp, #4), gr9 + ldi @(sp, #8), gr10 + ldi @(sp, #12), gr11 + ldi @(sp, #16), gr12 + ldi @(sp, #20), gr13 + + /* Always copy the return value pointer into the hidden + parameter register. This is only strictly necessary + when we're returning an aggregate type, but it doesn't + hurt to do this all the time, and it saves a branch. */ + ldi @(fp, #-20), gr3 + + /* Use the ffi_prep_args return value for the new sp. */ + mov gr4, sp + + /* Call the target function. */ + ldi @(fp, -24), gr4 +#ifdef __FRV_FDPIC__ + ldd @(gr4, gr0), gr14 + calll @(gr14, gr0) +#else + calll @(gr4, gr0) +#endif + + /* Store the result. */ + ldi @(fp, #-16), gr10 /* fig->flags */ + ldi @(fp, #-20), gr4 /* ecif.rvalue */ + + /* Is the return value stored in two registers? */ + cmpi gr10, #8, icc0 + bne icc0, 0, .L2 + /* Yes, save them. */ + sti gr8, @(gr4, #0) + sti gr9, @(gr4, #4) + bra .L3 +.L2: + /* Is the return value a structure? */ + cmpi gr10, #-1, icc0 + beq icc0, 0, .L3 + /* No, save a 4 byte return value. */ + sti gr8, @(gr4, #0) +.L3: + + /* Restore the stack, and return. */ + ldi @(fp, 8), gr5 + ld @(fp, gr0), fp + addi sp,#80,sp + jmpl @(gr5,gr0) + .size ffi_call_EABI, .-ffi_call_EABI + diff --git a/libffi/src/frv/ffi.c b/libffi/src/frv/ffi.c new file mode 100644 index 000000000..5698c89c3 --- /dev/null +++ b/libffi/src/frv/ffi.c @@ -0,0 +1,292 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (C) 2004 Anthony Green + Copyright (C) 2007 Free Software Foundation, Inc. + Copyright (C) 2008 Red Hat, Inc. + + FR-V Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> + +/* ffi_prep_args is called by the assembly routine once stack space + has been allocated for the function's arguments */ + +void *ffi_prep_args(char *stack, extended_cif *ecif) +{ + register unsigned int i; + register void **p_argv; + register char *argp; + register ffi_type **p_arg; + register int count = 0; + + p_argv = ecif->avalue; + argp = stack; + + for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; + (i != 0); + i--, p_arg++) + { + size_t z; + + z = (*p_arg)->size; + + if ((*p_arg)->type == FFI_TYPE_STRUCT) + { + z = sizeof(void*); + *(void **) argp = *p_argv; + } + /* if ((*p_arg)->type == FFI_TYPE_FLOAT) + { + if (count > 24) + { + // This is going on the stack. Turn it into a double. + *(double *) argp = (double) *(float*)(* p_argv); + z = sizeof(double); + } + else + *(void **) argp = *(void **)(* p_argv); + } */ + else if (z < sizeof(int)) + { + z = sizeof(int); + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + *(signed int *) argp = (signed int)*(SINT8 *)(* p_argv); + break; + + case FFI_TYPE_UINT8: + *(unsigned int *) argp = (unsigned int)*(UINT8 *)(* p_argv); + break; + + case FFI_TYPE_SINT16: + *(signed int *) argp = (signed int)*(SINT16 *)(* p_argv); + break; + + case FFI_TYPE_UINT16: + *(unsigned int *) argp = (unsigned int)*(UINT16 *)(* p_argv); + break; + + default: + FFI_ASSERT(0); + } + } + else if (z == sizeof(int)) + { + *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv); + } + else + { + memcpy(argp, *p_argv, z); + } + p_argv++; + argp += z; + count += z; + } + + return (stack + ((count > 24) ? 24 : ALIGN_DOWN(count, 8))); +} + +/* Perform machine dependent cif processing */ +ffi_status ffi_prep_cif_machdep(ffi_cif *cif) +{ + if (cif->rtype->type == FFI_TYPE_STRUCT) + cif->flags = -1; + else + cif->flags = cif->rtype->size; + + cif->bytes = ALIGN (cif->bytes, 8); + + return FFI_OK; +} + +extern void ffi_call_EABI(void *(*)(char *, extended_cif *), + extended_cif *, + unsigned, unsigned, + unsigned *, + void (*fn)(void)); + +void ffi_call(ffi_cif *cif, + void (*fn)(void), + void *rvalue, + void **avalue) +{ + extended_cif ecif; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return */ + /* value address then we need to make one */ + + if ((rvalue == NULL) && + (cif->rtype->type == FFI_TYPE_STRUCT)) + { + ecif.rvalue = alloca(cif->rtype->size); + } + else + ecif.rvalue = rvalue; + + + switch (cif->abi) + { + case FFI_EABI: + ffi_call_EABI(ffi_prep_args, &ecif, cif->bytes, + cif->flags, ecif.rvalue, fn); + break; + default: + FFI_ASSERT(0); + break; + } +} + +void ffi_closure_eabi (unsigned arg1, unsigned arg2, unsigned arg3, + unsigned arg4, unsigned arg5, unsigned arg6) +{ + /* This function is called by a trampoline. The trampoline stows a + pointer to the ffi_closure object in gr7. We must save this + pointer in a place that will persist while we do our work. */ + register ffi_closure *creg __asm__ ("gr7"); + ffi_closure *closure = creg; + + /* Arguments that don't fit in registers are found on the stack + at a fixed offset above the current frame pointer. */ + register char *frame_pointer __asm__ ("fp"); + char *stack_args = frame_pointer + 16; + + /* Lay the register arguments down in a continuous chunk of memory. */ + unsigned register_args[6] = + { arg1, arg2, arg3, arg4, arg5, arg6 }; + + ffi_cif *cif = closure->cif; + ffi_type **arg_types = cif->arg_types; + void **avalue = alloca (cif->nargs * sizeof(void *)); + char *ptr = (char *) register_args; + int i; + + /* Find the address of each argument. */ + for (i = 0; i < cif->nargs; i++) + { + switch (arg_types[i]->type) + { + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + avalue[i] = ptr + 3; + break; + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + avalue[i] = ptr + 2; + break; + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_FLOAT: + avalue[i] = ptr; + break; + case FFI_TYPE_STRUCT: + avalue[i] = *(void**)ptr; + break; + default: + /* This is an 8-byte value. */ + avalue[i] = ptr; + ptr += 4; + break; + } + ptr += 4; + + /* If we've handled more arguments than fit in registers, + start looking at the those passed on the stack. */ + if (ptr == ((char *)register_args + (6*4))) + ptr = stack_args; + } + + /* Invoke the closure. */ + if (cif->rtype->type == FFI_TYPE_STRUCT) + { + /* The caller allocates space for the return structure, and + passes a pointer to this space in gr3. Use this value directly + as the return value. */ + register void *return_struct_ptr __asm__("gr3"); + (closure->fun) (cif, return_struct_ptr, avalue, closure->user_data); + } + else + { + /* Allocate space for the return value and call the function. */ + long long rvalue; + (closure->fun) (cif, &rvalue, avalue, closure->user_data); + + /* Functions return 4-byte or smaller results in gr8. 8-byte + values also use gr9. We fill the both, even for small return + values, just to avoid a branch. */ + asm ("ldi @(%0, #0), gr8" : : "r" (&rvalue)); + asm ("ldi @(%0, #0), gr9" : : "r" (&((int *) &rvalue)[1])); + } +} + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *codeloc) +{ + unsigned int *tramp = (unsigned int *) &closure->tramp[0]; + unsigned long fn = (long) ffi_closure_eabi; + unsigned long cls = (long) codeloc; +#ifdef __FRV_FDPIC__ + register void *got __asm__("gr15"); +#endif + int i; + + fn = (unsigned long) ffi_closure_eabi; + +#ifdef __FRV_FDPIC__ + tramp[0] = &((unsigned int *)codeloc)[2]; + tramp[1] = got; + tramp[2] = 0x8cfc0000 + (fn & 0xffff); /* setlos lo(fn), gr6 */ + tramp[3] = 0x8efc0000 + (cls & 0xffff); /* setlos lo(cls), gr7 */ + tramp[4] = 0x8cf80000 + (fn >> 16); /* sethi hi(fn), gr6 */ + tramp[5] = 0x8ef80000 + (cls >> 16); /* sethi hi(cls), gr7 */ + tramp[6] = 0x9cc86000; /* ldi @(gr6, #0), gr14 */ + tramp[7] = 0x8030e000; /* jmpl @(gr14, gr0) */ +#else + tramp[0] = 0x8cfc0000 + (fn & 0xffff); /* setlos lo(fn), gr6 */ + tramp[1] = 0x8efc0000 + (cls & 0xffff); /* setlos lo(cls), gr7 */ + tramp[2] = 0x8cf80000 + (fn >> 16); /* sethi hi(fn), gr6 */ + tramp[3] = 0x8ef80000 + (cls >> 16); /* sethi hi(cls), gr7 */ + tramp[4] = 0x80300006; /* jmpl @(gr0, gr6) */ +#endif + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + /* Cache flushing. */ + for (i = 0; i < FFI_TRAMPOLINE_SIZE; i++) + __asm__ volatile ("dcf @(%0,%1)\n\tici @(%2,%1)" :: "r" (tramp), "r" (i), + "r" (codeloc)); + + return FFI_OK; +} diff --git a/libffi/src/frv/ffitarget.h b/libffi/src/frv/ffitarget.h new file mode 100644 index 000000000..1c319ea94 --- /dev/null +++ b/libffi/src/frv/ffitarget.h @@ -0,0 +1,61 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 1996-2004 Red Hat, Inc. + Target configuration macros for FR-V + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +/* ---- System specific configurations ----------------------------------- */ + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + +#ifdef FRV + FFI_EABI, + FFI_DEFAULT_ABI = FFI_EABI, +#endif + + FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 +} ffi_abi; +#endif + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_NATIVE_RAW_API 0 + +#ifdef __FRV_FDPIC__ +/* Trampolines are 8 4-byte instructions long. */ +#define FFI_TRAMPOLINE_SIZE (8*4) +#else +/* Trampolines are 5 4-byte instructions long. */ +#define FFI_TRAMPOLINE_SIZE (5*4) +#endif + +#endif diff --git a/libffi/src/ia64/ffi.c b/libffi/src/ia64/ffi.c new file mode 100644 index 000000000..84b144868 --- /dev/null +++ b/libffi/src/ia64/ffi.c @@ -0,0 +1,580 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 1998, 2007, 2008 Red Hat, Inc. + Copyright (c) 2000 Hewlett Packard Company + + IA64 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> +#include <stdbool.h> +#include <float.h> + +#include "ia64_flags.h" + +/* A 64-bit pointer value. In LP64 mode, this is effectively a plain + pointer. In ILP32 mode, it's a pointer that's been extended to + 64 bits by "addp4". */ +typedef void *PTR64 __attribute__((mode(DI))); + +/* Memory image of fp register contents. This is the implementation + specific format used by ldf.fill/stf.spill. All we care about is + that it wants a 16 byte aligned slot. */ +typedef struct +{ + UINT64 x[2] __attribute__((aligned(16))); +} fpreg; + + +/* The stack layout given to ffi_call_unix and ffi_closure_unix_inner. */ + +struct ia64_args +{ + fpreg fp_regs[8]; /* Contents of 8 fp arg registers. */ + UINT64 gp_regs[8]; /* Contents of 8 gp arg registers. */ + UINT64 other_args[]; /* Arguments passed on stack, variable size. */ +}; + + +/* Adjust ADDR, a pointer to an 8 byte slot, to point to the low LEN bytes. */ + +static inline void * +endian_adjust (void *addr, size_t len) +{ +#ifdef __BIG_ENDIAN__ + return addr + (8 - len); +#else + return addr; +#endif +} + +/* Store VALUE to ADDR in the current cpu implementation's fp spill format. + This is a macro instead of a function, so that it works for all 3 floating + point types without type conversions. Type conversion to long double breaks + the denorm support. */ + +#define stf_spill(addr, value) \ + asm ("stf.spill %0 = %1%P0" : "=m" (*addr) : "f"(value)); + +/* Load a value from ADDR, which is in the current cpu implementation's + fp spill format. As above, this must also be a macro. */ + +#define ldf_fill(result, addr) \ + asm ("ldf.fill %0 = %1%P1" : "=f"(result) : "m"(*addr)); + +/* Return the size of the C type associated with with TYPE. Which will + be one of the FFI_IA64_TYPE_HFA_* values. */ + +static size_t +hfa_type_size (int type) +{ + switch (type) + { + case FFI_IA64_TYPE_HFA_FLOAT: + return sizeof(float); + case FFI_IA64_TYPE_HFA_DOUBLE: + return sizeof(double); + case FFI_IA64_TYPE_HFA_LDOUBLE: + return sizeof(__float80); + default: + abort (); + } +} + +/* Load from ADDR a value indicated by TYPE. Which will be one of + the FFI_IA64_TYPE_HFA_* values. */ + +static void +hfa_type_load (fpreg *fpaddr, int type, void *addr) +{ + switch (type) + { + case FFI_IA64_TYPE_HFA_FLOAT: + stf_spill (fpaddr, *(float *) addr); + return; + case FFI_IA64_TYPE_HFA_DOUBLE: + stf_spill (fpaddr, *(double *) addr); + return; + case FFI_IA64_TYPE_HFA_LDOUBLE: + stf_spill (fpaddr, *(__float80 *) addr); + return; + default: + abort (); + } +} + +/* Load VALUE into ADDR as indicated by TYPE. Which will be one of + the FFI_IA64_TYPE_HFA_* values. */ + +static void +hfa_type_store (int type, void *addr, fpreg *fpaddr) +{ + switch (type) + { + case FFI_IA64_TYPE_HFA_FLOAT: + { + float result; + ldf_fill (result, fpaddr); + *(float *) addr = result; + break; + } + case FFI_IA64_TYPE_HFA_DOUBLE: + { + double result; + ldf_fill (result, fpaddr); + *(double *) addr = result; + break; + } + case FFI_IA64_TYPE_HFA_LDOUBLE: + { + __float80 result; + ldf_fill (result, fpaddr); + *(__float80 *) addr = result; + break; + } + default: + abort (); + } +} + +/* Is TYPE a struct containing floats, doubles, or extended doubles, + all of the same fp type? If so, return the element type. Return + FFI_TYPE_VOID if not. */ + +static int +hfa_element_type (ffi_type *type, int nested) +{ + int element = FFI_TYPE_VOID; + + switch (type->type) + { + case FFI_TYPE_FLOAT: + /* We want to return VOID for raw floating-point types, but the + synthetic HFA type if we're nested within an aggregate. */ + if (nested) + element = FFI_IA64_TYPE_HFA_FLOAT; + break; + + case FFI_TYPE_DOUBLE: + /* Similarly. */ + if (nested) + element = FFI_IA64_TYPE_HFA_DOUBLE; + break; + + case FFI_TYPE_LONGDOUBLE: + /* Similarly, except that that HFA is true for double extended, + but not quad precision. Both have sizeof == 16, so tell the + difference based on the precision. */ + if (LDBL_MANT_DIG == 64 && nested) + element = FFI_IA64_TYPE_HFA_LDOUBLE; + break; + + case FFI_TYPE_STRUCT: + { + ffi_type **ptr = &type->elements[0]; + + for (ptr = &type->elements[0]; *ptr ; ptr++) + { + int sub_element = hfa_element_type (*ptr, 1); + if (sub_element == FFI_TYPE_VOID) + return FFI_TYPE_VOID; + + if (element == FFI_TYPE_VOID) + element = sub_element; + else if (element != sub_element) + return FFI_TYPE_VOID; + } + } + break; + + default: + return FFI_TYPE_VOID; + } + + return element; +} + + +/* Perform machine dependent cif processing. */ + +ffi_status +ffi_prep_cif_machdep(ffi_cif *cif) +{ + int flags; + + /* Adjust cif->bytes to include space for the bits of the ia64_args frame + that precedes the integer register portion. The estimate that the + generic bits did for the argument space required is good enough for the + integer component. */ + cif->bytes += offsetof(struct ia64_args, gp_regs[0]); + if (cif->bytes < sizeof(struct ia64_args)) + cif->bytes = sizeof(struct ia64_args); + + /* Set the return type flag. */ + flags = cif->rtype->type; + switch (cif->rtype->type) + { + case FFI_TYPE_LONGDOUBLE: + /* Leave FFI_TYPE_LONGDOUBLE as meaning double extended precision, + and encode quad precision as a two-word integer structure. */ + if (LDBL_MANT_DIG != 64) + flags = FFI_IA64_TYPE_SMALL_STRUCT | (16 << 8); + break; + + case FFI_TYPE_STRUCT: + { + size_t size = cif->rtype->size; + int hfa_type = hfa_element_type (cif->rtype, 0); + + if (hfa_type != FFI_TYPE_VOID) + { + size_t nelts = size / hfa_type_size (hfa_type); + if (nelts <= 8) + flags = hfa_type | (size << 8); + } + else + { + if (size <= 32) + flags = FFI_IA64_TYPE_SMALL_STRUCT | (size << 8); + } + } + break; + + default: + break; + } + cif->flags = flags; + + return FFI_OK; +} + +extern int ffi_call_unix (struct ia64_args *, PTR64, void (*)(void), UINT64); + +void +ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + struct ia64_args *stack; + long i, avn, gpcount, fpcount; + ffi_type **p_arg; + + FFI_ASSERT (cif->abi == FFI_UNIX); + + /* If we have no spot for a return value, make one. */ + if (rvalue == NULL && cif->rtype->type != FFI_TYPE_VOID) + rvalue = alloca (cif->rtype->size); + + /* Allocate the stack frame. */ + stack = alloca (cif->bytes); + + gpcount = fpcount = 0; + avn = cif->nargs; + for (i = 0, p_arg = cif->arg_types; i < avn; i++, p_arg++) + { + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + stack->gp_regs[gpcount++] = *(SINT8 *)avalue[i]; + break; + case FFI_TYPE_UINT8: + stack->gp_regs[gpcount++] = *(UINT8 *)avalue[i]; + break; + case FFI_TYPE_SINT16: + stack->gp_regs[gpcount++] = *(SINT16 *)avalue[i]; + break; + case FFI_TYPE_UINT16: + stack->gp_regs[gpcount++] = *(UINT16 *)avalue[i]; + break; + case FFI_TYPE_SINT32: + stack->gp_regs[gpcount++] = *(SINT32 *)avalue[i]; + break; + case FFI_TYPE_UINT32: + stack->gp_regs[gpcount++] = *(UINT32 *)avalue[i]; + break; + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + stack->gp_regs[gpcount++] = *(UINT64 *)avalue[i]; + break; + + case FFI_TYPE_POINTER: + stack->gp_regs[gpcount++] = (UINT64)(PTR64) *(void **)avalue[i]; + break; + + case FFI_TYPE_FLOAT: + if (gpcount < 8 && fpcount < 8) + stf_spill (&stack->fp_regs[fpcount++], *(float *)avalue[i]); + stack->gp_regs[gpcount++] = *(UINT32 *)avalue[i]; + break; + + case FFI_TYPE_DOUBLE: + if (gpcount < 8 && fpcount < 8) + stf_spill (&stack->fp_regs[fpcount++], *(double *)avalue[i]); + stack->gp_regs[gpcount++] = *(UINT64 *)avalue[i]; + break; + + case FFI_TYPE_LONGDOUBLE: + if (gpcount & 1) + gpcount++; + if (LDBL_MANT_DIG == 64 && gpcount < 8 && fpcount < 8) + stf_spill (&stack->fp_regs[fpcount++], *(__float80 *)avalue[i]); + memcpy (&stack->gp_regs[gpcount], avalue[i], 16); + gpcount += 2; + break; + + case FFI_TYPE_STRUCT: + { + size_t size = (*p_arg)->size; + size_t align = (*p_arg)->alignment; + int hfa_type = hfa_element_type (*p_arg, 0); + + FFI_ASSERT (align <= 16); + if (align == 16 && (gpcount & 1)) + gpcount++; + + if (hfa_type != FFI_TYPE_VOID) + { + size_t hfa_size = hfa_type_size (hfa_type); + size_t offset = 0; + size_t gp_offset = gpcount * 8; + + while (fpcount < 8 + && offset < size + && gp_offset < 8 * 8) + { + hfa_type_load (&stack->fp_regs[fpcount], hfa_type, + avalue[i] + offset); + offset += hfa_size; + gp_offset += hfa_size; + fpcount += 1; + } + } + + memcpy (&stack->gp_regs[gpcount], avalue[i], size); + gpcount += (size + 7) / 8; + } + break; + + default: + abort (); + } + } + + ffi_call_unix (stack, rvalue, fn, cif->flags); +} + +/* Closures represent a pair consisting of a function pointer, and + some user data. A closure is invoked by reinterpreting the closure + as a function pointer, and branching to it. Thus we can make an + interpreted function callable as a C function: We turn the + interpreter itself, together with a pointer specifying the + interpreted procedure, into a closure. + + For IA64, function pointer are already pairs consisting of a code + pointer, and a gp pointer. The latter is needed to access global + variables. Here we set up such a pair as the first two words of + the closure (in the "trampoline" area), but we replace the gp + pointer with a pointer to the closure itself. We also add the real + gp pointer to the closure. This allows the function entry code to + both retrieve the user data, and to restire the correct gp pointer. */ + +extern void ffi_closure_unix (); + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + void *codeloc) +{ + /* The layout of a function descriptor. A C function pointer really + points to one of these. */ + struct ia64_fd + { + UINT64 code_pointer; + UINT64 gp; + }; + + struct ffi_ia64_trampoline_struct + { + UINT64 code_pointer; /* Pointer to ffi_closure_unix. */ + UINT64 fake_gp; /* Pointer to closure, installed as gp. */ + UINT64 real_gp; /* Real gp value. */ + }; + + struct ffi_ia64_trampoline_struct *tramp; + struct ia64_fd *fd; + + FFI_ASSERT (cif->abi == FFI_UNIX); + + tramp = (struct ffi_ia64_trampoline_struct *)closure->tramp; + fd = (struct ia64_fd *)(void *)ffi_closure_unix; + + tramp->code_pointer = fd->code_pointer; + tramp->real_gp = fd->gp; + tramp->fake_gp = (UINT64)(PTR64)codeloc; + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + + return FFI_OK; +} + + +UINT64 +ffi_closure_unix_inner (ffi_closure *closure, struct ia64_args *stack, + void *rvalue, void *r8) +{ + ffi_cif *cif; + void **avalue; + ffi_type **p_arg; + long i, avn, gpcount, fpcount; + + cif = closure->cif; + avn = cif->nargs; + avalue = alloca (avn * sizeof (void *)); + + /* If the structure return value is passed in memory get that location + from r8 so as to pass the value directly back to the caller. */ + if (cif->flags == FFI_TYPE_STRUCT) + rvalue = r8; + + gpcount = fpcount = 0; + for (i = 0, p_arg = cif->arg_types; i < avn; i++, p_arg++) + { + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + avalue[i] = endian_adjust(&stack->gp_regs[gpcount++], 1); + break; + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + avalue[i] = endian_adjust(&stack->gp_regs[gpcount++], 2); + break; + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + avalue[i] = endian_adjust(&stack->gp_regs[gpcount++], 4); + break; + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + avalue[i] = &stack->gp_regs[gpcount++]; + break; + case FFI_TYPE_POINTER: + avalue[i] = endian_adjust(&stack->gp_regs[gpcount++], sizeof(void*)); + break; + + case FFI_TYPE_FLOAT: + if (gpcount < 8 && fpcount < 8) + { + fpreg *addr = &stack->fp_regs[fpcount++]; + float result; + avalue[i] = addr; + ldf_fill (result, addr); + *(float *)addr = result; + } + else + avalue[i] = endian_adjust(&stack->gp_regs[gpcount], 4); + gpcount++; + break; + + case FFI_TYPE_DOUBLE: + if (gpcount < 8 && fpcount < 8) + { + fpreg *addr = &stack->fp_regs[fpcount++]; + double result; + avalue[i] = addr; + ldf_fill (result, addr); + *(double *)addr = result; + } + else + avalue[i] = &stack->gp_regs[gpcount]; + gpcount++; + break; + + case FFI_TYPE_LONGDOUBLE: + if (gpcount & 1) + gpcount++; + if (LDBL_MANT_DIG == 64 && gpcount < 8 && fpcount < 8) + { + fpreg *addr = &stack->fp_regs[fpcount++]; + __float80 result; + avalue[i] = addr; + ldf_fill (result, addr); + *(__float80 *)addr = result; + } + else + avalue[i] = &stack->gp_regs[gpcount]; + gpcount += 2; + break; + + case FFI_TYPE_STRUCT: + { + size_t size = (*p_arg)->size; + size_t align = (*p_arg)->alignment; + int hfa_type = hfa_element_type (*p_arg, 0); + + FFI_ASSERT (align <= 16); + if (align == 16 && (gpcount & 1)) + gpcount++; + + if (hfa_type != FFI_TYPE_VOID) + { + size_t hfa_size = hfa_type_size (hfa_type); + size_t offset = 0; + size_t gp_offset = gpcount * 8; + void *addr = alloca (size); + + avalue[i] = addr; + + while (fpcount < 8 + && offset < size + && gp_offset < 8 * 8) + { + hfa_type_store (hfa_type, addr + offset, + &stack->fp_regs[fpcount]); + offset += hfa_size; + gp_offset += hfa_size; + fpcount += 1; + } + + if (offset < size) + memcpy (addr + offset, (char *)stack->gp_regs + gp_offset, + size - offset); + } + else + avalue[i] = &stack->gp_regs[gpcount]; + + gpcount += (size + 7) / 8; + } + break; + + default: + abort (); + } + } + + closure->fun (cif, rvalue, avalue, closure->user_data); + + return cif->flags; +} diff --git a/libffi/src/ia64/ffitarget.h b/libffi/src/ia64/ffitarget.h new file mode 100644 index 000000000..d85c049ba --- /dev/null +++ b/libffi/src/ia64/ffitarget.h @@ -0,0 +1,50 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 1996-2003 Red Hat, Inc. + Target configuration macros for IA-64. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#ifndef LIBFFI_ASM +typedef unsigned long long ffi_arg; +typedef signed long long ffi_sarg; + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + FFI_UNIX, /* Linux and all Unix variants use the same conventions */ + FFI_DEFAULT_ABI = FFI_UNIX, + FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 +} ffi_abi; +#endif + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_TRAMPOLINE_SIZE 24 /* Really the following struct, which */ + /* can be interpreted as a C function */ + /* descriptor: */ + +#endif + diff --git a/libffi/src/ia64/ia64_flags.h b/libffi/src/ia64/ia64_flags.h new file mode 100644 index 000000000..9d652cef1 --- /dev/null +++ b/libffi/src/ia64/ia64_flags.h @@ -0,0 +1,40 @@ +/* ----------------------------------------------------------------------- + ia64_flags.h - Copyright (c) 2000 Hewlett Packard Company + + IA64/unix Foreign Function Interface + + Original author: Hans Boehm, HP Labs + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +/* "Type" codes used between assembly and C. When used as a part of + a cfi->flags value, the low byte will be these extra type codes, + and bits 8-31 will be the actual size of the type. */ + +/* Small structures containing N words in integer registers. */ +#define FFI_IA64_TYPE_SMALL_STRUCT (FFI_TYPE_LAST + 1) + +/* Homogeneous Floating Point Aggregates (HFAs) which are returned + in FP registers. */ +#define FFI_IA64_TYPE_HFA_FLOAT (FFI_TYPE_LAST + 2) +#define FFI_IA64_TYPE_HFA_DOUBLE (FFI_TYPE_LAST + 3) +#define FFI_IA64_TYPE_HFA_LDOUBLE (FFI_TYPE_LAST + 4) diff --git a/libffi/src/ia64/unix.S b/libffi/src/ia64/unix.S new file mode 100644 index 000000000..4d2a86d42 --- /dev/null +++ b/libffi/src/ia64/unix.S @@ -0,0 +1,560 @@ +/* ----------------------------------------------------------------------- + unix.S - Copyright (c) 1998, 2008 Red Hat, Inc. + Copyright (c) 2000 Hewlett Packard Company + + IA64/unix Foreign Function Interface + + Primary author: Hans Boehm, HP Labs + + Loosely modeled on Cygnus code for other platforms. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> +#include "ia64_flags.h" + + .pred.safe_across_calls p1-p5,p16-p63 +.text + +/* int ffi_call_unix (struct ia64_args *stack, PTR64 rvalue, + void (*fn)(void), int flags); + */ + + .align 16 + .global ffi_call_unix + .proc ffi_call_unix +ffi_call_unix: + .prologue + /* Bit o trickiness. We actually share a stack frame with ffi_call. + Rely on the fact that ffi_call uses a vframe and don't bother + tracking one here at all. */ + .fframe 0 + .save ar.pfs, r36 // loc0 + alloc loc0 = ar.pfs, 4, 3, 8, 0 + .save rp, loc1 + mov loc1 = b0 + .body + add r16 = 16, in0 + mov loc2 = gp + mov r8 = in1 + ;; + + /* Load up all of the argument registers. */ + ldf.fill f8 = [in0], 32 + ldf.fill f9 = [r16], 32 + ;; + ldf.fill f10 = [in0], 32 + ldf.fill f11 = [r16], 32 + ;; + ldf.fill f12 = [in0], 32 + ldf.fill f13 = [r16], 32 + ;; + ldf.fill f14 = [in0], 32 + ldf.fill f15 = [r16], 24 + ;; + ld8 out0 = [in0], 16 + ld8 out1 = [r16], 16 + ;; + ld8 out2 = [in0], 16 + ld8 out3 = [r16], 16 + ;; + ld8 out4 = [in0], 16 + ld8 out5 = [r16], 16 + ;; + ld8 out6 = [in0] + ld8 out7 = [r16] + ;; + + /* Deallocate the register save area from the stack frame. */ + mov sp = in0 + + /* Call the target function. */ + ld8 r16 = [in2], 8 + ;; + ld8 gp = [in2] + mov b6 = r16 + br.call.sptk.many b0 = b6 + ;; + + /* Dispatch to handle return value. */ + mov gp = loc2 + zxt1 r16 = in3 + ;; + mov ar.pfs = loc0 + addl r18 = @ltoffx(.Lst_table), gp + ;; + ld8.mov r18 = [r18], .Lst_table + mov b0 = loc1 + ;; + shladd r18 = r16, 3, r18 + ;; + ld8 r17 = [r18] + shr in3 = in3, 8 + ;; + add r17 = r17, r18 + ;; + mov b6 = r17 + br b6 + ;; + +.Lst_void: + br.ret.sptk.many b0 + ;; +.Lst_uint8: + zxt1 r8 = r8 + ;; + st8 [in1] = r8 + br.ret.sptk.many b0 + ;; +.Lst_sint8: + sxt1 r8 = r8 + ;; + st8 [in1] = r8 + br.ret.sptk.many b0 + ;; +.Lst_uint16: + zxt2 r8 = r8 + ;; + st8 [in1] = r8 + br.ret.sptk.many b0 + ;; +.Lst_sint16: + sxt2 r8 = r8 + ;; + st8 [in1] = r8 + br.ret.sptk.many b0 + ;; +.Lst_uint32: + zxt4 r8 = r8 + ;; + st8 [in1] = r8 + br.ret.sptk.many b0 + ;; +.Lst_sint32: + sxt4 r8 = r8 + ;; + st8 [in1] = r8 + br.ret.sptk.many b0 + ;; +.Lst_int64: + st8 [in1] = r8 + br.ret.sptk.many b0 + ;; +.Lst_float: + stfs [in1] = f8 + br.ret.sptk.many b0 + ;; +.Lst_double: + stfd [in1] = f8 + br.ret.sptk.many b0 + ;; +.Lst_ldouble: + stfe [in1] = f8 + br.ret.sptk.many b0 + ;; + +.Lst_small_struct: + add sp = -16, sp + cmp.lt p6, p0 = 8, in3 + cmp.lt p7, p0 = 16, in3 + cmp.lt p8, p0 = 24, in3 + ;; + add r16 = 8, sp + add r17 = 16, sp + add r18 = 24, sp + ;; + st8 [sp] = r8 +(p6) st8 [r16] = r9 + mov out0 = in1 +(p7) st8 [r17] = r10 +(p8) st8 [r18] = r11 + mov out1 = sp + mov out2 = in3 + br.call.sptk.many b0 = memcpy# + ;; + mov ar.pfs = loc0 + mov b0 = loc1 + mov gp = loc2 + br.ret.sptk.many b0 + +.Lst_hfa_float: + add r16 = 4, in1 + cmp.lt p6, p0 = 4, in3 + ;; + stfs [in1] = f8, 8 +(p6) stfs [r16] = f9, 8 + cmp.lt p7, p0 = 8, in3 + cmp.lt p8, p0 = 12, in3 + ;; +(p7) stfs [in1] = f10, 8 +(p8) stfs [r16] = f11, 8 + cmp.lt p9, p0 = 16, in3 + cmp.lt p10, p0 = 20, in3 + ;; +(p9) stfs [in1] = f12, 8 +(p10) stfs [r16] = f13, 8 + cmp.lt p6, p0 = 24, in3 + cmp.lt p7, p0 = 28, in3 + ;; +(p6) stfs [in1] = f14 +(p7) stfs [r16] = f15 + br.ret.sptk.many b0 + ;; + +.Lst_hfa_double: + add r16 = 8, in1 + cmp.lt p6, p0 = 8, in3 + ;; + stfd [in1] = f8, 16 +(p6) stfd [r16] = f9, 16 + cmp.lt p7, p0 = 16, in3 + cmp.lt p8, p0 = 24, in3 + ;; +(p7) stfd [in1] = f10, 16 +(p8) stfd [r16] = f11, 16 + cmp.lt p9, p0 = 32, in3 + cmp.lt p10, p0 = 40, in3 + ;; +(p9) stfd [in1] = f12, 16 +(p10) stfd [r16] = f13, 16 + cmp.lt p6, p0 = 48, in3 + cmp.lt p7, p0 = 56, in3 + ;; +(p6) stfd [in1] = f14 +(p7) stfd [r16] = f15 + br.ret.sptk.many b0 + ;; + +.Lst_hfa_ldouble: + add r16 = 16, in1 + cmp.lt p6, p0 = 16, in3 + ;; + stfe [in1] = f8, 32 +(p6) stfe [r16] = f9, 32 + cmp.lt p7, p0 = 32, in3 + cmp.lt p8, p0 = 48, in3 + ;; +(p7) stfe [in1] = f10, 32 +(p8) stfe [r16] = f11, 32 + cmp.lt p9, p0 = 64, in3 + cmp.lt p10, p0 = 80, in3 + ;; +(p9) stfe [in1] = f12, 32 +(p10) stfe [r16] = f13, 32 + cmp.lt p6, p0 = 96, in3 + cmp.lt p7, p0 = 112, in3 + ;; +(p6) stfe [in1] = f14 +(p7) stfe [r16] = f15 + br.ret.sptk.many b0 + ;; + + .endp ffi_call_unix + + .align 16 + .global ffi_closure_unix + .proc ffi_closure_unix + +#define FRAME_SIZE (8*16 + 8*8 + 8*16) + +ffi_closure_unix: + .prologue + .save ar.pfs, r40 // loc0 + alloc loc0 = ar.pfs, 8, 4, 4, 0 + .fframe FRAME_SIZE + add r12 = -FRAME_SIZE, r12 + .save rp, loc1 + mov loc1 = b0 + .save ar.unat, loc2 + mov loc2 = ar.unat + .body + + /* Retrieve closure pointer and real gp. */ +#ifdef _ILP32 + addp4 out0 = 0, gp + addp4 gp = 16, gp +#else + mov out0 = gp + add gp = 16, gp +#endif + ;; + ld8 gp = [gp] + + /* Spill all of the possible argument registers. */ + add r16 = 16 + 8*16, sp + add r17 = 16 + 8*16 + 16, sp + ;; + stf.spill [r16] = f8, 32 + stf.spill [r17] = f9, 32 + mov loc3 = gp + ;; + stf.spill [r16] = f10, 32 + stf.spill [r17] = f11, 32 + ;; + stf.spill [r16] = f12, 32 + stf.spill [r17] = f13, 32 + ;; + stf.spill [r16] = f14, 32 + stf.spill [r17] = f15, 24 + ;; + .mem.offset 0, 0 + st8.spill [r16] = in0, 16 + .mem.offset 8, 0 + st8.spill [r17] = in1, 16 + add out1 = 16 + 8*16, sp + ;; + .mem.offset 0, 0 + st8.spill [r16] = in2, 16 + .mem.offset 8, 0 + st8.spill [r17] = in3, 16 + add out2 = 16, sp + ;; + .mem.offset 0, 0 + st8.spill [r16] = in4, 16 + .mem.offset 8, 0 + st8.spill [r17] = in5, 16 + mov out3 = r8 + ;; + .mem.offset 0, 0 + st8.spill [r16] = in6 + .mem.offset 8, 0 + st8.spill [r17] = in7 + + /* Invoke ffi_closure_unix_inner for the hard work. */ + br.call.sptk.many b0 = ffi_closure_unix_inner + ;; + + /* Dispatch to handle return value. */ + mov gp = loc3 + zxt1 r16 = r8 + ;; + addl r18 = @ltoffx(.Lld_table), gp + mov ar.pfs = loc0 + ;; + ld8.mov r18 = [r18], .Lld_table + mov b0 = loc1 + ;; + shladd r18 = r16, 3, r18 + mov ar.unat = loc2 + ;; + ld8 r17 = [r18] + shr r8 = r8, 8 + ;; + add r17 = r17, r18 + add r16 = 16, sp + ;; + mov b6 = r17 + br b6 + ;; + .label_state 1 + +.Lld_void: + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; +.Lld_int: + .body + .copy_state 1 + ld8 r8 = [r16] + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; +.Lld_float: + .body + .copy_state 1 + ldfs f8 = [r16] + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; +.Lld_double: + .body + .copy_state 1 + ldfd f8 = [r16] + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; +.Lld_ldouble: + .body + .copy_state 1 + ldfe f8 = [r16] + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; + +.Lld_small_struct: + .body + .copy_state 1 + add r17 = 8, r16 + cmp.lt p6, p0 = 8, r8 + cmp.lt p7, p0 = 16, r8 + cmp.lt p8, p0 = 24, r8 + ;; + ld8 r8 = [r16], 16 +(p6) ld8 r9 = [r17], 16 + ;; +(p7) ld8 r10 = [r16] +(p8) ld8 r11 = [r17] + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; + +.Lld_hfa_float: + .body + .copy_state 1 + add r17 = 4, r16 + cmp.lt p6, p0 = 4, r8 + ;; + ldfs f8 = [r16], 8 +(p6) ldfs f9 = [r17], 8 + cmp.lt p7, p0 = 8, r8 + cmp.lt p8, p0 = 12, r8 + ;; +(p7) ldfs f10 = [r16], 8 +(p8) ldfs f11 = [r17], 8 + cmp.lt p9, p0 = 16, r8 + cmp.lt p10, p0 = 20, r8 + ;; +(p9) ldfs f12 = [r16], 8 +(p10) ldfs f13 = [r17], 8 + cmp.lt p6, p0 = 24, r8 + cmp.lt p7, p0 = 28, r8 + ;; +(p6) ldfs f14 = [r16] +(p7) ldfs f15 = [r17] + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; + +.Lld_hfa_double: + .body + .copy_state 1 + add r17 = 8, r16 + cmp.lt p6, p0 = 8, r8 + ;; + ldfd f8 = [r16], 16 +(p6) ldfd f9 = [r17], 16 + cmp.lt p7, p0 = 16, r8 + cmp.lt p8, p0 = 24, r8 + ;; +(p7) ldfd f10 = [r16], 16 +(p8) ldfd f11 = [r17], 16 + cmp.lt p9, p0 = 32, r8 + cmp.lt p10, p0 = 40, r8 + ;; +(p9) ldfd f12 = [r16], 16 +(p10) ldfd f13 = [r17], 16 + cmp.lt p6, p0 = 48, r8 + cmp.lt p7, p0 = 56, r8 + ;; +(p6) ldfd f14 = [r16] +(p7) ldfd f15 = [r17] + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; + +.Lld_hfa_ldouble: + .body + .copy_state 1 + add r17 = 16, r16 + cmp.lt p6, p0 = 16, r8 + ;; + ldfe f8 = [r16], 32 +(p6) ldfe f9 = [r17], 32 + cmp.lt p7, p0 = 32, r8 + cmp.lt p8, p0 = 48, r8 + ;; +(p7) ldfe f10 = [r16], 32 +(p8) ldfe f11 = [r17], 32 + cmp.lt p9, p0 = 64, r8 + cmp.lt p10, p0 = 80, r8 + ;; +(p9) ldfe f12 = [r16], 32 +(p10) ldfe f13 = [r17], 32 + cmp.lt p6, p0 = 96, r8 + cmp.lt p7, p0 = 112, r8 + ;; +(p6) ldfe f14 = [r16] +(p7) ldfe f15 = [r17] + .restore sp + add sp = FRAME_SIZE, sp + br.ret.sptk.many b0 + ;; + + .endp ffi_closure_unix + + .section .rodata + .align 8 +.Lst_table: + data8 @pcrel(.Lst_void) // FFI_TYPE_VOID + data8 @pcrel(.Lst_sint32) // FFI_TYPE_INT + data8 @pcrel(.Lst_float) // FFI_TYPE_FLOAT + data8 @pcrel(.Lst_double) // FFI_TYPE_DOUBLE + data8 @pcrel(.Lst_ldouble) // FFI_TYPE_LONGDOUBLE + data8 @pcrel(.Lst_uint8) // FFI_TYPE_UINT8 + data8 @pcrel(.Lst_sint8) // FFI_TYPE_SINT8 + data8 @pcrel(.Lst_uint16) // FFI_TYPE_UINT16 + data8 @pcrel(.Lst_sint16) // FFI_TYPE_SINT16 + data8 @pcrel(.Lst_uint32) // FFI_TYPE_UINT32 + data8 @pcrel(.Lst_sint32) // FFI_TYPE_SINT32 + data8 @pcrel(.Lst_int64) // FFI_TYPE_UINT64 + data8 @pcrel(.Lst_int64) // FFI_TYPE_SINT64 + data8 @pcrel(.Lst_void) // FFI_TYPE_STRUCT + data8 @pcrel(.Lst_int64) // FFI_TYPE_POINTER + data8 @pcrel(.Lst_small_struct) // FFI_IA64_TYPE_SMALL_STRUCT + data8 @pcrel(.Lst_hfa_float) // FFI_IA64_TYPE_HFA_FLOAT + data8 @pcrel(.Lst_hfa_double) // FFI_IA64_TYPE_HFA_DOUBLE + data8 @pcrel(.Lst_hfa_ldouble) // FFI_IA64_TYPE_HFA_LDOUBLE + +.Lld_table: + data8 @pcrel(.Lld_void) // FFI_TYPE_VOID + data8 @pcrel(.Lld_int) // FFI_TYPE_INT + data8 @pcrel(.Lld_float) // FFI_TYPE_FLOAT + data8 @pcrel(.Lld_double) // FFI_TYPE_DOUBLE + data8 @pcrel(.Lld_ldouble) // FFI_TYPE_LONGDOUBLE + data8 @pcrel(.Lld_int) // FFI_TYPE_UINT8 + data8 @pcrel(.Lld_int) // FFI_TYPE_SINT8 + data8 @pcrel(.Lld_int) // FFI_TYPE_UINT16 + data8 @pcrel(.Lld_int) // FFI_TYPE_SINT16 + data8 @pcrel(.Lld_int) // FFI_TYPE_UINT32 + data8 @pcrel(.Lld_int) // FFI_TYPE_SINT32 + data8 @pcrel(.Lld_int) // FFI_TYPE_UINT64 + data8 @pcrel(.Lld_int) // FFI_TYPE_SINT64 + data8 @pcrel(.Lld_void) // FFI_TYPE_STRUCT + data8 @pcrel(.Lld_int) // FFI_TYPE_POINTER + data8 @pcrel(.Lld_small_struct) // FFI_IA64_TYPE_SMALL_STRUCT + data8 @pcrel(.Lld_hfa_float) // FFI_IA64_TYPE_HFA_FLOAT + data8 @pcrel(.Lld_hfa_double) // FFI_IA64_TYPE_HFA_DOUBLE + data8 @pcrel(.Lld_hfa_ldouble) // FFI_IA64_TYPE_HFA_LDOUBLE + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libffi/src/java_raw_api.c b/libffi/src/java_raw_api.c new file mode 100644 index 000000000..9c5383e6d --- /dev/null +++ b/libffi/src/java_raw_api.c @@ -0,0 +1,356 @@ +/* ----------------------------------------------------------------------- + java_raw_api.c - Copyright (c) 1999, 2007, 2008 Red Hat, Inc. + + Cloned from raw_api.c + + Raw_api.c author: Kresten Krab Thorup <krab@gnu.org> + Java_raw_api.c author: Hans-J. Boehm <hboehm@hpl.hp.com> + + $Id $ + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +/* This defines a Java- and 64-bit specific variant of the raw API. */ +/* It assumes that "raw" argument blocks look like Java stacks on a */ +/* 64-bit machine. Arguments that can be stored in a single stack */ +/* stack slots (longs, doubles) occupy 128 bits, but only the first */ +/* 64 bits are actually used. */ + +#include <ffi.h> +#include <ffi_common.h> +#include <stdlib.h> + +#if !defined(NO_JAVA_RAW_API) && !defined(FFI_NO_RAW_API) + +size_t +ffi_java_raw_size (ffi_cif *cif) +{ + size_t result = 0; + int i; + + ffi_type **at = cif->arg_types; + + for (i = cif->nargs-1; i >= 0; i--, at++) + { + switch((*at) -> type) { + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_DOUBLE: + result += 2 * FFI_SIZEOF_JAVA_RAW; + break; + case FFI_TYPE_STRUCT: + /* No structure parameters in Java. */ + abort(); + default: + result += FFI_SIZEOF_JAVA_RAW; + } + } + + return result; +} + + +void +ffi_java_raw_to_ptrarray (ffi_cif *cif, ffi_java_raw *raw, void **args) +{ + unsigned i; + ffi_type **tp = cif->arg_types; + +#if WORDS_BIGENDIAN + + for (i = 0; i < cif->nargs; i++, tp++, args++) + { + switch ((*tp)->type) + { + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + *args = (void*) ((char*)(raw++) + 3); + break; + + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + *args = (void*) ((char*)(raw++) + 2); + break; + +#if FFI_SIZEOF_JAVA_RAW == 8 + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_DOUBLE: + *args = (void *)raw; + raw += 2; + break; +#endif + + case FFI_TYPE_POINTER: + *args = (void*) &(raw++)->ptr; + break; + + default: + *args = raw; + raw += + ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw); + } + } + +#else /* WORDS_BIGENDIAN */ + +#if !PDP + + /* then assume little endian */ + for (i = 0; i < cif->nargs; i++, tp++, args++) + { +#if FFI_SIZEOF_JAVA_RAW == 8 + switch((*tp)->type) { + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_DOUBLE: + *args = (void*) raw; + raw += 2; + break; + default: + *args = (void*) raw++; + } +#else /* FFI_SIZEOF_JAVA_RAW != 8 */ + *args = (void*) raw; + raw += + ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw); +#endif /* FFI_SIZEOF_JAVA_RAW == 8 */ + } + +#else +#error "pdp endian not supported" +#endif /* ! PDP */ + +#endif /* WORDS_BIGENDIAN */ +} + +void +ffi_java_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_java_raw *raw) +{ + unsigned i; + ffi_type **tp = cif->arg_types; + + for (i = 0; i < cif->nargs; i++, tp++, args++) + { + switch ((*tp)->type) + { + case FFI_TYPE_UINT8: +#if WORDS_BIGENDIAN + *(UINT32*)(raw++) = *(UINT8*) (*args); +#else + (raw++)->uint = *(UINT8*) (*args); +#endif + break; + + case FFI_TYPE_SINT8: +#if WORDS_BIGENDIAN + *(SINT32*)(raw++) = *(SINT8*) (*args); +#else + (raw++)->sint = *(SINT8*) (*args); +#endif + break; + + case FFI_TYPE_UINT16: +#if WORDS_BIGENDIAN + *(UINT32*)(raw++) = *(UINT16*) (*args); +#else + (raw++)->uint = *(UINT16*) (*args); +#endif + break; + + case FFI_TYPE_SINT16: +#if WORDS_BIGENDIAN + *(SINT32*)(raw++) = *(SINT16*) (*args); +#else + (raw++)->sint = *(SINT16*) (*args); +#endif + break; + + case FFI_TYPE_UINT32: +#if WORDS_BIGENDIAN + *(UINT32*)(raw++) = *(UINT32*) (*args); +#else + (raw++)->uint = *(UINT32*) (*args); +#endif + break; + + case FFI_TYPE_SINT32: +#if WORDS_BIGENDIAN + *(SINT32*)(raw++) = *(SINT32*) (*args); +#else + (raw++)->sint = *(SINT32*) (*args); +#endif + break; + + case FFI_TYPE_FLOAT: + (raw++)->flt = *(FLOAT32*) (*args); + break; + +#if FFI_SIZEOF_JAVA_RAW == 8 + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_DOUBLE: + raw->uint = *(UINT64*) (*args); + raw += 2; + break; +#endif + + case FFI_TYPE_POINTER: + (raw++)->ptr = **(void***) args; + break; + + default: +#if FFI_SIZEOF_JAVA_RAW == 8 + FFI_ASSERT(0); /* Should have covered all cases */ +#else + memcpy ((void*) raw->data, (void*)*args, (*tp)->size); + raw += + ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw); +#endif + } + } +} + +#if !FFI_NATIVE_RAW_API + +static void +ffi_java_rvalue_to_raw (ffi_cif *cif, void *rvalue) +{ +#if WORDS_BIGENDIAN && FFI_SIZEOF_ARG == 8 + switch (cif->rtype->type) + { + case FFI_TYPE_UINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_UINT32: + *(UINT64 *)rvalue <<= 32; + break; + + case FFI_TYPE_SINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_SINT32: + case FFI_TYPE_INT: +#if FFI_SIZEOF_JAVA_RAW == 4 + case FFI_TYPE_POINTER: +#endif + *(SINT64 *)rvalue <<= 32; + break; + + default: + break; + } +#endif +} + +static void +ffi_java_raw_to_rvalue (ffi_cif *cif, void *rvalue) +{ +#if WORDS_BIGENDIAN && FFI_SIZEOF_ARG == 8 + switch (cif->rtype->type) + { + case FFI_TYPE_UINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_UINT32: + *(UINT64 *)rvalue >>= 32; + break; + + case FFI_TYPE_SINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_SINT32: + case FFI_TYPE_INT: + *(SINT64 *)rvalue >>= 32; + break; + + default: + break; + } +#endif +} + +/* This is a generic definition of ffi_raw_call, to be used if the + * native system does not provide a machine-specific implementation. + * Having this, allows code to be written for the raw API, without + * the need for system-specific code to handle input in that format; + * these following couple of functions will handle the translation forth + * and back automatically. */ + +void ffi_java_raw_call (ffi_cif *cif, void (*fn)(void), void *rvalue, + ffi_java_raw *raw) +{ + void **avalue = (void**) alloca (cif->nargs * sizeof (void*)); + ffi_java_raw_to_ptrarray (cif, raw, avalue); + ffi_call (cif, fn, rvalue, avalue); + ffi_java_rvalue_to_raw (cif, rvalue); +} + +#if FFI_CLOSURES /* base system provides closures */ + +static void +ffi_java_translate_args (ffi_cif *cif, void *rvalue, + void **avalue, void *user_data) +{ + ffi_java_raw *raw = (ffi_java_raw*)alloca (ffi_java_raw_size (cif)); + ffi_raw_closure *cl = (ffi_raw_closure*)user_data; + + ffi_java_ptrarray_to_raw (cif, avalue, raw); + (*cl->fun) (cif, rvalue, raw, cl->user_data); + ffi_java_raw_to_rvalue (cif, rvalue); +} + +ffi_status +ffi_prep_java_raw_closure_loc (ffi_java_raw_closure* cl, + ffi_cif *cif, + void (*fun)(ffi_cif*,void*,ffi_java_raw*,void*), + void *user_data, + void *codeloc) +{ + ffi_status status; + + status = ffi_prep_closure_loc ((ffi_closure*) cl, + cif, + &ffi_java_translate_args, + codeloc, + codeloc); + if (status == FFI_OK) + { + cl->fun = fun; + cl->user_data = user_data; + } + + return status; +} + +/* Again, here is the generic version of ffi_prep_raw_closure, which + * will install an intermediate "hub" for translation of arguments from + * the pointer-array format, to the raw format */ + +ffi_status +ffi_prep_java_raw_closure (ffi_java_raw_closure* cl, + ffi_cif *cif, + void (*fun)(ffi_cif*,void*,ffi_java_raw*,void*), + void *user_data) +{ + return ffi_prep_java_raw_closure_loc (cl, cif, fun, user_data, cl); +} + +#endif /* FFI_CLOSURES */ +#endif /* !FFI_NATIVE_RAW_API */ +#endif /* !FFI_NO_RAW_API */ diff --git a/libffi/src/m32r/ffi.c b/libffi/src/m32r/ffi.c new file mode 100644 index 000000000..300006349 --- /dev/null +++ b/libffi/src/m32r/ffi.c @@ -0,0 +1,232 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 2004 Renesas Technology + Copyright (c) 2008 Red Hat, Inc. + + M32R Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL RENESAS TECHNOLOGY BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> + +/* ffi_prep_args is called by the assembly routine once stack + space has been allocated for the function's arguments. */ + +void ffi_prep_args(char *stack, extended_cif *ecif) +{ + unsigned int i; + int tmp; + unsigned int avn; + void **p_argv; + char *argp; + ffi_type **p_arg; + + tmp = 0; + argp = stack; + + if (ecif->cif->rtype->type == FFI_TYPE_STRUCT && ecif->cif->rtype->size > 8) + { + *(void **) argp = ecif->rvalue; + argp += 4; + } + + avn = ecif->cif->nargs; + p_argv = ecif->avalue; + + for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; + (i != 0) && (avn != 0); + i--, p_arg++) + { + size_t z; + + /* Align if necessary. */ + if (((*p_arg)->alignment - 1) & (unsigned) argp) + argp = (char *) ALIGN (argp, (*p_arg)->alignment); + + if (avn != 0) + { + avn--; + z = (*p_arg)->size; + if (z < sizeof (int)) + { + z = sizeof (int); + + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + *(signed int *) argp = (signed int)*(SINT8 *)(* p_argv); + break; + + case FFI_TYPE_UINT8: + *(unsigned int *) argp = (unsigned int)*(UINT8 *)(* p_argv); + break; + + case FFI_TYPE_SINT16: + *(signed int *) argp = (signed int)*(SINT16 *)(* p_argv); + break; + + case FFI_TYPE_UINT16: + *(unsigned int *) argp = (unsigned int)*(UINT16 *)(* p_argv); + break; + + case FFI_TYPE_STRUCT: + z = (*p_arg)->size; + if ((*p_arg)->alignment != 1) + memcpy (argp, *p_argv, z); + else + memcpy (argp + 4 - z, *p_argv, z); + z = sizeof (int); + break; + + default: + FFI_ASSERT(0); + } + } + else if (z == sizeof (int)) + { + *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv); + } + else + { + if ((*p_arg)->type == FFI_TYPE_STRUCT) + { + if (z > 8) + { + *(unsigned int *) argp = (unsigned int)(void *)(* p_argv); + z = sizeof(void *); + } + else + { + memcpy(argp, *p_argv, z); + z = 8; + } + } + else + { + /* Double or long long 64bit. */ + memcpy (argp, *p_argv, z); + } + } + p_argv++; + argp += z; + } + } + + return; +} + +/* Perform machine dependent cif processing. */ +ffi_status +ffi_prep_cif_machdep(ffi_cif *cif) +{ + /* Set the return type flag. */ + switch (cif->rtype->type) + { + case FFI_TYPE_VOID: + cif->flags = (unsigned) cif->rtype->type; + break; + + case FFI_TYPE_STRUCT: + if (cif->rtype->size <= 4) + cif->flags = FFI_TYPE_INT; + + else if (cif->rtype->size <= 8) + cif->flags = FFI_TYPE_DOUBLE; + + else + cif->flags = (unsigned) cif->rtype->type; + break; + + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_DOUBLE: + cif->flags = FFI_TYPE_DOUBLE; + break; + + case FFI_TYPE_FLOAT: + default: + cif->flags = FFI_TYPE_INT; + break; + } + + return FFI_OK; +} + +extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *, + unsigned, unsigned, unsigned *, void (*fn)(void)); + +void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + extended_cif ecif; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have + a return value address then we need to make one. */ + if ((rvalue == NULL) && + (cif->rtype->type == FFI_TYPE_STRUCT)) + { + ecif.rvalue = alloca (cif->rtype->size); + } + else + ecif.rvalue = rvalue; + + switch (cif->abi) + { + case FFI_SYSV: + ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, + cif->flags, ecif.rvalue, fn); + if (cif->rtype->type == FFI_TYPE_STRUCT) + { + int size = cif->rtype->size; + int align = cif->rtype->alignment; + + if (size < 4) + { + if (align == 1) + *(unsigned long *)(ecif.rvalue) <<= (4 - size) * 8; + } + else if (4 < size && size < 8) + { + if (align == 1) + { + memcpy (ecif.rvalue, ecif.rvalue + 8-size, size); + } + else if (align == 2) + { + if (size & 1) + size += 1; + + if (size != 8) + memcpy (ecif.rvalue, ecif.rvalue + 8-size, size); + } + } + } + break; + + default: + FFI_ASSERT(0); + break; + } +} diff --git a/libffi/src/m32r/ffitarget.h b/libffi/src/m32r/ffitarget.h new file mode 100644 index 000000000..6a761f659 --- /dev/null +++ b/libffi/src/m32r/ffitarget.h @@ -0,0 +1,48 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 2004 Renesas Technology. + Target configuration macros for M32R. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL RENESAS TECHNOLOGY BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +/* ---- Generic type definitions ----------------------------------------- */ + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi + { + FFI_FIRST_ABI = 0, + FFI_SYSV, + FFI_DEFAULT_ABI = FFI_SYSV, + FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 + } ffi_abi; +#endif + +#define FFI_CLOSURES 0 +#define FFI_TRAMPOLINE_SIZE 24 +#define FFI_NATIVE_RAW_API 0 + +#endif diff --git a/libffi/src/m32r/sysv.S b/libffi/src/m32r/sysv.S new file mode 100644 index 000000000..06b75c226 --- /dev/null +++ b/libffi/src/m32r/sysv.S @@ -0,0 +1,121 @@ +/* ----------------------------------------------------------------------- + sysv.S - Copyright (c) 2004 Renesas Technology + + M32R Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL RENESAS TECHNOLOGY BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> +#ifdef HAVE_MACHINE_ASM_H +#include <machine/asm.h> +#else +/* XXX these lose for some platforms, I'm sure. */ +#define CNAME(x) x +#define ENTRY(x) .globl CNAME(x)! .type CNAME(x),%function! CNAME(x): +#endif + +.text + + /* R0: ffi_prep_args */ + /* R1: &ecif */ + /* R2: cif->bytes */ + /* R3: fig->flags */ + /* sp+0: ecif.rvalue */ + /* sp+4: fn */ + + /* This assumes we are using gas. */ +ENTRY(ffi_call_SYSV) + /* Save registers. */ + push fp + push lr + push r3 + push r2 + push r1 + push r0 + mv fp, sp + + /* Make room for all of the new args. */ + sub sp, r2 + + /* Place all of the ffi_prep_args in position. */ + mv lr, r0 + mv r0, sp + /* R1 already set. */ + + /* And call. */ + jl lr + + /* Move first 4 parameters in registers... */ + ld r0, @(0,sp) + ld r1, @(4,sp) + ld r2, @(8,sp) + ld r3, @(12,sp) + + /* ...and adjust the stack. */ + ld lr, @(8,fp) + cmpi lr, #16 + bc adjust_stack + ldi lr, #16 +adjust_stack: + add sp, lr + + /* Call the function. */ + ld lr, @(28,fp) + jl lr + + /* Remove the space we pushed for the args. */ + mv sp, fp + + /* Load R2 with the pointer to storage for the return value. */ + ld r2, @(24,sp) + + /* Load R3 with the return type code. */ + ld r3, @(12,sp) + + /* If the return value pointer is NULL, assume no return value. */ + beqz r2, epilogue + + /* Return INT. */ + ldi r4, #FFI_TYPE_INT + bne r3, r4, return_double + st r0, @r2 + bra epilogue + +return_double: + /* Return DOUBLE or LONGDOUBLE. */ + ldi r4, #FFI_TYPE_DOUBLE + bne r3, r4, epilogue + st r0, @r2 + st r1, @(4,r2) + +epilogue: + pop r0 + pop r1 + pop r2 + pop r3 + pop lr + pop fp + jmp lr + +.ffi_call_SYSV_end: + .size CNAME(ffi_call_SYSV),.ffi_call_SYSV_end-CNAME(ffi_call_SYSV) diff --git a/libffi/src/m68k/ffi.c b/libffi/src/m68k/ffi.c new file mode 100644 index 000000000..0d4df1e23 --- /dev/null +++ b/libffi/src/m68k/ffi.c @@ -0,0 +1,288 @@ +/* ----------------------------------------------------------------------- + ffi.c + + m68k Foreign Function Interface + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> +#include <unistd.h> +#ifdef __rtems__ +void rtems_cache_flush_multiple_data_lines( const void *, size_t ); +#else +#include <sys/syscall.h> +#include <asm/cachectl.h> +#endif + +void ffi_call_SYSV (extended_cif *, + unsigned, unsigned, + void *, void (*fn) ()); +void *ffi_prep_args (void *stack, extended_cif *ecif); +void ffi_closure_SYSV (ffi_closure *); +void ffi_closure_struct_SYSV (ffi_closure *); +unsigned int ffi_closure_SYSV_inner (ffi_closure *closure, + void *resp, void *args); + +/* ffi_prep_args is called by the assembly routine once stack space has + been allocated for the function's arguments. */ + +void * +ffi_prep_args (void *stack, extended_cif *ecif) +{ + unsigned int i; + void **p_argv; + char *argp; + ffi_type **p_arg; + void *struct_value_ptr; + + argp = stack; + + if (ecif->cif->rtype->type == FFI_TYPE_STRUCT + && !ecif->cif->flags) + struct_value_ptr = ecif->rvalue; + else + struct_value_ptr = NULL; + + p_argv = ecif->avalue; + + for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; + i != 0; + i--, p_arg++) + { + size_t z; + + z = (*p_arg)->size; + if (z < sizeof (int)) + { + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + *(signed int *) argp = (signed int) *(SINT8 *) *p_argv; + break; + + case FFI_TYPE_UINT8: + *(unsigned int *) argp = (unsigned int) *(UINT8 *) *p_argv; + break; + + case FFI_TYPE_SINT16: + *(signed int *) argp = (signed int) *(SINT16 *) *p_argv; + break; + + case FFI_TYPE_UINT16: + *(unsigned int *) argp = (unsigned int) *(UINT16 *) *p_argv; + break; + + case FFI_TYPE_STRUCT: + memcpy (argp + sizeof (int) - z, *p_argv, z); + break; + + default: + FFI_ASSERT (0); + } + z = sizeof (int); + } + else + { + memcpy (argp, *p_argv, z); + + /* Align if necessary. */ + if ((sizeof(int) - 1) & z) + z = ALIGN(z, sizeof(int)); + } + + p_argv++; + argp += z; + } + + return struct_value_ptr; +} + +#define CIF_FLAGS_INT 1 +#define CIF_FLAGS_DINT 2 +#define CIF_FLAGS_FLOAT 4 +#define CIF_FLAGS_DOUBLE 8 +#define CIF_FLAGS_LDOUBLE 16 +#define CIF_FLAGS_POINTER 32 +#define CIF_FLAGS_STRUCT1 64 +#define CIF_FLAGS_STRUCT2 128 + +/* Perform machine dependent cif processing */ +ffi_status +ffi_prep_cif_machdep (ffi_cif *cif) +{ + /* Set the return type flag */ + switch (cif->rtype->type) + { + case FFI_TYPE_VOID: + cif->flags = 0; + break; + + case FFI_TYPE_STRUCT: + switch (cif->rtype->size) + { + case 1: + cif->flags = CIF_FLAGS_STRUCT1; + break; + case 2: + cif->flags = CIF_FLAGS_STRUCT2; + break; + case 4: + cif->flags = CIF_FLAGS_INT; + break; + case 8: + cif->flags = CIF_FLAGS_DINT; + break; + default: + cif->flags = 0; + break; + } + break; + + case FFI_TYPE_FLOAT: + cif->flags = CIF_FLAGS_FLOAT; + break; + + case FFI_TYPE_DOUBLE: + cif->flags = CIF_FLAGS_DOUBLE; + break; + +#if (FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE) + case FFI_TYPE_LONGDOUBLE: + cif->flags = CIF_FLAGS_LDOUBLE; + break; +#endif + + case FFI_TYPE_POINTER: + cif->flags = CIF_FLAGS_POINTER; + break; + + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + cif->flags = CIF_FLAGS_DINT; + break; + + default: + cif->flags = CIF_FLAGS_INT; + break; + } + + return FFI_OK; +} + +void +ffi_call (ffi_cif *cif, void (*fn) (), void *rvalue, void **avalue) +{ + extended_cif ecif; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return value + address then we need to make one. */ + + if (rvalue == NULL + && cif->rtype->type == FFI_TYPE_STRUCT + && cif->rtype->size > 8) + ecif.rvalue = alloca (cif->rtype->size); + else + ecif.rvalue = rvalue; + + switch (cif->abi) + { + case FFI_SYSV: + ffi_call_SYSV (&ecif, cif->bytes, cif->flags, + ecif.rvalue, fn); + break; + + default: + FFI_ASSERT (0); + break; + } +} + +static void +ffi_prep_incoming_args_SYSV (char *stack, void **avalue, ffi_cif *cif) +{ + unsigned int i; + void **p_argv; + char *argp; + ffi_type **p_arg; + + argp = stack; + p_argv = avalue; + + for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++) + { + size_t z; + + z = (*p_arg)->size; + if (z <= 4) + { + *p_argv = (void *) (argp + 4 - z); + + z = 4; + } + else + { + *p_argv = (void *) argp; + + /* Align if necessary */ + if ((sizeof(int) - 1) & z) + z = ALIGN(z, sizeof(int)); + } + + p_argv++; + argp += z; + } +} + +unsigned int +ffi_closure_SYSV_inner (ffi_closure *closure, void *resp, void *args) +{ + ffi_cif *cif; + void **arg_area; + + cif = closure->cif; + arg_area = (void**) alloca (cif->nargs * sizeof (void *)); + + ffi_prep_incoming_args_SYSV(args, arg_area, cif); + + (closure->fun) (cif, resp, arg_area, closure->user_data); + + return cif->flags; +} + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + void *codeloc) +{ + FFI_ASSERT (cif->abi == FFI_SYSV); + + *(unsigned short *)closure->tramp = 0x207c; + *(void **)(closure->tramp + 2) = codeloc; + *(unsigned short *)(closure->tramp + 6) = 0x4ef9; + if (cif->rtype->type == FFI_TYPE_STRUCT + && !cif->flags) + *(void **)(closure->tramp + 8) = ffi_closure_struct_SYSV; + else + *(void **)(closure->tramp + 8) = ffi_closure_SYSV; + +#ifdef __rtems__ + rtems_cache_flush_multiple_data_lines( codeloc, FFI_TRAMPOLINE_SIZE ); +#else + syscall(SYS_cacheflush, codeloc, FLUSH_SCOPE_LINE, + FLUSH_CACHE_BOTH, FFI_TRAMPOLINE_SIZE); +#endif + + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + + return FFI_OK; +} + diff --git a/libffi/src/m68k/ffitarget.h b/libffi/src/m68k/ffitarget.h new file mode 100644 index 000000000..633717bbf --- /dev/null +++ b/libffi/src/m68k/ffitarget.h @@ -0,0 +1,49 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 1996-2003 Red Hat, Inc. + Target configuration macros for Motorola 68K. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + FFI_SYSV, + FFI_DEFAULT_ABI = FFI_SYSV, + FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 +} ffi_abi; +#endif + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_TRAMPOLINE_SIZE 16 +#define FFI_NATIVE_RAW_API 0 + +#endif + diff --git a/libffi/src/m68k/sysv.S b/libffi/src/m68k/sysv.S new file mode 100644 index 000000000..c782f5192 --- /dev/null +++ b/libffi/src/m68k/sysv.S @@ -0,0 +1,270 @@ +/* ----------------------------------------------------------------------- + + sysv.S - Copyright (c) 1998 Andreas Schwab + Copyright (c) 2008 Red Hat, Inc. + + m68k Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +#ifdef HAVE_AS_CFI_PSEUDO_OP +#define CFI_STARTPROC() .cfi_startproc +#define CFI_OFFSET(reg,off) .cfi_offset reg,off +#define CFI_DEF_CFA(reg,off) .cfi_def_cfa reg,off +#define CFI_ENDPROC() .cfi_endproc +#else +#define CFI_STARTPROC() +#define CFI_OFFSET(reg,off) +#define CFI_DEF_CFA(reg,off) +#define CFI_ENDPROC() +#endif + + .text + + .globl ffi_call_SYSV + .type ffi_call_SYSV,@function + .align 4 + +ffi_call_SYSV: + CFI_STARTPROC() + link %fp,#0 + CFI_OFFSET(14,-8) + CFI_DEF_CFA(14,8) + move.l %d2,-(%sp) + CFI_OFFSET(2,-12) + + | Make room for all of the new args. + sub.l 12(%fp),%sp + + | Call ffi_prep_args + move.l 8(%fp),-(%sp) + pea 4(%sp) +#if !defined __PIC__ + jsr ffi_prep_args +#else + bsr.l ffi_prep_args@PLTPC +#endif + addq.l #8,%sp + + | Pass pointer to struct value, if any + move.l %a0,%a1 + + | Call the function + move.l 24(%fp),%a0 + jsr (%a0) + + | Remove the space we pushed for the args + add.l 12(%fp),%sp + + | Load the pointer to storage for the return value + move.l 20(%fp),%a1 + + | Load the return type code + move.l 16(%fp),%d2 + + | If the return value pointer is NULL, assume no return value. + | NOTE: On the mc68000, tst on an address register is not supported. +#if defined(__mc68000__) && !defined(__mcoldfire__) + cmp.w #0, %a1 +#else + tst.l %a1 +#endif + jbeq noretval + + btst #0,%d2 + jbeq retlongint + move.l %d0,(%a1) + jbra epilogue + +retlongint: + btst #1,%d2 + jbeq retfloat + move.l %d0,(%a1) + move.l %d1,4(%a1) + jbra epilogue + +retfloat: + btst #2,%d2 + jbeq retdouble +#if defined(__MC68881__) + fmove.s %fp0,(%a1) +#else + move.l %d0,(%a1) +#endif + jbra epilogue + +retdouble: + btst #3,%d2 + jbeq retlongdouble +#if defined(__MC68881__) + fmove.d %fp0,(%a1) +#else + move.l %d0,(%a1)+ + move.l %d1,(%a1) +#endif + jbra epilogue + +retlongdouble: + btst #4,%d2 + jbeq retpointer +#if defined(__MC68881__) + fmove.x %fp0,(%a1) +#else + move.l %d0,(%a1)+ + move.l %d1,(%a1)+ + move.l %d2,(%a1) +#endif + jbra epilogue + +retpointer: + btst #5,%d2 + jbeq retstruct1 + move.l %a0,(%a1) + jbra epilogue + +retstruct1: + btst #6,%d2 + jbeq retstruct2 + move.b %d0,(%a1) + jbra epilogue + +retstruct2: + btst #7,%d2 + jbeq noretval + move.w %d0,(%a1) + +noretval: +epilogue: + move.l (%sp)+,%d2 + unlk %fp + rts + CFI_ENDPROC() + .size ffi_call_SYSV,.-ffi_call_SYSV + + .globl ffi_closure_SYSV + .type ffi_closure_SYSV, @function + .align 4 + +ffi_closure_SYSV: + CFI_STARTPROC() + link %fp,#-12 + CFI_OFFSET(14,-8) + CFI_DEF_CFA(14,8) + move.l %sp,-12(%fp) + pea 8(%fp) + pea -12(%fp) + move.l %a0,-(%sp) +#if !defined __PIC__ + jsr ffi_closure_SYSV_inner +#else + bsr.l ffi_closure_SYSV_inner@PLTPC +#endif + + lsr.l #1,%d0 + jne 1f + jcc .Lcls_epilogue + move.l -12(%fp),%d0 +.Lcls_epilogue: + unlk %fp + rts +1: + lea -12(%fp),%a0 + lsr.l #2,%d0 + jne 1f + jcs .Lcls_ret_float + move.l (%a0)+,%d0 + move.l (%a0),%d1 + jra .Lcls_epilogue +.Lcls_ret_float: +#if defined(__MC68881__) + fmove.s (%a0),%fp0 +#else + move.l (%a0),%d0 +#endif + jra .Lcls_epilogue +1: + lsr.l #2,%d0 + jne 1f + jcs .Lcls_ret_ldouble +#if defined(__MC68881__) + fmove.d (%a0),%fp0 +#else + move.l (%a0)+,%d0 + move.l (%a0),%d1 +#endif + jra .Lcls_epilogue +.Lcls_ret_ldouble: +#if defined(__MC68881__) + fmove.x (%a0),%fp0 +#else + move.l (%a0)+,%d0 + move.l (%a0)+,%d1 + move.l (%a0),%d2 +#endif + jra .Lcls_epilogue +1: + lsr.l #2,%d0 + jne .Lcls_ret_struct2 + jcs .Lcls_ret_struct1 + move.l (%a0),%a0 + move.l %a0,%d0 + jra .Lcls_epilogue +.Lcls_ret_struct1: + move.b (%a0),%d0 + jra .Lcls_epilogue +.Lcls_ret_struct2: + move.w (%a0),%d0 + jra .Lcls_epilogue + CFI_ENDPROC() + + .size ffi_closure_SYSV,.-ffi_closure_SYSV + + .globl ffi_closure_struct_SYSV + .type ffi_closure_struct_SYSV, @function + .align 4 + +ffi_closure_struct_SYSV: + CFI_STARTPROC() + link %fp,#0 + CFI_OFFSET(14,-8) + CFI_DEF_CFA(14,8) + move.l %sp,-12(%fp) + pea 8(%fp) + move.l %a1,-(%sp) + move.l %a0,-(%sp) +#if !defined __PIC__ + jsr ffi_closure_SYSV_inner +#else + bsr.l ffi_closure_SYSV_inner@PLTPC +#endif + unlk %fp + rts + CFI_ENDPROC() + .size ffi_closure_struct_SYSV,.-ffi_closure_struct_SYSV + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libffi/src/mips/ffi.c b/libffi/src/mips/ffi.c new file mode 100644 index 000000000..d714cc9e9 --- /dev/null +++ b/libffi/src/mips/ffi.c @@ -0,0 +1,1029 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 1996, 2007, 2008 Red Hat, Inc. + Copyright (c) 2008 David Daney + + MIPS Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> + +#ifdef __GNUC__ +# if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 3)) +# define USE__BUILTIN___CLEAR_CACHE 1 +# endif +#endif + +#ifndef USE__BUILTIN___CLEAR_CACHE +#include <sys/cachectl.h> +#endif + +#ifdef FFI_DEBUG +# define FFI_MIPS_STOP_HERE() ffi_stop_here() +#else +# define FFI_MIPS_STOP_HERE() do {} while(0) +#endif + +#ifdef FFI_MIPS_N32 +#define FIX_ARGP \ +FFI_ASSERT(argp <= &stack[bytes]); \ +if (argp == &stack[bytes]) \ +{ \ + argp = stack; \ + FFI_MIPS_STOP_HERE(); \ +} +#else +#define FIX_ARGP +#endif + + +/* ffi_prep_args is called by the assembly routine once stack space + has been allocated for the function's arguments */ + +static void ffi_prep_args(char *stack, + extended_cif *ecif, + int bytes, + int flags) +{ + int i; + void **p_argv; + char *argp; + ffi_type **p_arg; + +#ifdef FFI_MIPS_N32 + /* If more than 8 double words are used, the remainder go + on the stack. We reorder stuff on the stack here to + support this easily. */ + if (bytes > 8 * sizeof(ffi_arg)) + argp = &stack[bytes - (8 * sizeof(ffi_arg))]; + else + argp = stack; +#else + argp = stack; +#endif + + memset(stack, 0, bytes); + +#ifdef FFI_MIPS_N32 + if ( ecif->cif->rstruct_flag != 0 ) +#else + if ( ecif->cif->rtype->type == FFI_TYPE_STRUCT ) +#endif + { + *(ffi_arg *) argp = (ffi_arg) ecif->rvalue; + argp += sizeof(ffi_arg); + FIX_ARGP; + } + + p_argv = ecif->avalue; + + for (i = 0, p_arg = ecif->cif->arg_types; i < ecif->cif->nargs; i++, p_arg++) + { + size_t z; + unsigned int a; + + /* Align if necessary. */ + a = (*p_arg)->alignment; + if (a < sizeof(ffi_arg)) + a = sizeof(ffi_arg); + + if ((a - 1) & (unsigned long) argp) + { + argp = (char *) ALIGN(argp, a); + FIX_ARGP; + } + + z = (*p_arg)->size; + if (z <= sizeof(ffi_arg)) + { + int type = (*p_arg)->type; + z = sizeof(ffi_arg); + + /* The size of a pointer depends on the ABI */ + if (type == FFI_TYPE_POINTER) + type = (ecif->cif->abi == FFI_N64 + || ecif->cif->abi == FFI_N64_SOFT_FLOAT) + ? FFI_TYPE_SINT64 : FFI_TYPE_SINT32; + + if (i < 8 && (ecif->cif->abi == FFI_N32_SOFT_FLOAT + || ecif->cif->abi == FFI_N64_SOFT_FLOAT)) + { + switch (type) + { + case FFI_TYPE_FLOAT: + type = FFI_TYPE_UINT32; + break; + case FFI_TYPE_DOUBLE: + type = FFI_TYPE_UINT64; + break; + default: + break; + } + } + switch (type) + { + case FFI_TYPE_SINT8: + *(ffi_arg *)argp = *(SINT8 *)(* p_argv); + break; + + case FFI_TYPE_UINT8: + *(ffi_arg *)argp = *(UINT8 *)(* p_argv); + break; + + case FFI_TYPE_SINT16: + *(ffi_arg *)argp = *(SINT16 *)(* p_argv); + break; + + case FFI_TYPE_UINT16: + *(ffi_arg *)argp = *(UINT16 *)(* p_argv); + break; + + case FFI_TYPE_SINT32: + *(ffi_arg *)argp = *(SINT32 *)(* p_argv); + break; + + case FFI_TYPE_UINT32: + *(ffi_arg *)argp = *(UINT32 *)(* p_argv); + break; + + /* This can only happen with 64bit slots. */ + case FFI_TYPE_FLOAT: + *(float *) argp = *(float *)(* p_argv); + break; + + /* Handle structures. */ + default: + memcpy(argp, *p_argv, (*p_arg)->size); + break; + } + } + else + { +#ifdef FFI_MIPS_O32 + memcpy(argp, *p_argv, z); +#else + { + unsigned long end = (unsigned long) argp + z; + unsigned long cap = (unsigned long) stack + bytes; + + /* Check if the data will fit within the register space. + Handle it if it doesn't. */ + + if (end <= cap) + memcpy(argp, *p_argv, z); + else + { + unsigned long portion = cap - (unsigned long)argp; + + memcpy(argp, *p_argv, portion); + argp = stack; + z -= portion; + memcpy(argp, (void*)((unsigned long)(*p_argv) + portion), + z); + } + } +#endif + } + p_argv++; + argp += z; + FIX_ARGP; + } +} + +#ifdef FFI_MIPS_N32 + +/* The n32 spec says that if "a chunk consists solely of a double + float field (but not a double, which is part of a union), it + is passed in a floating point register. Any other chunk is + passed in an integer register". This code traverses structure + definitions and generates the appropriate flags. */ + +static unsigned +calc_n32_struct_flags(int soft_float, ffi_type *arg, + unsigned *loc, unsigned *arg_reg) +{ + unsigned flags = 0; + unsigned index = 0; + + ffi_type *e; + + if (soft_float) + return 0; + + while ((e = arg->elements[index])) + { + /* Align this object. */ + *loc = ALIGN(*loc, e->alignment); + if (e->type == FFI_TYPE_DOUBLE) + { + /* Already aligned to FFI_SIZEOF_ARG. */ + *arg_reg = *loc / FFI_SIZEOF_ARG; + if (*arg_reg > 7) + break; + flags += (FFI_TYPE_DOUBLE << (*arg_reg * FFI_FLAG_BITS)); + *loc += e->size; + } + else + *loc += e->size; + index++; + } + /* Next Argument register at alignment of FFI_SIZEOF_ARG. */ + *arg_reg = ALIGN(*loc, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG; + + return flags; +} + +static unsigned +calc_n32_return_struct_flags(int soft_float, ffi_type *arg) +{ + unsigned flags = 0; + unsigned small = FFI_TYPE_SMALLSTRUCT; + ffi_type *e; + + /* Returning structures under n32 is a tricky thing. + A struct with only one or two floating point fields + is returned in $f0 (and $f2 if necessary). Any other + struct results at most 128 bits are returned in $2 + (the first 64 bits) and $3 (remainder, if necessary). + Larger structs are handled normally. */ + + if (arg->size > 16) + return 0; + + if (arg->size > 8) + small = FFI_TYPE_SMALLSTRUCT2; + + e = arg->elements[0]; + + if (e->type == FFI_TYPE_DOUBLE) + flags = FFI_TYPE_DOUBLE; + else if (e->type == FFI_TYPE_FLOAT) + flags = FFI_TYPE_FLOAT; + + if (flags && (e = arg->elements[1])) + { + if (e->type == FFI_TYPE_DOUBLE) + flags += FFI_TYPE_DOUBLE << FFI_FLAG_BITS; + else if (e->type == FFI_TYPE_FLOAT) + flags += FFI_TYPE_FLOAT << FFI_FLAG_BITS; + else + return small; + + if (flags && (arg->elements[2])) + { + /* There are three arguments and the first two are + floats! This must be passed the old way. */ + return small; + } + if (soft_float) + flags += FFI_TYPE_STRUCT_SOFT; + } + else + if (!flags) + return small; + + return flags; +} + +#endif + +/* Perform machine dependent cif processing */ +ffi_status ffi_prep_cif_machdep(ffi_cif *cif) +{ + cif->flags = 0; + +#ifdef FFI_MIPS_O32 + /* Set the flags necessary for O32 processing. FFI_O32_SOFT_FLOAT + * does not have special handling for floating point args. + */ + + if (cif->rtype->type != FFI_TYPE_STRUCT && cif->abi == FFI_O32) + { + if (cif->nargs > 0) + { + switch ((cif->arg_types)[0]->type) + { + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + cif->flags += (cif->arg_types)[0]->type; + break; + + default: + break; + } + + if (cif->nargs > 1) + { + /* Only handle the second argument if the first + is a float or double. */ + if (cif->flags) + { + switch ((cif->arg_types)[1]->type) + { + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + cif->flags += (cif->arg_types)[1]->type << FFI_FLAG_BITS; + break; + + default: + break; + } + } + } + } + } + + /* Set the return type flag */ + + if (cif->abi == FFI_O32_SOFT_FLOAT) + { + switch (cif->rtype->type) + { + case FFI_TYPE_VOID: + case FFI_TYPE_STRUCT: + cif->flags += cif->rtype->type << (FFI_FLAG_BITS * 2); + break; + + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_DOUBLE: + cif->flags += FFI_TYPE_UINT64 << (FFI_FLAG_BITS * 2); + break; + + case FFI_TYPE_FLOAT: + default: + cif->flags += FFI_TYPE_INT << (FFI_FLAG_BITS * 2); + break; + } + } + else + { + /* FFI_O32 */ + switch (cif->rtype->type) + { + case FFI_TYPE_VOID: + case FFI_TYPE_STRUCT: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + cif->flags += cif->rtype->type << (FFI_FLAG_BITS * 2); + break; + + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + cif->flags += FFI_TYPE_UINT64 << (FFI_FLAG_BITS * 2); + break; + + default: + cif->flags += FFI_TYPE_INT << (FFI_FLAG_BITS * 2); + break; + } + } +#endif + +#ifdef FFI_MIPS_N32 + /* Set the flags necessary for N32 processing */ + { + int type; + unsigned arg_reg = 0; + unsigned loc = 0; + unsigned count = (cif->nargs < 8) ? cif->nargs : 8; + unsigned index = 0; + + unsigned struct_flags = 0; + int soft_float = (cif->abi == FFI_N32_SOFT_FLOAT + || cif->abi == FFI_N64_SOFT_FLOAT); + + if (cif->rtype->type == FFI_TYPE_STRUCT) + { + struct_flags = calc_n32_return_struct_flags(soft_float, cif->rtype); + + if (struct_flags == 0) + { + /* This means that the structure is being passed as + a hidden argument */ + + arg_reg = 1; + count = (cif->nargs < 7) ? cif->nargs : 7; + + cif->rstruct_flag = !0; + } + else + cif->rstruct_flag = 0; + } + else + cif->rstruct_flag = 0; + + while (count-- > 0 && arg_reg < 8) + { + type = (cif->arg_types)[index]->type; + if (soft_float) + { + switch (type) + { + case FFI_TYPE_FLOAT: + type = FFI_TYPE_UINT32; + break; + case FFI_TYPE_DOUBLE: + type = FFI_TYPE_UINT64; + break; + default: + break; + } + } + switch (type) + { + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + cif->flags += + ((cif->arg_types)[index]->type << (arg_reg * FFI_FLAG_BITS)); + arg_reg++; + break; + case FFI_TYPE_LONGDOUBLE: + /* Align it. */ + arg_reg = ALIGN(arg_reg, 2); + /* Treat it as two adjacent doubles. */ + if (soft_float) + { + arg_reg += 2; + } + else + { + cif->flags += + (FFI_TYPE_DOUBLE << (arg_reg * FFI_FLAG_BITS)); + arg_reg++; + cif->flags += + (FFI_TYPE_DOUBLE << (arg_reg * FFI_FLAG_BITS)); + arg_reg++; + } + break; + + case FFI_TYPE_STRUCT: + loc = arg_reg * FFI_SIZEOF_ARG; + cif->flags += calc_n32_struct_flags(soft_float, + (cif->arg_types)[index], + &loc, &arg_reg); + break; + + default: + arg_reg++; + break; + } + + index++; + } + + /* Set the return type flag */ + switch (cif->rtype->type) + { + case FFI_TYPE_STRUCT: + { + if (struct_flags == 0) + { + /* The structure is returned through a hidden + first argument. Do nothing, 'cause FFI_TYPE_VOID + is 0 */ + } + else + { + /* The structure is returned via some tricky + mechanism */ + cif->flags += FFI_TYPE_STRUCT << (FFI_FLAG_BITS * 8); + cif->flags += struct_flags << (4 + (FFI_FLAG_BITS * 8)); + } + break; + } + + case FFI_TYPE_VOID: + /* Do nothing, 'cause FFI_TYPE_VOID is 0 */ + break; + + case FFI_TYPE_POINTER: + if (cif->abi == FFI_N32_SOFT_FLOAT || cif->abi == FFI_N32) + cif->flags += FFI_TYPE_SINT32 << (FFI_FLAG_BITS * 8); + else + cif->flags += FFI_TYPE_INT << (FFI_FLAG_BITS * 8); + break; + + case FFI_TYPE_FLOAT: + if (soft_float) + { + cif->flags += FFI_TYPE_SINT32 << (FFI_FLAG_BITS * 8); + break; + } + /* else fall through */ + case FFI_TYPE_DOUBLE: + if (soft_float) + cif->flags += FFI_TYPE_INT << (FFI_FLAG_BITS * 8); + else + cif->flags += cif->rtype->type << (FFI_FLAG_BITS * 8); + break; + + case FFI_TYPE_LONGDOUBLE: + /* Long double is returned as if it were a struct containing + two doubles. */ + if (soft_float) + { + cif->flags += FFI_TYPE_STRUCT << (FFI_FLAG_BITS * 8); + cif->flags += FFI_TYPE_SMALLSTRUCT2 << (4 + (FFI_FLAG_BITS * 8)); + } + else + { + cif->flags += FFI_TYPE_STRUCT << (FFI_FLAG_BITS * 8); + cif->flags += (FFI_TYPE_DOUBLE + + (FFI_TYPE_DOUBLE << FFI_FLAG_BITS)) + << (4 + (FFI_FLAG_BITS * 8)); + } + break; + default: + cif->flags += FFI_TYPE_INT << (FFI_FLAG_BITS * 8); + break; + } + } +#endif + + return FFI_OK; +} + +/* Low level routine for calling O32 functions */ +extern int ffi_call_O32(void (*)(char *, extended_cif *, int, int), + extended_cif *, unsigned, + unsigned, unsigned *, void (*)(void)); + +/* Low level routine for calling N32 functions */ +extern int ffi_call_N32(void (*)(char *, extended_cif *, int, int), + extended_cif *, unsigned, + unsigned, void *, void (*)(void)); + +void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + extended_cif ecif; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return */ + /* value address then we need to make one */ + + if ((rvalue == NULL) && + (cif->rtype->type == FFI_TYPE_STRUCT)) + ecif.rvalue = alloca(cif->rtype->size); + else + ecif.rvalue = rvalue; + + switch (cif->abi) + { +#ifdef FFI_MIPS_O32 + case FFI_O32: + case FFI_O32_SOFT_FLOAT: + ffi_call_O32(ffi_prep_args, &ecif, cif->bytes, + cif->flags, ecif.rvalue, fn); + break; +#endif + +#ifdef FFI_MIPS_N32 + case FFI_N32: + case FFI_N32_SOFT_FLOAT: + case FFI_N64: + case FFI_N64_SOFT_FLOAT: + { + int copy_rvalue = 0; + int copy_offset = 0; + char *rvalue_copy = ecif.rvalue; + if (cif->rtype->type == FFI_TYPE_STRUCT && cif->rtype->size < 16) + { + /* For structures smaller than 16 bytes we clobber memory + in 8 byte increments. Make a copy so we don't clobber + the callers memory outside of the struct bounds. */ + rvalue_copy = alloca(16); + copy_rvalue = 1; + } + else if (cif->rtype->type == FFI_TYPE_FLOAT + && (cif->abi == FFI_N64_SOFT_FLOAT + || cif->abi == FFI_N32_SOFT_FLOAT)) + { + rvalue_copy = alloca (8); + copy_rvalue = 1; +#if defined(__MIPSEB__) || defined(_MIPSEB) + copy_offset = 4; +#endif + } + ffi_call_N32(ffi_prep_args, &ecif, cif->bytes, + cif->flags, rvalue_copy, fn); + if (copy_rvalue) + memcpy(ecif.rvalue, rvalue_copy + copy_offset, cif->rtype->size); + } + break; +#endif + + default: + FFI_ASSERT(0); + break; + } +} + +#if FFI_CLOSURES +#if defined(FFI_MIPS_O32) +extern void ffi_closure_O32(void); +#else +extern void ffi_closure_N32(void); +#endif /* FFI_MIPS_O32 */ + +ffi_status +ffi_prep_closure_loc (ffi_closure *closure, + ffi_cif *cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + void *codeloc) +{ + unsigned int *tramp = (unsigned int *) &closure->tramp[0]; + void * fn; + char *clear_location = (char *) codeloc; + +#if defined(FFI_MIPS_O32) + FFI_ASSERT(cif->abi == FFI_O32 || cif->abi == FFI_O32_SOFT_FLOAT); + fn = ffi_closure_O32; +#else /* FFI_MIPS_N32 */ + FFI_ASSERT(cif->abi == FFI_N32 || cif->abi == FFI_N64); + fn = ffi_closure_N32; +#endif /* FFI_MIPS_O32 */ + +#if defined(FFI_MIPS_O32) || (_MIPS_SIM ==_ABIN32) + /* lui $25,high(fn) */ + tramp[0] = 0x3c190000 | ((unsigned)fn >> 16); + /* ori $25,low(fn) */ + tramp[1] = 0x37390000 | ((unsigned)fn & 0xffff); + /* lui $12,high(codeloc) */ + tramp[2] = 0x3c0c0000 | ((unsigned)codeloc >> 16); + /* jr $25 */ + tramp[3] = 0x03200008; + /* ori $12,low(codeloc) */ + tramp[4] = 0x358c0000 | ((unsigned)codeloc & 0xffff); +#else + /* N64 has a somewhat larger trampoline. */ + /* lui $25,high(fn) */ + tramp[0] = 0x3c190000 | ((unsigned long)fn >> 48); + /* lui $12,high(codeloc) */ + tramp[1] = 0x3c0c0000 | ((unsigned long)codeloc >> 48); + /* ori $25,mid-high(fn) */ + tramp[2] = 0x37390000 | (((unsigned long)fn >> 32 ) & 0xffff); + /* ori $12,mid-high(codeloc) */ + tramp[3] = 0x358c0000 | (((unsigned long)codeloc >> 32) & 0xffff); + /* dsll $25,$25,16 */ + tramp[4] = 0x0019cc38; + /* dsll $12,$12,16 */ + tramp[5] = 0x000c6438; + /* ori $25,mid-low(fn) */ + tramp[6] = 0x37390000 | (((unsigned long)fn >> 16 ) & 0xffff); + /* ori $12,mid-low(codeloc) */ + tramp[7] = 0x358c0000 | (((unsigned long)codeloc >> 16) & 0xffff); + /* dsll $25,$25,16 */ + tramp[8] = 0x0019cc38; + /* dsll $12,$12,16 */ + tramp[9] = 0x000c6438; + /* ori $25,low(fn) */ + tramp[10] = 0x37390000 | ((unsigned long)fn & 0xffff); + /* jr $25 */ + tramp[11] = 0x03200008; + /* ori $12,low(codeloc) */ + tramp[12] = 0x358c0000 | ((unsigned long)codeloc & 0xffff); + +#endif + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + +#ifdef USE__BUILTIN___CLEAR_CACHE + __builtin___clear_cache(clear_location, clear_location + FFI_TRAMPOLINE_SIZE); +#else + cacheflush (clear_location, FFI_TRAMPOLINE_SIZE, ICACHE); +#endif + return FFI_OK; +} + +/* + * Decodes the arguments to a function, which will be stored on the + * stack. AR is the pointer to the beginning of the integer arguments + * (and, depending upon the arguments, some floating-point arguments + * as well). FPR is a pointer to the area where floating point + * registers have been saved, if any. + * + * RVALUE is the location where the function return value will be + * stored. CLOSURE is the prepared closure to invoke. + * + * This function should only be called from assembly, which is in + * turn called from a trampoline. + * + * Returns the function return type. + * + * Based on the similar routine for sparc. + */ +int +ffi_closure_mips_inner_O32 (ffi_closure *closure, + void *rvalue, ffi_arg *ar, + double *fpr) +{ + ffi_cif *cif; + void **avaluep; + ffi_arg *avalue; + ffi_type **arg_types; + int i, avn, argn, seen_int; + + cif = closure->cif; + avalue = alloca (cif->nargs * sizeof (ffi_arg)); + avaluep = alloca (cif->nargs * sizeof (ffi_arg)); + + seen_int = (cif->abi == FFI_O32_SOFT_FLOAT); + argn = 0; + + if ((cif->flags >> (FFI_FLAG_BITS * 2)) == FFI_TYPE_STRUCT) + { + rvalue = (void *)(UINT32)ar[0]; + argn = 1; + } + + i = 0; + avn = cif->nargs; + arg_types = cif->arg_types; + + while (i < avn) + { + if (i < 2 && !seen_int && + (arg_types[i]->type == FFI_TYPE_FLOAT || + arg_types[i]->type == FFI_TYPE_DOUBLE || + arg_types[i]->type == FFI_TYPE_LONGDOUBLE)) + { +#if defined(__MIPSEB__) || defined(_MIPSEB) + if (arg_types[i]->type == FFI_TYPE_FLOAT) + avaluep[i] = ((char *) &fpr[i]) + sizeof (float); + else +#endif + avaluep[i] = (char *) &fpr[i]; + } + else + { + if (arg_types[i]->alignment == 8 && (argn & 0x1)) + argn++; + switch (arg_types[i]->type) + { + case FFI_TYPE_SINT8: + avaluep[i] = &avalue[i]; + *(SINT8 *) &avalue[i] = (SINT8) ar[argn]; + break; + + case FFI_TYPE_UINT8: + avaluep[i] = &avalue[i]; + *(UINT8 *) &avalue[i] = (UINT8) ar[argn]; + break; + + case FFI_TYPE_SINT16: + avaluep[i] = &avalue[i]; + *(SINT16 *) &avalue[i] = (SINT16) ar[argn]; + break; + + case FFI_TYPE_UINT16: + avaluep[i] = &avalue[i]; + *(UINT16 *) &avalue[i] = (UINT16) ar[argn]; + break; + + default: + avaluep[i] = (char *) &ar[argn]; + break; + } + seen_int = 1; + } + argn += ALIGN(arg_types[i]->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG; + i++; + } + + /* Invoke the closure. */ + (closure->fun) (cif, rvalue, avaluep, closure->user_data); + + if (cif->abi == FFI_O32_SOFT_FLOAT) + { + switch (cif->rtype->type) + { + case FFI_TYPE_FLOAT: + return FFI_TYPE_INT; + case FFI_TYPE_DOUBLE: + return FFI_TYPE_UINT64; + default: + return cif->rtype->type; + } + } + else + { + return cif->rtype->type; + } +} + +#if defined(FFI_MIPS_N32) + +static void +copy_struct_N32(char *target, unsigned offset, ffi_abi abi, ffi_type *type, + int argn, unsigned arg_offset, ffi_arg *ar, + ffi_arg *fpr, int soft_float) +{ + ffi_type **elt_typep = type->elements; + while(*elt_typep) + { + ffi_type *elt_type = *elt_typep; + unsigned o; + char *tp; + char *argp; + char *fpp; + + o = ALIGN(offset, elt_type->alignment); + arg_offset += o - offset; + offset = o; + argn += arg_offset / sizeof(ffi_arg); + arg_offset = arg_offset % sizeof(ffi_arg); + + argp = (char *)(ar + argn); + fpp = (char *)(argn >= 8 ? ar + argn : fpr + argn); + + tp = target + offset; + + if (elt_type->type == FFI_TYPE_DOUBLE && !soft_float) + *(double *)tp = *(double *)fpp; + else + memcpy(tp, argp + arg_offset, elt_type->size); + + offset += elt_type->size; + arg_offset += elt_type->size; + elt_typep++; + argn += arg_offset / sizeof(ffi_arg); + arg_offset = arg_offset % sizeof(ffi_arg); + } +} + +/* + * Decodes the arguments to a function, which will be stored on the + * stack. AR is the pointer to the beginning of the integer + * arguments. FPR is a pointer to the area where floating point + * registers have been saved. + * + * RVALUE is the location where the function return value will be + * stored. CLOSURE is the prepared closure to invoke. + * + * This function should only be called from assembly, which is in + * turn called from a trampoline. + * + * Returns the function return flags. + * + */ +int +ffi_closure_mips_inner_N32 (ffi_closure *closure, + void *rvalue, ffi_arg *ar, + ffi_arg *fpr) +{ + ffi_cif *cif; + void **avaluep; + ffi_arg *avalue; + ffi_type **arg_types; + int i, avn, argn; + int soft_float; + ffi_arg *argp; + + cif = closure->cif; + soft_float = cif->abi == FFI_N64_SOFT_FLOAT + || cif->abi == FFI_N32_SOFT_FLOAT; + avalue = alloca (cif->nargs * sizeof (ffi_arg)); + avaluep = alloca (cif->nargs * sizeof (ffi_arg)); + + argn = 0; + + if (cif->rstruct_flag) + { +#if _MIPS_SIM==_ABIN32 + rvalue = (void *)(UINT32)ar[0]; +#else /* N64 */ + rvalue = (void *)ar[0]; +#endif + argn = 1; + } + + i = 0; + avn = cif->nargs; + arg_types = cif->arg_types; + + while (i < avn) + { + if (arg_types[i]->type == FFI_TYPE_FLOAT + || arg_types[i]->type == FFI_TYPE_DOUBLE + || arg_types[i]->type == FFI_TYPE_LONGDOUBLE) + { + argp = (argn >= 8 || soft_float) ? ar + argn : fpr + argn; + if ((arg_types[i]->type == FFI_TYPE_LONGDOUBLE) && ((unsigned)argp & (arg_types[i]->alignment-1))) + { + argp=(ffi_arg*)ALIGN(argp,arg_types[i]->alignment); + argn++; + } +#if defined(__MIPSEB__) || defined(_MIPSEB) + if (arg_types[i]->type == FFI_TYPE_FLOAT && argn < 8) + avaluep[i] = ((char *) argp) + sizeof (float); + else +#endif + avaluep[i] = (char *) argp; + } + else + { + unsigned type = arg_types[i]->type; + + if (arg_types[i]->alignment > sizeof(ffi_arg)) + argn = ALIGN(argn, arg_types[i]->alignment / sizeof(ffi_arg)); + + argp = ar + argn; + + /* The size of a pointer depends on the ABI */ + if (type == FFI_TYPE_POINTER) + type = (cif->abi == FFI_N64 || cif->abi == FFI_N64_SOFT_FLOAT) + ? FFI_TYPE_SINT64 : FFI_TYPE_SINT32; + + if (soft_float && type == FFI_TYPE_FLOAT) + type = FFI_TYPE_UINT32; + + switch (type) + { + case FFI_TYPE_SINT8: + avaluep[i] = &avalue[i]; + *(SINT8 *) &avalue[i] = (SINT8) *argp; + break; + + case FFI_TYPE_UINT8: + avaluep[i] = &avalue[i]; + *(UINT8 *) &avalue[i] = (UINT8) *argp; + break; + + case FFI_TYPE_SINT16: + avaluep[i] = &avalue[i]; + *(SINT16 *) &avalue[i] = (SINT16) *argp; + break; + + case FFI_TYPE_UINT16: + avaluep[i] = &avalue[i]; + *(UINT16 *) &avalue[i] = (UINT16) *argp; + break; + + case FFI_TYPE_SINT32: + avaluep[i] = &avalue[i]; + *(SINT32 *) &avalue[i] = (SINT32) *argp; + break; + + case FFI_TYPE_UINT32: + avaluep[i] = &avalue[i]; + *(UINT32 *) &avalue[i] = (UINT32) *argp; + break; + + case FFI_TYPE_STRUCT: + if (argn < 8) + { + /* Allocate space for the struct as at least part of + it was passed in registers. */ + avaluep[i] = alloca(arg_types[i]->size); + copy_struct_N32(avaluep[i], 0, cif->abi, arg_types[i], + argn, 0, ar, fpr, soft_float); + + break; + } + /* Else fall through. */ + default: + avaluep[i] = (char *) argp; + break; + } + } + argn += ALIGN(arg_types[i]->size, sizeof(ffi_arg)) / sizeof(ffi_arg); + i++; + } + + /* Invoke the closure. */ + (closure->fun) (cif, rvalue, avaluep, closure->user_data); + + return cif->flags >> (FFI_FLAG_BITS * 8); +} + +#endif /* FFI_MIPS_N32 */ + +#endif /* FFI_CLOSURES */ diff --git a/libffi/src/mips/ffitarget.h b/libffi/src/mips/ffitarget.h new file mode 100644 index 000000000..d0fc983a7 --- /dev/null +++ b/libffi/src/mips/ffitarget.h @@ -0,0 +1,243 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 1996-2003 Red Hat, Inc. + Target configuration macros for MIPS. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#ifdef linux +# include <asm/sgidefs.h> +#elif defined(__rtems__) +/* + * Subprogram calling convention - copied from sgidefs.h + */ +#define _MIPS_SIM_ABI32 1 +#define _MIPS_SIM_NABI32 2 +#define _MIPS_SIM_ABI64 3 +#else +# include <sgidefs.h> +#endif + +# ifndef _ABIN32 +# define _ABIN32 _MIPS_SIM_NABI32 +# endif +# ifndef _ABI64 +# define _ABI64 _MIPS_SIM_ABI64 +# endif +# ifndef _ABIO32 +# define _ABIO32 _MIPS_SIM_ABI32 +# endif + +#if !defined(_MIPS_SIM) +-- something is very wrong -- +#else +# if (_MIPS_SIM==_ABIN32 && defined(_ABIN32)) || (_MIPS_SIM==_ABI64 && defined(_ABI64)) +# define FFI_MIPS_N32 +# else +# if (_MIPS_SIM==_ABIO32 && defined(_ABIO32)) +# define FFI_MIPS_O32 +# else +-- this is an unsupported platform -- +# endif +# endif +#endif + +#ifdef FFI_MIPS_O32 +/* O32 stack frames have 32bit integer args */ +# define FFI_SIZEOF_ARG 4 +#else +/* N32 and N64 frames have 64bit integer args */ +# define FFI_SIZEOF_ARG 8 +# if _MIPS_SIM == _ABIN32 +# define FFI_SIZEOF_JAVA_RAW 4 +# endif +#endif + +#define FFI_FLAG_BITS 2 + +/* SGI's strange assembler requires that we multiply by 4 rather + than shift left by FFI_FLAG_BITS */ + +#define FFI_ARGS_D FFI_TYPE_DOUBLE +#define FFI_ARGS_F FFI_TYPE_FLOAT +#define FFI_ARGS_DD FFI_TYPE_DOUBLE * 4 + FFI_TYPE_DOUBLE +#define FFI_ARGS_FF FFI_TYPE_FLOAT * 4 + FFI_TYPE_FLOAT +#define FFI_ARGS_FD FFI_TYPE_DOUBLE * 4 + FFI_TYPE_FLOAT +#define FFI_ARGS_DF FFI_TYPE_FLOAT * 4 + FFI_TYPE_DOUBLE + +/* Needed for N32 structure returns */ +#define FFI_TYPE_SMALLSTRUCT FFI_TYPE_UINT8 +#define FFI_TYPE_SMALLSTRUCT2 FFI_TYPE_SINT8 + +#if 0 +/* The SGI assembler can't handle this.. */ +#define FFI_TYPE_STRUCT_DD (( FFI_ARGS_DD ) << 4) + FFI_TYPE_STRUCT +/* (and so on) */ +#else +/* ...so we calculate these by hand! */ +#define FFI_TYPE_STRUCT_D 61 +#define FFI_TYPE_STRUCT_F 45 +#define FFI_TYPE_STRUCT_DD 253 +#define FFI_TYPE_STRUCT_FF 173 +#define FFI_TYPE_STRUCT_FD 237 +#define FFI_TYPE_STRUCT_DF 189 +#define FFI_TYPE_STRUCT_SMALL 93 +#define FFI_TYPE_STRUCT_SMALL2 109 + +/* and for n32 soft float, add 16 * 2^4 */ +#define FFI_TYPE_STRUCT_D_SOFT 317 +#define FFI_TYPE_STRUCT_F_SOFT 301 +#define FFI_TYPE_STRUCT_DD_SOFT 509 +#define FFI_TYPE_STRUCT_FF_SOFT 429 +#define FFI_TYPE_STRUCT_FD_SOFT 493 +#define FFI_TYPE_STRUCT_DF_SOFT 445 +#define FFI_TYPE_STRUCT_SOFT 16 +#endif + +#ifdef LIBFFI_ASM +#define v0 $2 +#define v1 $3 +#define a0 $4 +#define a1 $5 +#define a2 $6 +#define a3 $7 +#define a4 $8 +#define a5 $9 +#define a6 $10 +#define a7 $11 +#define t0 $8 +#define t1 $9 +#define t2 $10 +#define t3 $11 +#define t4 $12 +#define t5 $13 +#define t6 $14 +#define t7 $15 +#define t8 $24 +#define t9 $25 +#define ra $31 + +#ifdef FFI_MIPS_O32 +# define REG_L lw +# define REG_S sw +# define SUBU subu +# define ADDU addu +# define SRL srl +# define LI li +#else /* !FFI_MIPS_O32 */ +# define REG_L ld +# define REG_S sd +# define SUBU dsubu +# define ADDU daddu +# define SRL dsrl +# define LI dli +# if (_MIPS_SIM==_ABI64) +# define LA dla +# define EH_FRAME_ALIGN 3 +# define FDE_ADDR_BYTES .8byte +# else +# define LA la +# define EH_FRAME_ALIGN 2 +# define FDE_ADDR_BYTES .4byte +# endif /* _MIPS_SIM==_ABI64 */ +#endif /* !FFI_MIPS_O32 */ +#else /* !LIBFFI_ASM */ +# ifdef __GNUC__ +# ifdef FFI_MIPS_O32 +/* O32 stack frames have 32bit integer args */ +typedef unsigned int ffi_arg __attribute__((__mode__(__SI__))); +typedef signed int ffi_sarg __attribute__((__mode__(__SI__))); +#else +/* N32 and N64 frames have 64bit integer args */ +typedef unsigned int ffi_arg __attribute__((__mode__(__DI__))); +typedef signed int ffi_sarg __attribute__((__mode__(__DI__))); +# endif +# else +# ifdef FFI_MIPS_O32 +/* O32 stack frames have 32bit integer args */ +typedef __uint32_t ffi_arg; +typedef __int32_t ffi_sarg; +# else +/* N32 and N64 frames have 64bit integer args */ +typedef __uint64_t ffi_arg; +typedef __int64_t ffi_sarg; +# endif +# endif /* __GNUC__ */ + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + FFI_O32, + FFI_N32, + FFI_N64, + FFI_O32_SOFT_FLOAT, + FFI_N32_SOFT_FLOAT, + FFI_N64_SOFT_FLOAT, + +#ifdef FFI_MIPS_O32 +#ifdef __mips_soft_float + FFI_DEFAULT_ABI = FFI_O32_SOFT_FLOAT, +#else + FFI_DEFAULT_ABI = FFI_O32, +#endif +#else +# if _MIPS_SIM==_ABI64 +# ifdef __mips_soft_float + FFI_DEFAULT_ABI = FFI_N64_SOFT_FLOAT, +# else + FFI_DEFAULT_ABI = FFI_N64, +# endif +# else +# ifdef __mips_soft_float + FFI_DEFAULT_ABI = FFI_N32_SOFT_FLOAT, +# else + FFI_DEFAULT_ABI = FFI_N32, +# endif +# endif +#endif + + FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 +} ffi_abi; + +#define FFI_EXTRA_CIF_FIELDS unsigned rstruct_flag +#endif /* !LIBFFI_ASM */ + +/* ---- Definitions for closures ----------------------------------------- */ + +#if defined(FFI_MIPS_O32) +#define FFI_CLOSURES 1 +#define FFI_TRAMPOLINE_SIZE 20 +#else +/* N32/N64. */ +# define FFI_CLOSURES 1 +#if _MIPS_SIM==_ABI64 +#define FFI_TRAMPOLINE_SIZE 52 +#else +#define FFI_TRAMPOLINE_SIZE 20 +#endif +#endif /* FFI_MIPS_O32 */ +#define FFI_NATIVE_RAW_API 0 + +#endif + diff --git a/libffi/src/mips/n32.S b/libffi/src/mips/n32.S new file mode 100644 index 000000000..ae2309466 --- /dev/null +++ b/libffi/src/mips/n32.S @@ -0,0 +1,591 @@ +/* ----------------------------------------------------------------------- + n32.S - Copyright (c) 1996, 1998, 2005, 2007, 2009, 2010 Red Hat, Inc. + + MIPS Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +/* Only build this code if we are compiling for n32 */ + +#if defined(FFI_MIPS_N32) + +#define callback a0 +#define bytes a2 +#define flags a3 +#define raddr a4 +#define fn a5 + +#define SIZEOF_FRAME ( 8 * FFI_SIZEOF_ARG ) + +#ifdef __GNUC__ + .abicalls +#endif + .text + .align 2 + .globl ffi_call_N32 + .ent ffi_call_N32 +ffi_call_N32: +.LFB3: + .frame $fp, SIZEOF_FRAME, ra + .mask 0xc0000000,-FFI_SIZEOF_ARG + .fmask 0x00000000,0 + + # Prologue + SUBU $sp, SIZEOF_FRAME # Frame size +.LCFI0: + REG_S $fp, SIZEOF_FRAME - 2*FFI_SIZEOF_ARG($sp) # Save frame pointer + REG_S ra, SIZEOF_FRAME - 1*FFI_SIZEOF_ARG($sp) # Save return address +.LCFI1: + move $fp, $sp +.LCFI3: + move t9, callback # callback function pointer + REG_S bytes, 2*FFI_SIZEOF_ARG($fp) # bytes + REG_S flags, 3*FFI_SIZEOF_ARG($fp) # flags + REG_S raddr, 4*FFI_SIZEOF_ARG($fp) # raddr + REG_S fn, 5*FFI_SIZEOF_ARG($fp) # fn + + # Allocate at least 4 words in the argstack + move v0, bytes + bge bytes, 4 * FFI_SIZEOF_ARG, bigger + LI v0, 4 * FFI_SIZEOF_ARG + b sixteen + + bigger: + ADDU t4, v0, 2 * FFI_SIZEOF_ARG -1 # make sure it is aligned + and v0, t4, -2 * FFI_SIZEOF_ARG # to a proper boundry. + +sixteen: + SUBU $sp, $sp, v0 # move the stack pointer to reflect the + # arg space + + move a0, $sp # 4 * FFI_SIZEOF_ARG + ADDU a3, $fp, 3 * FFI_SIZEOF_ARG + + # Call ffi_prep_args + jal t9 + + # Copy the stack pointer to t9 + move t9, $sp + + # Fix the stack if there are more than 8 64bit slots worth + # of arguments. + + # Load the number of bytes + REG_L t6, 2*FFI_SIZEOF_ARG($fp) + + # Is it bigger than 8 * FFI_SIZEOF_ARG? + daddiu t8, t6, -(8 * FFI_SIZEOF_ARG) + bltz t8, loadregs + + ADDU t9, t9, t8 + +loadregs: + + REG_L t6, 3*FFI_SIZEOF_ARG($fp) # load the flags word into t6. + + and t4, t6, ((1<<FFI_FLAG_BITS)-1) + bnez t4, arg1_floatp + REG_L a0, 0*FFI_SIZEOF_ARG(t9) + b arg1_next +arg1_floatp: + bne t4, FFI_TYPE_FLOAT, arg1_doublep + l.s $f12, 0*FFI_SIZEOF_ARG(t9) + b arg1_next +arg1_doublep: + l.d $f12, 0*FFI_SIZEOF_ARG(t9) +arg1_next: + + SRL t4, t6, 1*FFI_FLAG_BITS + and t4, ((1<<FFI_FLAG_BITS)-1) + bnez t4, arg2_floatp + REG_L a1, 1*FFI_SIZEOF_ARG(t9) + b arg2_next +arg2_floatp: + bne t4, FFI_TYPE_FLOAT, arg2_doublep + l.s $f13, 1*FFI_SIZEOF_ARG(t9) + b arg2_next +arg2_doublep: + l.d $f13, 1*FFI_SIZEOF_ARG(t9) +arg2_next: + + SRL t4, t6, 2*FFI_FLAG_BITS + and t4, ((1<<FFI_FLAG_BITS)-1) + bnez t4, arg3_floatp + REG_L a2, 2*FFI_SIZEOF_ARG(t9) + b arg3_next +arg3_floatp: + bne t4, FFI_TYPE_FLOAT, arg3_doublep + l.s $f14, 2*FFI_SIZEOF_ARG(t9) + b arg3_next +arg3_doublep: + l.d $f14, 2*FFI_SIZEOF_ARG(t9) +arg3_next: + + SRL t4, t6, 3*FFI_FLAG_BITS + and t4, ((1<<FFI_FLAG_BITS)-1) + bnez t4, arg4_floatp + REG_L a3, 3*FFI_SIZEOF_ARG(t9) + b arg4_next +arg4_floatp: + bne t4, FFI_TYPE_FLOAT, arg4_doublep + l.s $f15, 3*FFI_SIZEOF_ARG(t9) + b arg4_next +arg4_doublep: + l.d $f15, 3*FFI_SIZEOF_ARG(t9) +arg4_next: + + SRL t4, t6, 4*FFI_FLAG_BITS + and t4, ((1<<FFI_FLAG_BITS)-1) + bnez t4, arg5_floatp + REG_L a4, 4*FFI_SIZEOF_ARG(t9) + b arg5_next +arg5_floatp: + bne t4, FFI_TYPE_FLOAT, arg5_doublep + l.s $f16, 4*FFI_SIZEOF_ARG(t9) + b arg5_next +arg5_doublep: + l.d $f16, 4*FFI_SIZEOF_ARG(t9) +arg5_next: + + SRL t4, t6, 5*FFI_FLAG_BITS + and t4, ((1<<FFI_FLAG_BITS)-1) + bnez t4, arg6_floatp + REG_L a5, 5*FFI_SIZEOF_ARG(t9) + b arg6_next +arg6_floatp: + bne t4, FFI_TYPE_FLOAT, arg6_doublep + l.s $f17, 5*FFI_SIZEOF_ARG(t9) + b arg6_next +arg6_doublep: + l.d $f17, 5*FFI_SIZEOF_ARG(t9) +arg6_next: + + SRL t4, t6, 6*FFI_FLAG_BITS + and t4, ((1<<FFI_FLAG_BITS)-1) + bnez t4, arg7_floatp + REG_L a6, 6*FFI_SIZEOF_ARG(t9) + b arg7_next +arg7_floatp: + bne t4, FFI_TYPE_FLOAT, arg7_doublep + l.s $f18, 6*FFI_SIZEOF_ARG(t9) + b arg7_next +arg7_doublep: + l.d $f18, 6*FFI_SIZEOF_ARG(t9) +arg7_next: + + SRL t4, t6, 7*FFI_FLAG_BITS + and t4, ((1<<FFI_FLAG_BITS)-1) + bnez t4, arg8_floatp + REG_L a7, 7*FFI_SIZEOF_ARG(t9) + b arg8_next +arg8_floatp: + bne t4, FFI_TYPE_FLOAT, arg8_doublep + l.s $f19, 7*FFI_SIZEOF_ARG(t9) + b arg8_next +arg8_doublep: + l.d $f19, 7*FFI_SIZEOF_ARG(t9) +arg8_next: + +callit: + # Load the function pointer + REG_L t9, 5*FFI_SIZEOF_ARG($fp) + + # If the return value pointer is NULL, assume no return value. + REG_L t5, 4*FFI_SIZEOF_ARG($fp) + beqz t5, noretval + + # Shift the return type flag over + SRL t6, 8*FFI_FLAG_BITS + + beq t6, FFI_TYPE_SINT32, retint + bne t6, FFI_TYPE_INT, retfloat +retint: + jal t9 + REG_L t4, 4*FFI_SIZEOF_ARG($fp) + REG_S v0, 0(t4) + b epilogue + +retfloat: + bne t6, FFI_TYPE_FLOAT, retdouble + jal t9 + REG_L t4, 4*FFI_SIZEOF_ARG($fp) + s.s $f0, 0(t4) + b epilogue + +retdouble: + bne t6, FFI_TYPE_DOUBLE, retstruct_d + jal t9 + REG_L t4, 4*FFI_SIZEOF_ARG($fp) + s.d $f0, 0(t4) + b epilogue + +retstruct_d: + bne t6, FFI_TYPE_STRUCT_D, retstruct_f + jal t9 + REG_L t4, 4*FFI_SIZEOF_ARG($fp) + s.d $f0, 0(t4) + b epilogue + +retstruct_f: + bne t6, FFI_TYPE_STRUCT_F, retstruct_d_d + jal t9 + REG_L t4, 4*FFI_SIZEOF_ARG($fp) + s.s $f0, 0(t4) + b epilogue + +retstruct_d_d: + bne t6, FFI_TYPE_STRUCT_DD, retstruct_f_f + jal t9 + REG_L t4, 4*FFI_SIZEOF_ARG($fp) + s.d $f0, 0(t4) + s.d $f2, 8(t4) + b epilogue + +retstruct_f_f: + bne t6, FFI_TYPE_STRUCT_FF, retstruct_d_f + jal t9 + REG_L t4, 4*FFI_SIZEOF_ARG($fp) + s.s $f0, 0(t4) + s.s $f2, 4(t4) + b epilogue + +retstruct_d_f: + bne t6, FFI_TYPE_STRUCT_DF, retstruct_f_d + jal t9 + REG_L t4, 4*FFI_SIZEOF_ARG($fp) + s.d $f0, 0(t4) + s.s $f2, 8(t4) + b epilogue + +retstruct_f_d: + bne t6, FFI_TYPE_STRUCT_FD, retstruct_d_soft + jal t9 + REG_L t4, 4*FFI_SIZEOF_ARG($fp) + s.s $f0, 0(t4) + s.d $f2, 8(t4) + b epilogue + +retstruct_d_soft: + bne t6, FFI_TYPE_STRUCT_D_SOFT, retstruct_f_soft + jal t9 + REG_L t4, 4*FFI_SIZEOF_ARG($fp) + sd v0, 0(t4) + b epilogue + +retstruct_f_soft: + bne t6, FFI_TYPE_STRUCT_F_SOFT, retstruct_d_d_soft + jal t9 + REG_L t4, 4*FFI_SIZEOF_ARG($fp) + sw v0, 0(t4) + b epilogue + +retstruct_d_d_soft: + bne t6, FFI_TYPE_STRUCT_DD_SOFT, retstruct_f_f_soft + jal t9 + REG_L t4, 4*FFI_SIZEOF_ARG($fp) + sd v0, 0(t4) + sd v1, 8(t4) + b epilogue + +retstruct_f_f_soft: + bne t6, FFI_TYPE_STRUCT_FF_SOFT, retstruct_d_f_soft + jal t9 + REG_L t4, 4*FFI_SIZEOF_ARG($fp) + sw v0, 0(t4) + sw v1, 4(t4) + b epilogue + +retstruct_d_f_soft: + bne t6, FFI_TYPE_STRUCT_DF_SOFT, retstruct_f_d_soft + jal t9 + REG_L t4, 4*FFI_SIZEOF_ARG($fp) + sd v0, 0(t4) + sw v1, 8(t4) + b epilogue + +retstruct_f_d_soft: + bne t6, FFI_TYPE_STRUCT_FD_SOFT, retstruct_small + jal t9 + REG_L t4, 4*FFI_SIZEOF_ARG($fp) + sw v0, 0(t4) + sd v1, 8(t4) + b epilogue + +retstruct_small: + bne t6, FFI_TYPE_STRUCT_SMALL, retstruct_small2 + jal t9 + REG_L t4, 4*FFI_SIZEOF_ARG($fp) + REG_S v0, 0(t4) + b epilogue + +retstruct_small2: + bne t6, FFI_TYPE_STRUCT_SMALL2, retstruct + jal t9 + REG_L t4, 4*FFI_SIZEOF_ARG($fp) + REG_S v0, 0(t4) + REG_S v1, 8(t4) + b epilogue + +retstruct: +noretval: + jal t9 + + # Epilogue +epilogue: + move $sp, $fp + REG_L $fp, SIZEOF_FRAME - 2*FFI_SIZEOF_ARG($sp) # Restore frame pointer + REG_L ra, SIZEOF_FRAME - 1*FFI_SIZEOF_ARG($sp) # Restore return address + ADDU $sp, SIZEOF_FRAME # Fix stack pointer + j ra + +.LFE3: + .end ffi_call_N32 + +/* ffi_closure_N32. Expects address of the passed-in ffi_closure in t0 + ($12). Stores any arguments passed in registers onto the stack, + then calls ffi_closure_mips_inner_N32, which then decodes + them. + + Stack layout: + + 20 - Start of parameters, original sp + 19 - Called function a7 save + 18 - Called function a6 save + 17 - Called function a5 save + 16 - Called function a4 save + 15 - Called function a3 save + 14 - Called function a2 save + 13 - Called function a1 save + 12 - Called function a0 save + 11 - Called function f19 + 10 - Called function f18 + 9 - Called function f17 + 8 - Called function f16 + 7 - Called function f15 + 6 - Called function f14 + 5 - Called function f13 + 4 - Called function f12 + 3 - return value high (v1 or $f2) + 2 - return value low (v0 or $f0) + 1 - ra save + 0 - gp save our sp points here + */ + +#define SIZEOF_FRAME2 (20 * FFI_SIZEOF_ARG) + +#define A7_OFF2 (19 * FFI_SIZEOF_ARG) +#define A6_OFF2 (18 * FFI_SIZEOF_ARG) +#define A5_OFF2 (17 * FFI_SIZEOF_ARG) +#define A4_OFF2 (16 * FFI_SIZEOF_ARG) +#define A3_OFF2 (15 * FFI_SIZEOF_ARG) +#define A2_OFF2 (14 * FFI_SIZEOF_ARG) +#define A1_OFF2 (13 * FFI_SIZEOF_ARG) +#define A0_OFF2 (12 * FFI_SIZEOF_ARG) + +#define F19_OFF2 (11 * FFI_SIZEOF_ARG) +#define F18_OFF2 (10 * FFI_SIZEOF_ARG) +#define F17_OFF2 (9 * FFI_SIZEOF_ARG) +#define F16_OFF2 (8 * FFI_SIZEOF_ARG) +#define F15_OFF2 (7 * FFI_SIZEOF_ARG) +#define F14_OFF2 (6 * FFI_SIZEOF_ARG) +#define F13_OFF2 (5 * FFI_SIZEOF_ARG) +#define F12_OFF2 (4 * FFI_SIZEOF_ARG) + +#define V1_OFF2 (3 * FFI_SIZEOF_ARG) +#define V0_OFF2 (2 * FFI_SIZEOF_ARG) + +#define RA_OFF2 (1 * FFI_SIZEOF_ARG) +#define GP_OFF2 (0 * FFI_SIZEOF_ARG) + + .align 2 + .globl ffi_closure_N32 + .ent ffi_closure_N32 +ffi_closure_N32: +.LFB2: + .frame $sp, SIZEOF_FRAME2, ra + .mask 0x90000000,-(SIZEOF_FRAME2 - RA_OFF2) + .fmask 0x00000000,0 + SUBU $sp, SIZEOF_FRAME2 +.LCFI5: + .cpsetup t9, GP_OFF2, ffi_closure_N32 + REG_S ra, RA_OFF2($sp) # Save return address +.LCFI6: + # Store all possible argument registers. If there are more than + # fit in registers, then they were stored on the stack. + REG_S a0, A0_OFF2($sp) + REG_S a1, A1_OFF2($sp) + REG_S a2, A2_OFF2($sp) + REG_S a3, A3_OFF2($sp) + REG_S a4, A4_OFF2($sp) + REG_S a5, A5_OFF2($sp) + REG_S a6, A6_OFF2($sp) + REG_S a7, A7_OFF2($sp) + + # Store all possible float/double registers. + s.d $f12, F12_OFF2($sp) + s.d $f13, F13_OFF2($sp) + s.d $f14, F14_OFF2($sp) + s.d $f15, F15_OFF2($sp) + s.d $f16, F16_OFF2($sp) + s.d $f17, F17_OFF2($sp) + s.d $f18, F18_OFF2($sp) + s.d $f19, F19_OFF2($sp) + + # Call ffi_closure_mips_inner_N32 to do the real work. + LA t9, ffi_closure_mips_inner_N32 + move a0, $12 # Pointer to the ffi_closure + ADDU a1, $sp, V0_OFF2 + ADDU a2, $sp, A0_OFF2 + ADDU a3, $sp, F12_OFF2 + jalr t9 + + # Return flags are in v0 + bne v0, FFI_TYPE_SINT32, cls_retint + lw v0, V0_OFF2($sp) + b cls_epilogue + +cls_retint: + bne v0, FFI_TYPE_INT, cls_retfloat + REG_L v0, V0_OFF2($sp) + b cls_epilogue + +cls_retfloat: + bne v0, FFI_TYPE_FLOAT, cls_retdouble + l.s $f0, V0_OFF2($sp) + b cls_epilogue + +cls_retdouble: + bne v0, FFI_TYPE_DOUBLE, cls_retstruct_d + l.d $f0, V0_OFF2($sp) + b cls_epilogue + +cls_retstruct_d: + bne v0, FFI_TYPE_STRUCT_D, cls_retstruct_f + l.d $f0, V0_OFF2($sp) + b cls_epilogue + +cls_retstruct_f: + bne v0, FFI_TYPE_STRUCT_F, cls_retstruct_d_d + l.s $f0, V0_OFF2($sp) + b cls_epilogue + +cls_retstruct_d_d: + bne v0, FFI_TYPE_STRUCT_DD, cls_retstruct_f_f + l.d $f0, V0_OFF2($sp) + l.d $f2, V1_OFF2($sp) + b cls_epilogue + +cls_retstruct_f_f: + bne v0, FFI_TYPE_STRUCT_FF, cls_retstruct_d_f + l.s $f0, V0_OFF2($sp) + l.s $f2, V1_OFF2($sp) + b cls_epilogue + +cls_retstruct_d_f: + bne v0, FFI_TYPE_STRUCT_DF, cls_retstruct_f_d + l.d $f0, V0_OFF2($sp) + l.s $f2, V1_OFF2($sp) + b cls_epilogue + +cls_retstruct_f_d: + bne v0, FFI_TYPE_STRUCT_FD, cls_retstruct_small2 + l.s $f0, V0_OFF2($sp) + l.d $f2, V1_OFF2($sp) + b cls_epilogue + +cls_retstruct_small2: + REG_L v0, V0_OFF2($sp) + REG_L v1, V1_OFF2($sp) + + # Epilogue +cls_epilogue: + REG_L ra, RA_OFF2($sp) # Restore return address + .cpreturn + ADDU $sp, SIZEOF_FRAME2 + j ra +.LFE2: + .end ffi_closure_N32 + +#ifdef __GNUC__ + .section .eh_frame,"aw",@progbits +.Lframe1: + .4byte .LECIE1-.LSCIE1 # length +.LSCIE1: + .4byte 0x0 # CIE + .byte 0x1 # Version 1 + .ascii "\000" # Augmentation + .uleb128 0x1 # Code alignment 1 + .sleb128 -4 # Data alignment -4 + .byte 0x1f # Return Address $31 + .byte 0xc # DW_CFA_def_cfa + .uleb128 0x1d # in $sp + .uleb128 0x0 # offset 0 + .align EH_FRAME_ALIGN +.LECIE1: + +.LSFDE1: + .4byte .LEFDE1-.LASFDE1 # length. +.LASFDE1: + .4byte .LASFDE1-.Lframe1 # CIE_pointer. + FDE_ADDR_BYTES .LFB3 # initial_location. + FDE_ADDR_BYTES .LFE3-.LFB3 # address_range. + .byte 0x4 # DW_CFA_advance_loc4 + .4byte .LCFI0-.LFB3 # to .LCFI0 + .byte 0xe # DW_CFA_def_cfa_offset + .uleb128 SIZEOF_FRAME # adjust stack.by SIZEOF_FRAME + .byte 0x4 # DW_CFA_advance_loc4 + .4byte .LCFI1-.LCFI0 # to .LCFI1 + .byte 0x9e # DW_CFA_offset of $fp + .uleb128 2*FFI_SIZEOF_ARG/4 # + .byte 0x9f # DW_CFA_offset of ra + .uleb128 1*FFI_SIZEOF_ARG/4 # + .byte 0x4 # DW_CFA_advance_loc4 + .4byte .LCFI3-.LCFI1 # to .LCFI3 + .byte 0xd # DW_CFA_def_cfa_register + .uleb128 0x1e # in $fp + .align EH_FRAME_ALIGN +.LEFDE1: +.LSFDE3: + .4byte .LEFDE3-.LASFDE3 # length +.LASFDE3: + .4byte .LASFDE3-.Lframe1 # CIE_pointer. + FDE_ADDR_BYTES .LFB2 # initial_location. + FDE_ADDR_BYTES .LFE2-.LFB2 # address_range. + .byte 0x4 # DW_CFA_advance_loc4 + .4byte .LCFI5-.LFB2 # to .LCFI5 + .byte 0xe # DW_CFA_def_cfa_offset + .uleb128 SIZEOF_FRAME2 # adjust stack.by SIZEOF_FRAME + .byte 0x4 # DW_CFA_advance_loc4 + .4byte .LCFI6-.LCFI5 # to .LCFI6 + .byte 0x9c # DW_CFA_offset of $gp ($28) + .uleb128 (SIZEOF_FRAME2 - GP_OFF2)/4 + .byte 0x9f # DW_CFA_offset of ra ($31) + .uleb128 (SIZEOF_FRAME2 - RA_OFF2)/4 + .align EH_FRAME_ALIGN +.LEFDE3: +#endif /* __GNUC__ */ + +#endif diff --git a/libffi/src/mips/o32.S b/libffi/src/mips/o32.S new file mode 100644 index 000000000..eb279813a --- /dev/null +++ b/libffi/src/mips/o32.S @@ -0,0 +1,381 @@ +/* ----------------------------------------------------------------------- + o32.S - Copyright (c) 1996, 1998, 2005 Red Hat, Inc. + + MIPS Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +/* Only build this code if we are compiling for o32 */ + +#if defined(FFI_MIPS_O32) + +#define callback a0 +#define bytes a2 +#define flags a3 + +#define SIZEOF_FRAME (4 * FFI_SIZEOF_ARG + 2 * FFI_SIZEOF_ARG) +#define A3_OFF (SIZEOF_FRAME + 3 * FFI_SIZEOF_ARG) +#define FP_OFF (SIZEOF_FRAME - 2 * FFI_SIZEOF_ARG) +#define RA_OFF (SIZEOF_FRAME - 1 * FFI_SIZEOF_ARG) + + .abicalls + .text + .align 2 + .globl ffi_call_O32 + .ent ffi_call_O32 +ffi_call_O32: +$LFB0: + # Prologue + SUBU $sp, SIZEOF_FRAME # Frame size +$LCFI0: + REG_S $fp, FP_OFF($sp) # Save frame pointer +$LCFI1: + REG_S ra, RA_OFF($sp) # Save return address +$LCFI2: + move $fp, $sp + +$LCFI3: + move t9, callback # callback function pointer + REG_S flags, A3_OFF($fp) # flags + + # Allocate at least 4 words in the argstack + LI v0, 4 * FFI_SIZEOF_ARG + blt bytes, v0, sixteen + + ADDU v0, bytes, 7 # make sure it is aligned + and v0, -8 # to an 8 byte boundry + +sixteen: + SUBU $sp, v0 # move the stack pointer to reflect the + # arg space + + ADDU a0, $sp, 4 * FFI_SIZEOF_ARG + + jalr t9 + + REG_L t0, A3_OFF($fp) # load the flags word + SRL t2, t0, 4 # shift our arg info + and t0, ((1<<4)-1) # mask out the return type + + ADDU $sp, 4 * FFI_SIZEOF_ARG # adjust $sp to new args + + bnez t0, pass_d # make it quick for int + REG_L a0, 0*FFI_SIZEOF_ARG($sp) # just go ahead and load the + REG_L a1, 1*FFI_SIZEOF_ARG($sp) # four regs. + REG_L a2, 2*FFI_SIZEOF_ARG($sp) + REG_L a3, 3*FFI_SIZEOF_ARG($sp) + b call_it + +pass_d: + bne t0, FFI_ARGS_D, pass_f + l.d $f12, 0*FFI_SIZEOF_ARG($sp) # load $fp regs from args + REG_L a2, 2*FFI_SIZEOF_ARG($sp) # passing a double + REG_L a3, 3*FFI_SIZEOF_ARG($sp) + b call_it + +pass_f: + bne t0, FFI_ARGS_F, pass_d_d + l.s $f12, 0*FFI_SIZEOF_ARG($sp) # load $fp regs from args + REG_L a1, 1*FFI_SIZEOF_ARG($sp) # passing a float + REG_L a2, 2*FFI_SIZEOF_ARG($sp) + REG_L a3, 3*FFI_SIZEOF_ARG($sp) + b call_it + +pass_d_d: + bne t0, FFI_ARGS_DD, pass_f_f + l.d $f12, 0*FFI_SIZEOF_ARG($sp) # load $fp regs from args + l.d $f14, 2*FFI_SIZEOF_ARG($sp) # passing two doubles + b call_it + +pass_f_f: + bne t0, FFI_ARGS_FF, pass_d_f + l.s $f12, 0*FFI_SIZEOF_ARG($sp) # load $fp regs from args + l.s $f14, 1*FFI_SIZEOF_ARG($sp) # passing two floats + REG_L a2, 2*FFI_SIZEOF_ARG($sp) + REG_L a3, 3*FFI_SIZEOF_ARG($sp) + b call_it + +pass_d_f: + bne t0, FFI_ARGS_DF, pass_f_d + l.d $f12, 0*FFI_SIZEOF_ARG($sp) # load $fp regs from args + l.s $f14, 2*FFI_SIZEOF_ARG($sp) # passing double and float + REG_L a3, 3*FFI_SIZEOF_ARG($sp) + b call_it + +pass_f_d: + # assume that the only other combination must be float then double + # bne t0, FFI_ARGS_F_D, call_it + l.s $f12, 0*FFI_SIZEOF_ARG($sp) # load $fp regs from args + l.d $f14, 2*FFI_SIZEOF_ARG($sp) # passing double and float + +call_it: + # Load the function pointer + REG_L t9, SIZEOF_FRAME + 5*FFI_SIZEOF_ARG($fp) + + # If the return value pointer is NULL, assume no return value. + REG_L t1, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp) + beqz t1, noretval + + bne t2, FFI_TYPE_INT, retlonglong + jalr t9 + REG_L t0, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp) + REG_S v0, 0(t0) + b epilogue + +retlonglong: + # Really any 64-bit int, signed or not. + bne t2, FFI_TYPE_UINT64, retfloat + jalr t9 + REG_L t0, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp) + REG_S v1, 4(t0) + REG_S v0, 0(t0) + b epilogue + +retfloat: + bne t2, FFI_TYPE_FLOAT, retdouble + jalr t9 + REG_L t0, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp) + s.s $f0, 0(t0) + b epilogue + +retdouble: + bne t2, FFI_TYPE_DOUBLE, noretval + jalr t9 + REG_L t0, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp) + s.d $f0, 0(t0) + b epilogue + +noretval: + jalr t9 + + # Epilogue +epilogue: + move $sp, $fp + REG_L $fp, FP_OFF($sp) # Restore frame pointer + REG_L ra, RA_OFF($sp) # Restore return address + ADDU $sp, SIZEOF_FRAME # Fix stack pointer + j ra + +$LFE0: + .end ffi_call_O32 + + +/* ffi_closure_O32. Expects address of the passed-in ffi_closure + in t4 ($12). Stores any arguments passed in registers onto the + stack, then calls ffi_closure_mips_inner_O32, which + then decodes them. + + Stack layout: + + 3 - a3 save + 2 - a2 save + 1 - a1 save + 0 - a0 save, original sp + -1 - ra save + -2 - fp save + -3 - $16 (s0) save + -4 - cprestore + -5 - return value high (v1) + -6 - return value low (v0) + -7 - f14 (le high, be low) + -8 - f14 (le low, be high) + -9 - f12 (le high, be low) + -10 - f12 (le low, be high) + -11 - Called function a3 save + -12 - Called function a2 save + -13 - Called function a1 save + -14 - Called function a0 save, our sp and fp point here + */ + +#define SIZEOF_FRAME2 (14 * FFI_SIZEOF_ARG) +#define A3_OFF2 (SIZEOF_FRAME2 + 3 * FFI_SIZEOF_ARG) +#define A2_OFF2 (SIZEOF_FRAME2 + 2 * FFI_SIZEOF_ARG) +#define A1_OFF2 (SIZEOF_FRAME2 + 1 * FFI_SIZEOF_ARG) +#define A0_OFF2 (SIZEOF_FRAME2 + 0 * FFI_SIZEOF_ARG) +#define RA_OFF2 (SIZEOF_FRAME2 - 1 * FFI_SIZEOF_ARG) +#define FP_OFF2 (SIZEOF_FRAME2 - 2 * FFI_SIZEOF_ARG) +#define S0_OFF2 (SIZEOF_FRAME2 - 3 * FFI_SIZEOF_ARG) +#define GP_OFF2 (SIZEOF_FRAME2 - 4 * FFI_SIZEOF_ARG) +#define V1_OFF2 (SIZEOF_FRAME2 - 5 * FFI_SIZEOF_ARG) +#define V0_OFF2 (SIZEOF_FRAME2 - 6 * FFI_SIZEOF_ARG) +#define FA_1_1_OFF2 (SIZEOF_FRAME2 - 7 * FFI_SIZEOF_ARG) +#define FA_1_0_OFF2 (SIZEOF_FRAME2 - 8 * FFI_SIZEOF_ARG) +#define FA_0_1_OFF2 (SIZEOF_FRAME2 - 9 * FFI_SIZEOF_ARG) +#define FA_0_0_OFF2 (SIZEOF_FRAME2 - 10 * FFI_SIZEOF_ARG) + + .text + .align 2 + .globl ffi_closure_O32 + .ent ffi_closure_O32 +ffi_closure_O32: +$LFB1: + # Prologue + .frame $fp, SIZEOF_FRAME2, ra + .set noreorder + .cpload t9 + .set reorder + SUBU $sp, SIZEOF_FRAME2 + .cprestore GP_OFF2 +$LCFI4: + REG_S $16, S0_OFF2($sp) # Save s0 + REG_S $fp, FP_OFF2($sp) # Save frame pointer + REG_S ra, RA_OFF2($sp) # Save return address +$LCFI6: + move $fp, $sp + +$LCFI7: + # Store all possible argument registers. If there are more than + # four arguments, then they are stored above where we put a3. + REG_S a0, A0_OFF2($fp) + REG_S a1, A1_OFF2($fp) + REG_S a2, A2_OFF2($fp) + REG_S a3, A3_OFF2($fp) + + # Load ABI enum to s0 + REG_L $16, 20($12) # cif pointer follows tramp. + REG_L $16, 0($16) # abi is first member. + + li $13, 1 # FFI_O32 + bne $16, $13, 1f # Skip fp save if FFI_O32_SOFT_FLOAT + + # Store all possible float/double registers. + s.d $f12, FA_0_0_OFF2($fp) + s.d $f14, FA_1_0_OFF2($fp) +1: + # Call ffi_closure_mips_inner_O32 to do the work. + la t9, ffi_closure_mips_inner_O32 + move a0, $12 # Pointer to the ffi_closure + addu a1, $fp, V0_OFF2 + addu a2, $fp, A0_OFF2 + addu a3, $fp, FA_0_0_OFF2 + jalr t9 + + # Load the return value into the appropriate register. + move $8, $2 + li $9, FFI_TYPE_VOID + beq $8, $9, closure_done + + li $13, 1 # FFI_O32 + bne $16, $13, 1f # Skip fp restore if FFI_O32_SOFT_FLOAT + + li $9, FFI_TYPE_FLOAT + l.s $f0, V0_OFF2($fp) + beq $8, $9, closure_done + + li $9, FFI_TYPE_DOUBLE + l.d $f0, V0_OFF2($fp) + beq $8, $9, closure_done +1: + REG_L $3, V1_OFF2($fp) + REG_L $2, V0_OFF2($fp) + +closure_done: + # Epilogue + move $sp, $fp + REG_L $16, S0_OFF2($sp) # Restore s0 + REG_L $fp, FP_OFF2($sp) # Restore frame pointer + REG_L ra, RA_OFF2($sp) # Restore return address + ADDU $sp, SIZEOF_FRAME2 + j ra +$LFE1: + .end ffi_closure_O32 + +/* DWARF-2 unwind info. */ + + .section .eh_frame,"a",@progbits +$Lframe0: + .4byte $LECIE0-$LSCIE0 # Length of Common Information Entry +$LSCIE0: + .4byte 0x0 # CIE Identifier Tag + .byte 0x1 # CIE Version + .ascii "zR\0" # CIE Augmentation + .uleb128 0x1 # CIE Code Alignment Factor + .sleb128 4 # CIE Data Alignment Factor + .byte 0x1f # CIE RA Column + .uleb128 0x1 # Augmentation size + .byte 0x00 # FDE Encoding (absptr) + .byte 0xc # DW_CFA_def_cfa + .uleb128 0x1d + .uleb128 0x0 + .align 2 +$LECIE0: +$LSFDE0: + .4byte $LEFDE0-$LASFDE0 # FDE Length +$LASFDE0: + .4byte $LASFDE0-$Lframe0 # FDE CIE offset + .4byte $LFB0 # FDE initial location + .4byte $LFE0-$LFB0 # FDE address range + .uleb128 0x0 # Augmentation size + .byte 0x4 # DW_CFA_advance_loc4 + .4byte $LCFI0-$LFB0 + .byte 0xe # DW_CFA_def_cfa_offset + .uleb128 0x18 + .byte 0x4 # DW_CFA_advance_loc4 + .4byte $LCFI2-$LCFI0 + .byte 0x11 # DW_CFA_offset_extended_sf + .uleb128 0x1e # $fp + .sleb128 -2 # SIZEOF_FRAME2 - 2*FFI_SIZEOF_ARG($sp) + .byte 0x11 # DW_CFA_offset_extended_sf + .uleb128 0x1f # $ra + .sleb128 -1 # SIZEOF_FRAME2 - 1*FFI_SIZEOF_ARG($sp) + .byte 0x4 # DW_CFA_advance_loc4 + .4byte $LCFI3-$LCFI2 + .byte 0xc # DW_CFA_def_cfa + .uleb128 0x1e + .uleb128 0x18 + .align 2 +$LEFDE0: +$LSFDE1: + .4byte $LEFDE1-$LASFDE1 # FDE Length +$LASFDE1: + .4byte $LASFDE1-$Lframe0 # FDE CIE offset + .4byte $LFB1 # FDE initial location + .4byte $LFE1-$LFB1 # FDE address range + .uleb128 0x0 # Augmentation size + .byte 0x4 # DW_CFA_advance_loc4 + .4byte $LCFI4-$LFB1 + .byte 0xe # DW_CFA_def_cfa_offset + .uleb128 0x38 + .byte 0x4 # DW_CFA_advance_loc4 + .4byte $LCFI6-$LCFI4 + .byte 0x11 # DW_CFA_offset_extended_sf + .uleb128 0x10 # $16 + .sleb128 -3 # SIZEOF_FRAME2 - 3*FFI_SIZEOF_ARG($sp) + .byte 0x11 # DW_CFA_offset_extended_sf + .uleb128 0x1e # $fp + .sleb128 -2 # SIZEOF_FRAME2 - 2*FFI_SIZEOF_ARG($sp) + .byte 0x11 # DW_CFA_offset_extended_sf + .uleb128 0x1f # $ra + .sleb128 -1 # SIZEOF_FRAME2 - 1*FFI_SIZEOF_ARG($sp) + .byte 0x4 # DW_CFA_advance_loc4 + .4byte $LCFI7-$LCFI6 + .byte 0xc # DW_CFA_def_cfa + .uleb128 0x1e + .uleb128 0x38 + .align 2 +$LEFDE1: + +#endif diff --git a/libffi/src/pa/ffi.c b/libffi/src/pa/ffi.c new file mode 100644 index 000000000..6d7606f89 --- /dev/null +++ b/libffi/src/pa/ffi.c @@ -0,0 +1,716 @@ +/* ----------------------------------------------------------------------- + ffi.c - (c) 2003-2004 Randolph Chung <tausq@debian.org> + (c) 2008 Red Hat, Inc. + + HPPA Foreign Function Interface + HP-UX PA ABI support (c) 2006 Free Software Foundation, Inc. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> +#include <stdio.h> + +#define ROUND_UP(v, a) (((size_t)(v) + (a) - 1) & ~((a) - 1)) + +#define MIN_STACK_SIZE 64 +#define FIRST_ARG_SLOT 9 +#define DEBUG_LEVEL 0 + +#define fldw(addr, fpreg) \ + __asm__ volatile ("fldw 0(%0), %%" #fpreg "L" : : "r"(addr) : #fpreg) +#define fstw(fpreg, addr) \ + __asm__ volatile ("fstw %%" #fpreg "L, 0(%0)" : : "r"(addr)) +#define fldd(addr, fpreg) \ + __asm__ volatile ("fldd 0(%0), %%" #fpreg : : "r"(addr) : #fpreg) +#define fstd(fpreg, addr) \ + __asm__ volatile ("fstd %%" #fpreg "L, 0(%0)" : : "r"(addr)) + +#define debug(lvl, x...) do { if (lvl <= DEBUG_LEVEL) { printf(x); } } while (0) + +static inline int ffi_struct_type(ffi_type *t) +{ + size_t sz = t->size; + + /* Small structure results are passed in registers, + larger ones are passed by pointer. Note that + small structures of size 2, 4 and 8 differ from + the corresponding integer types in that they have + different alignment requirements. */ + + if (sz <= 1) + return FFI_TYPE_UINT8; + else if (sz == 2) + return FFI_TYPE_SMALL_STRUCT2; + else if (sz == 3) + return FFI_TYPE_SMALL_STRUCT3; + else if (sz == 4) + return FFI_TYPE_SMALL_STRUCT4; + else if (sz == 5) + return FFI_TYPE_SMALL_STRUCT5; + else if (sz == 6) + return FFI_TYPE_SMALL_STRUCT6; + else if (sz == 7) + return FFI_TYPE_SMALL_STRUCT7; + else if (sz <= 8) + return FFI_TYPE_SMALL_STRUCT8; + else + return FFI_TYPE_STRUCT; /* else, we pass it by pointer. */ +} + +/* PA has a downward growing stack, which looks like this: + + Offset + [ Variable args ] + SP = (4*(n+9)) arg word N + ... + SP-52 arg word 4 + [ Fixed args ] + SP-48 arg word 3 + SP-44 arg word 2 + SP-40 arg word 1 + SP-36 arg word 0 + [ Frame marker ] + ... + SP-20 RP + SP-4 previous SP + + The first four argument words on the stack are reserved for use by + the callee. Instead, the general and floating registers replace + the first four argument slots. Non FP arguments are passed solely + in the general registers. FP arguments are passed in both general + and floating registers when using libffi. + + Non-FP 32-bit args are passed in gr26, gr25, gr24 and gr23. + Non-FP 64-bit args are passed in register pairs, starting + on an odd numbered register (i.e. r25+r26 and r23+r24). + FP 32-bit arguments are passed in fr4L, fr5L, fr6L and fr7L. + FP 64-bit arguments are passed in fr5 and fr7. + + The registers are allocated in the same manner as stack slots. + This allows the callee to save its arguments on the stack if + necessary: + + arg word 3 -> gr23 or fr7L + arg word 2 -> gr24 or fr6L or fr7R + arg word 1 -> gr25 or fr5L + arg word 0 -> gr26 or fr4L or fr5R + + Note that fr4R and fr6R are never used for arguments (i.e., + doubles are not passed in fr4 or fr6). + + The rest of the arguments are passed on the stack starting at SP-52, + but 64-bit arguments need to be aligned to an 8-byte boundary + + This means we can have holes either in the register allocation, + or in the stack. */ + +/* ffi_prep_args is called by the assembly routine once stack space + has been allocated for the function's arguments + + The following code will put everything into the stack frame + (which was allocated by the asm routine), and on return + the asm routine will load the arguments that should be + passed by register into the appropriate registers + + NOTE: We load floating point args in this function... that means we + assume gcc will not mess with fp regs in here. */ + +void ffi_prep_args_pa32(UINT32 *stack, extended_cif *ecif, unsigned bytes) +{ + register unsigned int i; + register ffi_type **p_arg; + register void **p_argv; + unsigned int slot = FIRST_ARG_SLOT; + char *dest_cpy; + size_t len; + + debug(1, "%s: stack = %p, ecif = %p, bytes = %u\n", __FUNCTION__, stack, + ecif, bytes); + + p_arg = ecif->cif->arg_types; + p_argv = ecif->avalue; + + for (i = 0; i < ecif->cif->nargs; i++) + { + int type = (*p_arg)->type; + + switch (type) + { + case FFI_TYPE_SINT8: + *(SINT32 *)(stack - slot) = *(SINT8 *)(*p_argv); + break; + + case FFI_TYPE_UINT8: + *(UINT32 *)(stack - slot) = *(UINT8 *)(*p_argv); + break; + + case FFI_TYPE_SINT16: + *(SINT32 *)(stack - slot) = *(SINT16 *)(*p_argv); + break; + + case FFI_TYPE_UINT16: + *(UINT32 *)(stack - slot) = *(UINT16 *)(*p_argv); + break; + + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_POINTER: + debug(3, "Storing UINT32 %u in slot %u\n", *(UINT32 *)(*p_argv), + slot); + *(UINT32 *)(stack - slot) = *(UINT32 *)(*p_argv); + break; + + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + /* Align slot for 64-bit type. */ + slot += (slot & 1) ? 1 : 2; + *(UINT64 *)(stack - slot) = *(UINT64 *)(*p_argv); + break; + + case FFI_TYPE_FLOAT: + /* First 4 args go in fr4L - fr7L. */ + debug(3, "Storing UINT32(float) in slot %u\n", slot); + *(UINT32 *)(stack - slot) = *(UINT32 *)(*p_argv); + switch (slot - FIRST_ARG_SLOT) + { + /* First 4 args go in fr4L - fr7L. */ + case 0: fldw(stack - slot, fr4); break; + case 1: fldw(stack - slot, fr5); break; + case 2: fldw(stack - slot, fr6); break; + case 3: fldw(stack - slot, fr7); break; + } + break; + + case FFI_TYPE_DOUBLE: + /* Align slot for 64-bit type. */ + slot += (slot & 1) ? 1 : 2; + debug(3, "Storing UINT64(double) at slot %u\n", slot); + *(UINT64 *)(stack - slot) = *(UINT64 *)(*p_argv); + switch (slot - FIRST_ARG_SLOT) + { + /* First 2 args go in fr5, fr7. */ + case 1: fldd(stack - slot, fr5); break; + case 3: fldd(stack - slot, fr7); break; + } + break; + +#ifdef PA_HPUX + case FFI_TYPE_LONGDOUBLE: + /* Long doubles are passed in the same manner as structures + larger than 8 bytes. */ + *(UINT32 *)(stack - slot) = (UINT32)(*p_argv); + break; +#endif + + case FFI_TYPE_STRUCT: + + /* Structs smaller or equal than 4 bytes are passed in one + register. Structs smaller or equal 8 bytes are passed in two + registers. Larger structures are passed by pointer. */ + + len = (*p_arg)->size; + if (len <= 4) + { + dest_cpy = (char *)(stack - slot) + 4 - len; + memcpy(dest_cpy, (char *)*p_argv, len); + } + else if (len <= 8) + { + slot += (slot & 1) ? 1 : 2; + dest_cpy = (char *)(stack - slot) + 8 - len; + memcpy(dest_cpy, (char *)*p_argv, len); + } + else + *(UINT32 *)(stack - slot) = (UINT32)(*p_argv); + break; + + default: + FFI_ASSERT(0); + } + + slot++; + p_arg++; + p_argv++; + } + + /* Make sure we didn't mess up and scribble on the stack. */ + { + unsigned int n; + + debug(5, "Stack setup:\n"); + for (n = 0; n < (bytes + 3) / 4; n++) + { + if ((n%4) == 0) { debug(5, "\n%08x: ", (unsigned int)(stack - n)); } + debug(5, "%08x ", *(stack - n)); + } + debug(5, "\n"); + } + + FFI_ASSERT(slot * 4 <= bytes); + + return; +} + +static void ffi_size_stack_pa32(ffi_cif *cif) +{ + ffi_type **ptr; + int i; + int z = 0; /* # stack slots */ + + for (ptr = cif->arg_types, i = 0; i < cif->nargs; ptr++, i++) + { + int type = (*ptr)->type; + + switch (type) + { + case FFI_TYPE_DOUBLE: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + z += 2 + (z & 1); /* must start on even regs, so we may waste one */ + break; + +#ifdef PA_HPUX + case FFI_TYPE_LONGDOUBLE: +#endif + case FFI_TYPE_STRUCT: + z += 1; /* pass by ptr, callee will copy */ + break; + + default: /* <= 32-bit values */ + z++; + } + } + + /* We can fit up to 6 args in the default 64-byte stack frame, + if we need more, we need more stack. */ + if (z <= 6) + cif->bytes = MIN_STACK_SIZE; /* min stack size */ + else + cif->bytes = 64 + ROUND_UP((z - 6) * sizeof(UINT32), MIN_STACK_SIZE); + + debug(3, "Calculated stack size is %u bytes\n", cif->bytes); +} + +/* Perform machine dependent cif processing. */ +ffi_status ffi_prep_cif_machdep(ffi_cif *cif) +{ + /* Set the return type flag */ + switch (cif->rtype->type) + { + case FFI_TYPE_VOID: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + cif->flags = (unsigned) cif->rtype->type; + break; + +#ifdef PA_HPUX + case FFI_TYPE_LONGDOUBLE: + /* Long doubles are treated like a structure. */ + cif->flags = FFI_TYPE_STRUCT; + break; +#endif + + case FFI_TYPE_STRUCT: + /* For the return type we have to check the size of the structures. + If the size is smaller or equal 4 bytes, the result is given back + in one register. If the size is smaller or equal 8 bytes than we + return the result in two registers. But if the size is bigger than + 8 bytes, we work with pointers. */ + cif->flags = ffi_struct_type(cif->rtype); + break; + + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + cif->flags = FFI_TYPE_UINT64; + break; + + default: + cif->flags = FFI_TYPE_INT; + break; + } + + /* Lucky us, because of the unique PA ABI we get to do our + own stack sizing. */ + switch (cif->abi) + { + case FFI_PA32: + ffi_size_stack_pa32(cif); + break; + + default: + FFI_ASSERT(0); + break; + } + + return FFI_OK; +} + +extern void ffi_call_pa32(void (*)(UINT32 *, extended_cif *, unsigned), + extended_cif *, unsigned, unsigned, unsigned *, + void (*fn)(void)); + +void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + extended_cif ecif; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return + value address then we need to make one. */ + + if (rvalue == NULL +#ifdef PA_HPUX + && (cif->rtype->type == FFI_TYPE_STRUCT + || cif->rtype->type == FFI_TYPE_LONGDOUBLE)) +#else + && cif->rtype->type == FFI_TYPE_STRUCT) +#endif + { + ecif.rvalue = alloca(cif->rtype->size); + } + else + ecif.rvalue = rvalue; + + + switch (cif->abi) + { + case FFI_PA32: + debug(3, "Calling ffi_call_pa32: ecif=%p, bytes=%u, flags=%u, rvalue=%p, fn=%p\n", &ecif, cif->bytes, cif->flags, ecif.rvalue, (void *)fn); + ffi_call_pa32(ffi_prep_args_pa32, &ecif, cif->bytes, + cif->flags, ecif.rvalue, fn); + break; + + default: + FFI_ASSERT(0); + break; + } +} + +#if FFI_CLOSURES +/* This is more-or-less an inverse of ffi_call -- we have arguments on + the stack, and we need to fill them into a cif structure and invoke + the user function. This really ought to be in asm to make sure + the compiler doesn't do things we don't expect. */ +ffi_status ffi_closure_inner_pa32(ffi_closure *closure, UINT32 *stack) +{ + ffi_cif *cif; + void **avalue; + void *rvalue; + UINT32 ret[2]; /* function can return up to 64-bits in registers */ + ffi_type **p_arg; + char *tmp; + int i, avn; + unsigned int slot = FIRST_ARG_SLOT; + register UINT32 r28 asm("r28"); + + cif = closure->cif; + + /* If returning via structure, callee will write to our pointer. */ + if (cif->flags == FFI_TYPE_STRUCT) + rvalue = (void *)r28; + else + rvalue = &ret[0]; + + avalue = (void **)alloca(cif->nargs * FFI_SIZEOF_ARG); + avn = cif->nargs; + p_arg = cif->arg_types; + + for (i = 0; i < avn; i++) + { + int type = (*p_arg)->type; + + switch (type) + { + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_POINTER: + avalue[i] = (char *)(stack - slot) + sizeof(UINT32) - (*p_arg)->size; + break; + + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + slot += (slot & 1) ? 1 : 2; + avalue[i] = (void *)(stack - slot); + break; + + case FFI_TYPE_FLOAT: +#ifdef PA_LINUX + /* The closure call is indirect. In Linux, floating point + arguments in indirect calls with a prototype are passed + in the floating point registers instead of the general + registers. So, we need to replace what was previously + stored in the current slot with the value in the + corresponding floating point register. */ + switch (slot - FIRST_ARG_SLOT) + { + case 0: fstw(fr4, (void *)(stack - slot)); break; + case 1: fstw(fr5, (void *)(stack - slot)); break; + case 2: fstw(fr6, (void *)(stack - slot)); break; + case 3: fstw(fr7, (void *)(stack - slot)); break; + } +#endif + avalue[i] = (void *)(stack - slot); + break; + + case FFI_TYPE_DOUBLE: + slot += (slot & 1) ? 1 : 2; +#ifdef PA_LINUX + /* See previous comment for FFI_TYPE_FLOAT. */ + switch (slot - FIRST_ARG_SLOT) + { + case 1: fstd(fr5, (void *)(stack - slot)); break; + case 3: fstd(fr7, (void *)(stack - slot)); break; + } +#endif + avalue[i] = (void *)(stack - slot); + break; + +#ifdef PA_HPUX + case FFI_TYPE_LONGDOUBLE: + /* Long doubles are treated like a big structure. */ + avalue[i] = (void *) *(stack - slot); + break; +#endif + + case FFI_TYPE_STRUCT: + /* Structs smaller or equal than 4 bytes are passed in one + register. Structs smaller or equal 8 bytes are passed in two + registers. Larger structures are passed by pointer. */ + if((*p_arg)->size <= 4) + { + avalue[i] = (void *)(stack - slot) + sizeof(UINT32) - + (*p_arg)->size; + } + else if ((*p_arg)->size <= 8) + { + slot += (slot & 1) ? 1 : 2; + avalue[i] = (void *)(stack - slot) + sizeof(UINT64) - + (*p_arg)->size; + } + else + avalue[i] = (void *) *(stack - slot); + break; + + default: + FFI_ASSERT(0); + } + + slot++; + p_arg++; + } + + /* Invoke the closure. */ + (closure->fun) (cif, rvalue, avalue, closure->user_data); + + debug(3, "after calling function, ret[0] = %08x, ret[1] = %08x\n", ret[0], + ret[1]); + + /* Store the result using the lower 2 bytes of the flags. */ + switch (cif->flags) + { + case FFI_TYPE_UINT8: + *(stack - FIRST_ARG_SLOT) = (UINT8)(ret[0] >> 24); + break; + case FFI_TYPE_SINT8: + *(stack - FIRST_ARG_SLOT) = (SINT8)(ret[0] >> 24); + break; + case FFI_TYPE_UINT16: + *(stack - FIRST_ARG_SLOT) = (UINT16)(ret[0] >> 16); + break; + case FFI_TYPE_SINT16: + *(stack - FIRST_ARG_SLOT) = (SINT16)(ret[0] >> 16); + break; + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + *(stack - FIRST_ARG_SLOT) = ret[0]; + break; + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + *(stack - FIRST_ARG_SLOT) = ret[0]; + *(stack - FIRST_ARG_SLOT - 1) = ret[1]; + break; + + case FFI_TYPE_DOUBLE: + fldd(rvalue, fr4); + break; + + case FFI_TYPE_FLOAT: + fldw(rvalue, fr4); + break; + + case FFI_TYPE_STRUCT: + /* Don't need a return value, done by caller. */ + break; + + case FFI_TYPE_SMALL_STRUCT2: + case FFI_TYPE_SMALL_STRUCT3: + case FFI_TYPE_SMALL_STRUCT4: + tmp = (void*)(stack - FIRST_ARG_SLOT); + tmp += 4 - cif->rtype->size; + memcpy((void*)tmp, &ret[0], cif->rtype->size); + break; + + case FFI_TYPE_SMALL_STRUCT5: + case FFI_TYPE_SMALL_STRUCT6: + case FFI_TYPE_SMALL_STRUCT7: + case FFI_TYPE_SMALL_STRUCT8: + { + unsigned int ret2[2]; + int off; + + /* Right justify ret[0] and ret[1] */ + switch (cif->flags) + { + case FFI_TYPE_SMALL_STRUCT5: off = 3; break; + case FFI_TYPE_SMALL_STRUCT6: off = 2; break; + case FFI_TYPE_SMALL_STRUCT7: off = 1; break; + default: off = 0; break; + } + + memset (ret2, 0, sizeof (ret2)); + memcpy ((char *)ret2 + off, ret, 8 - off); + + *(stack - FIRST_ARG_SLOT) = ret2[0]; + *(stack - FIRST_ARG_SLOT - 1) = ret2[1]; + } + break; + + case FFI_TYPE_POINTER: + case FFI_TYPE_VOID: + break; + + default: + debug(0, "assert with cif->flags: %d\n",cif->flags); + FFI_ASSERT(0); + break; + } + return FFI_OK; +} + +/* Fill in a closure to refer to the specified fun and user_data. + cif specifies the argument and result types for fun. + The cif must already be prep'ed. */ + +extern void ffi_closure_pa32(void); + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + void *codeloc) +{ + UINT32 *tramp = (UINT32 *)(closure->tramp); +#ifdef PA_HPUX + UINT32 *tmp; +#endif + + FFI_ASSERT (cif->abi == FFI_PA32); + + /* Make a small trampoline that will branch to our + handler function. Use PC-relative addressing. */ + +#ifdef PA_LINUX + tramp[0] = 0xeaa00000; /* b,l .+8,%r21 ; %r21 <- pc+8 */ + tramp[1] = 0xd6a01c1e; /* depi 0,31,2,%r21 ; mask priv bits */ + tramp[2] = 0x4aa10028; /* ldw 20(%r21),%r1 ; load plabel */ + tramp[3] = 0x36b53ff1; /* ldo -8(%r21),%r21 ; get closure addr */ + tramp[4] = 0x0c201096; /* ldw 0(%r1),%r22 ; address of handler */ + tramp[5] = 0xeac0c000; /* bv%r0(%r22) ; branch to handler */ + tramp[6] = 0x0c281093; /* ldw 4(%r1),%r19 ; GP of handler */ + tramp[7] = ((UINT32)(ffi_closure_pa32) & ~2); + + /* Flush d/icache -- have to flush up 2 two lines because of + alignment. */ + __asm__ volatile( + "fdc 0(%0)\n\t" + "fdc %1(%0)\n\t" + "fic 0(%%sr4, %0)\n\t" + "fic %1(%%sr4, %0)\n\t" + "sync\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n" + : + : "r"((unsigned long)tramp & ~31), + "r"(32 /* stride */) + : "memory"); +#endif + +#ifdef PA_HPUX + tramp[0] = 0xeaa00000; /* b,l .+8,%r21 ; %r21 <- pc+8 */ + tramp[1] = 0xd6a01c1e; /* depi 0,31,2,%r21 ; mask priv bits */ + tramp[2] = 0x4aa10038; /* ldw 28(%r21),%r1 ; load plabel */ + tramp[3] = 0x36b53ff1; /* ldo -8(%r21),%r21 ; get closure addr */ + tramp[4] = 0x0c201096; /* ldw 0(%r1),%r22 ; address of handler */ + tramp[5] = 0x02c010b4; /* ldsid (%r22),%r20 ; load space id */ + tramp[6] = 0x00141820; /* mtsp %r20,%sr0 ; into %sr0 */ + tramp[7] = 0xe2c00000; /* be 0(%sr0,%r22) ; branch to handler */ + tramp[8] = 0x0c281093; /* ldw 4(%r1),%r19 ; GP of handler */ + tramp[9] = ((UINT32)(ffi_closure_pa32) & ~2); + + /* Flush d/icache -- have to flush three lines because of alignment. */ + __asm__ volatile( + "copy %1,%0\n\t" + "fdc,m %2(%0)\n\t" + "fdc,m %2(%0)\n\t" + "fdc,m %2(%0)\n\t" + "ldsid (%1),%0\n\t" + "mtsp %0,%%sr0\n\t" + "copy %1,%0\n\t" + "fic,m %2(%%sr0,%0)\n\t" + "fic,m %2(%%sr0,%0)\n\t" + "fic,m %2(%%sr0,%0)\n\t" + "sync\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n" + : "=&r" ((unsigned long)tmp) + : "r" ((unsigned long)tramp & ~31), + "r" (32/* stride */) + : "memory"); +#endif + + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + + return FFI_OK; +} +#endif diff --git a/libffi/src/pa/ffitarget.h b/libffi/src/pa/ffitarget.h new file mode 100644 index 000000000..001f8917d --- /dev/null +++ b/libffi/src/pa/ffitarget.h @@ -0,0 +1,77 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 1996-2003 Red Hat, Inc. + Target configuration macros for hppa. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +/* ---- System specific configurations ----------------------------------- */ + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + +#ifdef PA_LINUX + FFI_PA32, + FFI_DEFAULT_ABI = FFI_PA32, +#endif + +#ifdef PA_HPUX + FFI_PA32, + FFI_DEFAULT_ABI = FFI_PA32, +#endif + +#ifdef PA64_HPUX +#error "PA64_HPUX FFI is not yet implemented" + FFI_PA64, + FFI_DEFAULT_ABI = FFI_PA64, +#endif + + FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 +} ffi_abi; +#endif + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_NATIVE_RAW_API 0 + +#ifdef PA_LINUX +#define FFI_TRAMPOLINE_SIZE 32 +#else +#define FFI_TRAMPOLINE_SIZE 40 +#endif + +#define FFI_TYPE_SMALL_STRUCT2 -1 +#define FFI_TYPE_SMALL_STRUCT3 -2 +#define FFI_TYPE_SMALL_STRUCT4 -3 +#define FFI_TYPE_SMALL_STRUCT5 -4 +#define FFI_TYPE_SMALL_STRUCT6 -5 +#define FFI_TYPE_SMALL_STRUCT7 -6 +#define FFI_TYPE_SMALL_STRUCT8 -7 +#endif diff --git a/libffi/src/pa/hpux32.S b/libffi/src/pa/hpux32.S new file mode 100644 index 000000000..40528bad7 --- /dev/null +++ b/libffi/src/pa/hpux32.S @@ -0,0 +1,368 @@ +/* ----------------------------------------------------------------------- + hpux32.S - Copyright (c) 2006 Free Software Foundation, Inc. + (c) 2008 Red Hat, Inc. + based on src/pa/linux.S + + HP-UX PA Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + + .LEVEL 1.1 + .SPACE $PRIVATE$ + .IMPORT $global$,DATA + .IMPORT $$dyncall,MILLICODE + .SUBSPA $DATA$ + .align 4 + + /* void ffi_call_pa32(void (*)(char *, extended_cif *), + extended_cif *ecif, + unsigned bytes, + unsigned flags, + unsigned *rvalue, + void (*fn)(void)); + */ + + .export ffi_call_pa32,ENTRY,PRIV_LEV=3 + .import ffi_prep_args_pa32,CODE + + .SPACE $TEXT$ + .SUBSPA $CODE$ + .align 4 + +L$FB1 +ffi_call_pa32 + .proc + .callinfo FRAME=64,CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=4 + .entry + stw %rp, -20(%sp) + copy %r3, %r1 +L$CFI11 + copy %sp, %r3 +L$CFI12 + + /* Setup the stack for calling prep_args... + We want the stack to look like this: + + [ Previous stack ] <- %r3 + + [ 64-bytes register save area ] <- %r4 + + [ Stack space for actual call, passed as ] <- %arg0 + [ arg0 to ffi_prep_args_pa32 ] + + [ Stack for calling prep_args ] <- %sp + */ + + stwm %r1, 64(%sp) + stw %r4, 12(%r3) +L$CFI13 + copy %sp, %r4 + + addl %arg2, %r4, %arg0 ; arg stack + stw %arg3, -48(%r3) ; save flags we need it later + + /* Call prep_args: + %arg0(stack) -- set up above + %arg1(ecif) -- same as incoming param + %arg2(bytes) -- same as incoming param */ + bl ffi_prep_args_pa32,%r2 + ldo 64(%arg0), %sp + ldo -64(%sp), %sp + + /* now %sp should point where %arg0 was pointing. */ + + /* Load the arguments that should be passed in registers + The fp args are loaded by the prep_args function. */ + ldw -36(%sp), %arg0 + ldw -40(%sp), %arg1 + ldw -44(%sp), %arg2 + ldw -48(%sp), %arg3 + + /* in case the function is going to return a structure + we need to give it a place to put the result. */ + ldw -52(%r3), %ret0 ; %ret0 <- rvalue + ldw -56(%r3), %r22 ; %r22 <- function to call + bl $$dyncall, %r31 ; Call the user function + copy %r31, %rp + + /* Prepare to store the result; we need to recover flags and rvalue. */ + ldw -48(%r3), %r21 ; r21 <- flags + ldw -52(%r3), %r20 ; r20 <- rvalue + + /* Store the result according to the return type. The most + likely types should come first. */ + +L$checkint + comib,<>,n FFI_TYPE_INT, %r21, L$checkint8 + b L$done + stw %ret0, 0(%r20) + +L$checkint8 + comib,<>,n FFI_TYPE_UINT8, %r21, L$checkint16 + b L$done + stb %ret0, 0(%r20) + +L$checkint16 + comib,<>,n FFI_TYPE_UINT16, %r21, L$checkdbl + b L$done + sth %ret0, 0(%r20) + +L$checkdbl + comib,<>,n FFI_TYPE_DOUBLE, %r21, L$checkfloat + b L$done + fstd %fr4,0(%r20) + +L$checkfloat + comib,<>,n FFI_TYPE_FLOAT, %r21, L$checkll + b L$done + fstw %fr4L,0(%r20) + +L$checkll + comib,<>,n FFI_TYPE_UINT64, %r21, L$checksmst2 + stw %ret0, 0(%r20) + b L$done + stw %ret1, 4(%r20) + +L$checksmst2 + comib,<>,n FFI_TYPE_SMALL_STRUCT2, %r21, L$checksmst3 + /* 2-byte structs are returned in ret0 as ????xxyy. */ + extru %ret0, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + b L$done + stb %ret0, 0(%r20) + +L$checksmst3 + comib,<>,n FFI_TYPE_SMALL_STRUCT3, %r21, L$checksmst4 + /* 3-byte structs are returned in ret0 as ??xxyyzz. */ + extru %ret0, 15, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret0, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + b L$done + stb %ret0, 0(%r20) + +L$checksmst4 + comib,<>,n FFI_TYPE_SMALL_STRUCT4, %r21, L$checksmst5 + /* 4-byte structs are returned in ret0 as wwxxyyzz. */ + extru %ret0, 7, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret0, 15, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret0, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + b L$done + stb %ret0, 0(%r20) + +L$checksmst5 + comib,<>,n FFI_TYPE_SMALL_STRUCT5, %r21, L$checksmst6 + /* 5 byte values are returned right justified: + ret0 ret1 + 5: ??????aa bbccddee */ + stbs,ma %ret0, 1(%r20) + extru %ret1, 7, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret1, 15, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret1, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + b L$done + stb %ret1, 0(%r20) + +L$checksmst6 + comib,<>,n FFI_TYPE_SMALL_STRUCT6, %r21, L$checksmst7 + /* 6 byte values are returned right justified: + ret0 ret1 + 6: ????aabb ccddeeff */ + extru %ret0, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + stbs,ma %ret0, 1(%r20) + extru %ret1, 7, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret1, 15, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret1, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + b L$done + stb %ret1, 0(%r20) + +L$checksmst7 + comib,<>,n FFI_TYPE_SMALL_STRUCT7, %r21, L$checksmst8 + /* 7 byte values are returned right justified: + ret0 ret1 + 7: ??aabbcc ddeeffgg */ + extru %ret0, 15, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret0, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + stbs,ma %ret0, 1(%r20) + extru %ret1, 7, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret1, 15, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret1, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + b L$done + stb %ret1, 0(%r20) + +L$checksmst8 + comib,<>,n FFI_TYPE_SMALL_STRUCT8, %r21, L$done + /* 8 byte values are returned right justified: + ret0 ret1 + 8: aabbccdd eeffgghh */ + extru %ret0, 7, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret0, 15, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret0, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + stbs,ma %ret0, 1(%r20) + extru %ret1, 7, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret1, 15, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret1, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + stb %ret1, 0(%r20) + +L$done + /* all done, return */ + copy %r4, %sp ; pop arg stack + ldw 12(%r3), %r4 + ldwm -64(%sp), %r3 ; .. and pop stack + ldw -20(%sp), %rp + bv %r0(%rp) + nop + .exit + .procend +L$FE1 + + /* void ffi_closure_pa32(void); + Called with closure argument in %r21 */ + + .SPACE $TEXT$ + .SUBSPA $CODE$ + .export ffi_closure_pa32,ENTRY,PRIV_LEV=3,RTNVAL=GR + .import ffi_closure_inner_pa32,CODE + .align 4 +L$FB2 +ffi_closure_pa32 + .proc + .callinfo FRAME=64,CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=3 + .entry + + stw %rp, -20(%sp) + copy %r3, %r1 +L$CFI21 + copy %sp, %r3 +L$CFI22 + stwm %r1, 64(%sp) + + /* Put arguments onto the stack and call ffi_closure_inner. */ + stw %arg0, -36(%r3) + stw %arg1, -40(%r3) + stw %arg2, -44(%r3) + stw %arg3, -48(%r3) + + copy %r21, %arg0 + bl ffi_closure_inner_pa32, %r2 + copy %r3, %arg1 + ldwm -64(%sp), %r3 + ldw -20(%sp), %rp + ldw -36(%sp), %ret0 + bv %r0(%rp) + ldw -40(%sp), %ret1 + .exit + .procend +L$FE2: + + .SPACE $PRIVATE$ + .SUBSPA $DATA$ + + .align 4 + .EXPORT _GLOBAL__F_ffi_call_pa32,DATA +_GLOBAL__F_ffi_call_pa32 +L$frame1: + .word L$ECIE1-L$SCIE1 ;# Length of Common Information Entry +L$SCIE1: + .word 0x0 ;# CIE Identifier Tag + .byte 0x1 ;# CIE Version + .ascii "\0" ;# CIE Augmentation + .uleb128 0x1 ;# CIE Code Alignment Factor + .sleb128 4 ;# CIE Data Alignment Factor + .byte 0x2 ;# CIE RA Column + .byte 0xc ;# DW_CFA_def_cfa + .uleb128 0x1e + .uleb128 0x0 + .align 4 +L$ECIE1: +L$SFDE1: + .word L$EFDE1-L$ASFDE1 ;# FDE Length +L$ASFDE1: + .word L$ASFDE1-L$frame1 ;# FDE CIE offset + .word L$FB1 ;# FDE initial location + .word L$FE1-L$FB1 ;# FDE address range + + .byte 0x4 ;# DW_CFA_advance_loc4 + .word L$CFI11-L$FB1 + .byte 0x83 ;# DW_CFA_offset, column 0x3 + .uleb128 0x0 + .byte 0x11 ;# DW_CFA_offset_extended_sf; save r2 at [r30-20] + .uleb128 0x2 + .sleb128 -5 + + .byte 0x4 ;# DW_CFA_advance_loc4 + .word L$CFI12-L$CFI11 + .byte 0xd ;# DW_CFA_def_cfa_register = r3 + .uleb128 0x3 + + .byte 0x4 ;# DW_CFA_advance_loc4 + .word L$CFI13-L$CFI12 + .byte 0x84 ;# DW_CFA_offset, column 0x4 + .uleb128 0x3 + + .align 4 +L$EFDE1: + +L$SFDE2: + .word L$EFDE2-L$ASFDE2 ;# FDE Length +L$ASFDE2: + .word L$ASFDE2-L$frame1 ;# FDE CIE offset + .word L$FB2 ;# FDE initial location + .word L$FE2-L$FB2 ;# FDE address range + .byte 0x4 ;# DW_CFA_advance_loc4 + .word L$CFI21-L$FB2 + .byte 0x83 ;# DW_CFA_offset, column 0x3 + .uleb128 0x0 + .byte 0x11 ;# DW_CFA_offset_extended_sf + .uleb128 0x2 + .sleb128 -5 + + .byte 0x4 ;# DW_CFA_advance_loc4 + .word L$CFI22-L$CFI21 + .byte 0xd ;# DW_CFA_def_cfa_register = r3 + .uleb128 0x3 + + .align 4 +L$EFDE2: diff --git a/libffi/src/pa/linux.S b/libffi/src/pa/linux.S new file mode 100644 index 000000000..f11ae7680 --- /dev/null +++ b/libffi/src/pa/linux.S @@ -0,0 +1,357 @@ +/* ----------------------------------------------------------------------- + linux.S - (c) 2003-2004 Randolph Chung <tausq@debian.org> + (c) 2008 Red Hat, Inc. + + HPPA Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL RENESAS TECHNOLOGY BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + + .text + .level 1.1 + .align 4 + + /* void ffi_call_pa32(void (*)(char *, extended_cif *), + extended_cif *ecif, + unsigned bytes, + unsigned flags, + unsigned *rvalue, + void (*fn)(void)); + */ + + .export ffi_call_pa32,code + .import ffi_prep_args_pa32,code + + .type ffi_call_pa32, @function +.LFB1: +ffi_call_pa32: + .proc + .callinfo FRAME=64,CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=4 + .entry + stw %rp, -20(%sp) + copy %r3, %r1 +.LCFI11: + + copy %sp, %r3 +.LCFI12: + + /* Setup the stack for calling prep_args... + We want the stack to look like this: + + [ Previous stack ] <- %r3 + + [ 64-bytes register save area ] <- %r4 + + [ Stack space for actual call, passed as ] <- %arg0 + [ arg0 to ffi_prep_args_pa32 ] + + [ Stack for calling prep_args ] <- %sp + */ + + stwm %r1, 64(%sp) + stw %r4, 12(%r3) +.LCFI13: + copy %sp, %r4 + + addl %arg2, %r4, %arg0 /* arg stack */ + stw %arg3, -48(%r3) /* save flags; we need it later */ + + /* Call prep_args: + %arg0(stack) -- set up above + %arg1(ecif) -- same as incoming param + %arg2(bytes) -- same as incoming param */ + bl ffi_prep_args_pa32,%r2 + ldo 64(%arg0), %sp + ldo -64(%sp), %sp + + /* now %sp should point where %arg0 was pointing. */ + + /* Load the arguments that should be passed in registers + The fp args were loaded by the prep_args function. */ + ldw -36(%sp), %arg0 + ldw -40(%sp), %arg1 + ldw -44(%sp), %arg2 + ldw -48(%sp), %arg3 + + /* in case the function is going to return a structure + we need to give it a place to put the result. */ + ldw -52(%r3), %ret0 /* %ret0 <- rvalue */ + ldw -56(%r3), %r22 /* %r22 <- function to call */ + bl $$dyncall, %r31 /* Call the user function */ + copy %r31, %rp + + /* Prepare to store the result; we need to recover flags and rvalue. */ + ldw -48(%r3), %r21 /* r21 <- flags */ + ldw -52(%r3), %r20 /* r20 <- rvalue */ + + /* Store the result according to the return type. */ + +.Lcheckint: + comib,<>,n FFI_TYPE_INT, %r21, .Lcheckint8 + b .Ldone + stw %ret0, 0(%r20) + +.Lcheckint8: + comib,<>,n FFI_TYPE_UINT8, %r21, .Lcheckint16 + b .Ldone + stb %ret0, 0(%r20) + +.Lcheckint16: + comib,<>,n FFI_TYPE_UINT16, %r21, .Lcheckdbl + b .Ldone + sth %ret0, 0(%r20) + +.Lcheckdbl: + comib,<>,n FFI_TYPE_DOUBLE, %r21, .Lcheckfloat + b .Ldone + fstd %fr4,0(%r20) + +.Lcheckfloat: + comib,<>,n FFI_TYPE_FLOAT, %r21, .Lcheckll + b .Ldone + fstw %fr4L,0(%r20) + +.Lcheckll: + comib,<>,n FFI_TYPE_UINT64, %r21, .Lchecksmst2 + stw %ret0, 0(%r20) + b .Ldone + stw %ret1, 4(%r20) + +.Lchecksmst2: + comib,<>,n FFI_TYPE_SMALL_STRUCT2, %r21, .Lchecksmst3 + /* 2-byte structs are returned in ret0 as ????xxyy. */ + extru %ret0, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + b .Ldone + stb %ret0, 0(%r20) + +.Lchecksmst3: + comib,<>,n FFI_TYPE_SMALL_STRUCT3, %r21, .Lchecksmst4 + /* 3-byte structs are returned in ret0 as ??xxyyzz. */ + extru %ret0, 15, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret0, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + b .Ldone + stb %ret0, 0(%r20) + +.Lchecksmst4: + comib,<>,n FFI_TYPE_SMALL_STRUCT4, %r21, .Lchecksmst5 + /* 4-byte structs are returned in ret0 as wwxxyyzz. */ + extru %ret0, 7, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret0, 15, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret0, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + b .Ldone + stb %ret0, 0(%r20) + +.Lchecksmst5: + comib,<>,n FFI_TYPE_SMALL_STRUCT5, %r21, .Lchecksmst6 + /* 5 byte values are returned right justified: + ret0 ret1 + 5: ??????aa bbccddee */ + stbs,ma %ret0, 1(%r20) + extru %ret1, 7, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret1, 15, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret1, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + b .Ldone + stb %ret1, 0(%r20) + +.Lchecksmst6: + comib,<>,n FFI_TYPE_SMALL_STRUCT6, %r21, .Lchecksmst7 + /* 6 byte values are returned right justified: + ret0 ret1 + 6: ????aabb ccddeeff */ + extru %ret0, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + stbs,ma %ret0, 1(%r20) + extru %ret1, 7, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret1, 15, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret1, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + b .Ldone + stb %ret1, 0(%r20) + +.Lchecksmst7: + comib,<>,n FFI_TYPE_SMALL_STRUCT7, %r21, .Lchecksmst8 + /* 7 byte values are returned right justified: + ret0 ret1 + 7: ??aabbcc ddeeffgg */ + extru %ret0, 15, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret0, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + stbs,ma %ret0, 1(%r20) + extru %ret1, 7, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret1, 15, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret1, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + b .Ldone + stb %ret1, 0(%r20) + +.Lchecksmst8: + comib,<>,n FFI_TYPE_SMALL_STRUCT8, %r21, .Ldone + /* 8 byte values are returned right justified: + ret0 ret1 + 8: aabbccdd eeffgghh */ + extru %ret0, 7, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret0, 15, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret0, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + stbs,ma %ret0, 1(%r20) + extru %ret1, 7, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret1, 15, 8, %r22 + stbs,ma %r22, 1(%r20) + extru %ret1, 23, 8, %r22 + stbs,ma %r22, 1(%r20) + stb %ret1, 0(%r20) + +.Ldone: + /* all done, return */ + copy %r4, %sp /* pop arg stack */ + ldw 12(%r3), %r4 + ldwm -64(%sp), %r3 /* .. and pop stack */ + ldw -20(%sp), %rp + bv %r0(%rp) + nop + .exit + .procend +.LFE1: + + /* void ffi_closure_pa32(void); + Called with closure argument in %r21 */ + .export ffi_closure_pa32,code + .import ffi_closure_inner_pa32,code + + .type ffi_closure_pa32, @function +.LFB2: +ffi_closure_pa32: + .proc + .callinfo FRAME=64,CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=3 + .entry + + stw %rp, -20(%sp) +.LCFI20: + copy %r3, %r1 +.LCFI21: + copy %sp, %r3 +.LCFI22: + stwm %r1, 64(%sp) + + /* Put arguments onto the stack and call ffi_closure_inner. */ + stw %arg0, -36(%r3) + stw %arg1, -40(%r3) + stw %arg2, -44(%r3) + stw %arg3, -48(%r3) + + copy %r21, %arg0 + bl ffi_closure_inner_pa32, %r2 + copy %r3, %arg1 + + ldwm -64(%sp), %r3 + ldw -20(%sp), %rp + ldw -36(%sp), %ret0 + bv %r0(%r2) + ldw -40(%sp), %ret1 + + .exit + .procend +.LFE2: + + .section ".eh_frame",EH_FRAME_FLAGS,@progbits +.Lframe1: + .word .LECIE1-.LSCIE1 ;# Length of Common Information Entry +.LSCIE1: + .word 0x0 ;# CIE Identifier Tag + .byte 0x1 ;# CIE Version + .ascii "\0" ;# CIE Augmentation + .uleb128 0x1 ;# CIE Code Alignment Factor + .sleb128 4 ;# CIE Data Alignment Factor + .byte 0x2 ;# CIE RA Column + .byte 0xc ;# DW_CFA_def_cfa + .uleb128 0x1e + .uleb128 0x0 + .align 4 +.LECIE1: +.LSFDE1: + .word .LEFDE1-.LASFDE1 ;# FDE Length +.LASFDE1: + .word .LASFDE1-.Lframe1 ;# FDE CIE offset + .word .LFB1 ;# FDE initial location + .word .LFE1-.LFB1 ;# FDE address range + + .byte 0x4 ;# DW_CFA_advance_loc4 + .word .LCFI11-.LFB1 + .byte 0x83 ;# DW_CFA_offset, column 0x3 + .uleb128 0x0 + .byte 0x11 ;# DW_CFA_offset_extended_sf; save r2 at [r30-20] + .uleb128 0x2 + .sleb128 -5 + + .byte 0x4 ;# DW_CFA_advance_loc4 + .word .LCFI12-.LCFI11 + .byte 0xd ;# DW_CFA_def_cfa_register = r3 + .uleb128 0x3 + + .byte 0x4 ;# DW_CFA_advance_loc4 + .word .LCFI13-.LCFI12 + .byte 0x84 ;# DW_CFA_offset, column 0x4 + .uleb128 0x3 + + .align 4 +.LEFDE1: + +.LSFDE2: + .word .LEFDE2-.LASFDE2 ;# FDE Length +.LASFDE2: + .word .LASFDE2-.Lframe1 ;# FDE CIE offset + .word .LFB2 ;# FDE initial location + .word .LFE2-.LFB2 ;# FDE address range + .byte 0x4 ;# DW_CFA_advance_loc4 + .word .LCFI21-.LFB2 + .byte 0x83 ;# DW_CFA_offset, column 0x3 + .uleb128 0x0 + .byte 0x11 ;# DW_CFA_offset_extended_sf + .uleb128 0x2 + .sleb128 -5 + + .byte 0x4 ;# DW_CFA_advance_loc4 + .word .LCFI22-.LCFI21 + .byte 0xd ;# DW_CFA_def_cfa_register = r3 + .uleb128 0x3 + + .align 4 +.LEFDE2: diff --git a/libffi/src/powerpc/aix.S b/libffi/src/powerpc/aix.S new file mode 100644 index 000000000..213f2db39 --- /dev/null +++ b/libffi/src/powerpc/aix.S @@ -0,0 +1,328 @@ +/* ----------------------------------------------------------------------- + aix.S - Copyright (c) 2002, 2009 Free Software Foundation, Inc. + based on darwin.S by John Hornkvist + + PowerPC Assembly glue. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + + .set r0,0 + .set r1,1 + .set r2,2 + .set r3,3 + .set r4,4 + .set r5,5 + .set r6,6 + .set r7,7 + .set r8,8 + .set r9,9 + .set r10,10 + .set r11,11 + .set r12,12 + .set r13,13 + .set r14,14 + .set r15,15 + .set r16,16 + .set r17,17 + .set r18,18 + .set r19,19 + .set r20,20 + .set r21,21 + .set r22,22 + .set r23,23 + .set r24,24 + .set r25,25 + .set r26,26 + .set r27,27 + .set r28,28 + .set r29,29 + .set r30,30 + .set r31,31 + .set f0,0 + .set f1,1 + .set f2,2 + .set f3,3 + .set f4,4 + .set f5,5 + .set f6,6 + .set f7,7 + .set f8,8 + .set f9,9 + .set f10,10 + .set f11,11 + .set f12,12 + .set f13,13 + .set f14,14 + .set f15,15 + .set f16,16 + .set f17,17 + .set f18,18 + .set f19,19 + .set f20,20 + .set f21,21 + + .extern .ffi_prep_args + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> +#define JUMPTARGET(name) name +#define L(x) x + .file "aix.S" + .toc + + /* void ffi_call_AIX(extended_cif *ecif, unsigned long bytes, + * unsigned int flags, unsigned int *rvalue, + * void (*fn)(), + * void (*prep_args)(extended_cif*, unsigned *const)); + * r3=ecif, r4=bytes, r5=flags, r6=rvalue, r7=fn, r8=prep_args + */ + +.csect .text[PR] + .align 2 + .globl ffi_call_AIX + .globl .ffi_call_AIX +.csect ffi_call_AIX[DS] +ffi_call_AIX: +#ifdef __64BIT__ + .llong .ffi_call_AIX, TOC[tc0], 0 + .csect .text[PR] +.ffi_call_AIX: + /* Save registers we use. */ + mflr r0 + + std r28,-32(r1) + std r29,-24(r1) + std r30,-16(r1) + std r31, -8(r1) + + std r0, 16(r1) + mr r28, r1 /* our AP. */ + stdux r1, r1, r4 + + /* Save arguments over call... */ + mr r31, r5 /* flags, */ + mr r30, r6 /* rvalue, */ + mr r29, r7 /* function address. */ + std r2, 40(r1) + + /* Call ffi_prep_args. */ + mr r4, r1 + bl .ffi_prep_args + nop + + /* Now do the call. */ + ld r0, 0(r29) + ld r2, 8(r29) + ld r11, 16(r29) + /* Set up cr1 with bits 4-7 of the flags. */ + mtcrf 0x40, r31 + mtctr r0 + /* Load all those argument registers. */ + // We have set up a nice stack frame, just load it into registers. + ld r3, 40+(1*8)(r1) + ld r4, 40+(2*8)(r1) + ld r5, 40+(3*8)(r1) + ld r6, 40+(4*8)(r1) + nop + ld r7, 40+(5*8)(r1) + ld r8, 40+(6*8)(r1) + ld r9, 40+(7*8)(r1) + ld r10,40+(8*8)(r1) + +L1: + /* Load all the FP registers. */ + bf 6,L2 // 2f + 0x18 + lfd f1,-32-(13*8)(r28) + lfd f2,-32-(12*8)(r28) + lfd f3,-32-(11*8)(r28) + lfd f4,-32-(10*8)(r28) + nop + lfd f5,-32-(9*8)(r28) + lfd f6,-32-(8*8)(r28) + lfd f7,-32-(7*8)(r28) + lfd f8,-32-(6*8)(r28) + nop + lfd f9,-32-(5*8)(r28) + lfd f10,-32-(4*8)(r28) + lfd f11,-32-(3*8)(r28) + lfd f12,-32-(2*8)(r28) + nop + lfd f13,-32-(1*8)(r28) + +L2: + /* Make the call. */ + bctrl + ld r2, 40(r1) + + /* Now, deal with the return value. */ + mtcrf 0x01, r31 + + bt 30, L(done_return_value) + bt 29, L(fp_return_value) + std r3, 0(r30) + + /* Fall through... */ + +L(done_return_value): + /* Restore the registers we used and return. */ + mr r1, r28 + ld r0, 16(r28) + ld r28, -32(r1) + mtlr r0 + ld r29, -24(r1) + ld r30, -16(r1) + ld r31, -8(r1) + blr + +L(fp_return_value): + bf 28, L(float_return_value) + stfd f1, 0(r30) + bf 31, L(done_return_value) + stfd f2, 8(r30) + b L(done_return_value) +L(float_return_value): + stfs f1, 0(r30) + b L(done_return_value) + +#else /* ! __64BIT__ */ + + .long .ffi_call_AIX, TOC[tc0], 0 + .csect .text[PR] +.ffi_call_AIX: + /* Save registers we use. */ + mflr r0 + + stw r28,-16(r1) + stw r29,-12(r1) + stw r30, -8(r1) + stw r31, -4(r1) + + stw r0, 8(r1) + mr r28, r1 /* out AP. */ + stwux r1, r1, r4 + + /* Save arguments over call... */ + mr r31, r5 /* flags, */ + mr r30, r6 /* rvalue, */ + mr r29, r7 /* function address, */ + stw r2, 20(r1) + + /* Call ffi_prep_args. */ + mr r4, r1 + bl .ffi_prep_args + nop + + /* Now do the call. */ + lwz r0, 0(r29) + lwz r2, 4(r29) + lwz r11, 8(r29) + /* Set up cr1 with bits 4-7 of the flags. */ + mtcrf 0x40, r31 + mtctr r0 + /* Load all those argument registers. */ + // We have set up a nice stack frame, just load it into registers. + lwz r3, 20+(1*4)(r1) + lwz r4, 20+(2*4)(r1) + lwz r5, 20+(3*4)(r1) + lwz r6, 20+(4*4)(r1) + nop + lwz r7, 20+(5*4)(r1) + lwz r8, 20+(6*4)(r1) + lwz r9, 20+(7*4)(r1) + lwz r10,20+(8*4)(r1) + +L1: + /* Load all the FP registers. */ + bf 6,L2 // 2f + 0x18 + lfd f1,-16-(13*8)(r28) + lfd f2,-16-(12*8)(r28) + lfd f3,-16-(11*8)(r28) + lfd f4,-16-(10*8)(r28) + nop + lfd f5,-16-(9*8)(r28) + lfd f6,-16-(8*8)(r28) + lfd f7,-16-(7*8)(r28) + lfd f8,-16-(6*8)(r28) + nop + lfd f9,-16-(5*8)(r28) + lfd f10,-16-(4*8)(r28) + lfd f11,-16-(3*8)(r28) + lfd f12,-16-(2*8)(r28) + nop + lfd f13,-16-(1*8)(r28) + +L2: + /* Make the call. */ + bctrl + lwz r2, 20(r1) + + /* Now, deal with the return value. */ + mtcrf 0x01, r31 + + bt 30, L(done_return_value) + bt 29, L(fp_return_value) + stw r3, 0(r30) + bf 28, L(done_return_value) + stw r4, 4(r30) + + /* Fall through... */ + +L(done_return_value): + /* Restore the registers we used and return. */ + mr r1, r28 + lwz r0, 8(r28) + lwz r28,-16(r1) + mtlr r0 + lwz r29,-12(r1) + lwz r30, -8(r1) + lwz r31, -4(r1) + blr + +L(fp_return_value): + bf 28, L(float_return_value) + stfd f1, 0(r30) + b L(done_return_value) +L(float_return_value): + stfs f1, 0(r30) + b L(done_return_value) +#endif + .long 0 + .byte 0,0,0,1,128,4,0,0 +//END(ffi_call_AIX) + +.csect .text[PR] + .align 2 + .globl ffi_call_DARWIN + .globl .ffi_call_DARWIN +.csect ffi_call_DARWIN[DS] +ffi_call_DARWIN: +#ifdef __64BIT__ + .llong .ffi_call_DARWIN, TOC[tc0], 0 +#else + .long .ffi_call_DARWIN, TOC[tc0], 0 +#endif + .csect .text[PR] +.ffi_call_DARWIN: + blr + .long 0 + .byte 0,0,0,0,0,0,0,0 +//END(ffi_call_DARWIN) diff --git a/libffi/src/powerpc/aix_closure.S b/libffi/src/powerpc/aix_closure.S new file mode 100644 index 000000000..aabd3c3c1 --- /dev/null +++ b/libffi/src/powerpc/aix_closure.S @@ -0,0 +1,447 @@ +/* ----------------------------------------------------------------------- + aix_closure.S - Copyright (c) 2002, 2003, 2009 Free Software Foundation, Inc. + based on darwin_closure.S + + PowerPC Assembly glue. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + + .set r0,0 + .set r1,1 + .set r2,2 + .set r3,3 + .set r4,4 + .set r5,5 + .set r6,6 + .set r7,7 + .set r8,8 + .set r9,9 + .set r10,10 + .set r11,11 + .set r12,12 + .set r13,13 + .set r14,14 + .set r15,15 + .set r16,16 + .set r17,17 + .set r18,18 + .set r19,19 + .set r20,20 + .set r21,21 + .set r22,22 + .set r23,23 + .set r24,24 + .set r25,25 + .set r26,26 + .set r27,27 + .set r28,28 + .set r29,29 + .set r30,30 + .set r31,31 + .set f0,0 + .set f1,1 + .set f2,2 + .set f3,3 + .set f4,4 + .set f5,5 + .set f6,6 + .set f7,7 + .set f8,8 + .set f9,9 + .set f10,10 + .set f11,11 + .set f12,12 + .set f13,13 + .set f14,14 + .set f15,15 + .set f16,16 + .set f17,17 + .set f18,18 + .set f19,19 + .set f20,20 + .set f21,21 + + .extern .ffi_closure_helper_DARWIN + +#define LIBFFI_ASM +#define JUMPTARGET(name) name +#define L(x) x + .file "aix_closure.S" + .toc +LC..60: + .tc L..60[TC],L..60 + .csect .text[PR] + .align 2 + +.csect .text[PR] + .align 2 + .globl ffi_closure_ASM + .globl .ffi_closure_ASM +.csect ffi_closure_ASM[DS] +ffi_closure_ASM: +#ifdef __64BIT__ + .llong .ffi_closure_ASM, TOC[tc0], 0 + .csect .text[PR] +.ffi_closure_ASM: +/* we want to build up an area for the parameters passed */ +/* in registers (both floating point and integer) */ + + /* we store gpr 3 to gpr 10 (aligned to 4) + in the parents outgoing area */ + std r3, 48+(0*8)(r1) + std r4, 48+(1*8)(r1) + std r5, 48+(2*8)(r1) + std r6, 48+(3*8)(r1) + mflr r0 + + std r7, 48+(4*8)(r1) + std r8, 48+(5*8)(r1) + std r9, 48+(6*8)(r1) + std r10, 48+(7*8)(r1) + std r0, 16(r1) /* save the return address */ + + + /* 48 Bytes (Linkage Area) */ + /* 64 Bytes (params) */ + /* 16 Bytes (result) */ + /* 104 Bytes (13*8 from FPR) */ + /* 8 Bytes (alignment) */ + /* 240 Bytes */ + + stdu r1, -240(r1) /* skip over caller save area + keep stack aligned to 16 */ + + /* next save fpr 1 to fpr 13 (aligned to 8) */ + stfd f1, 128+(0*8)(r1) + stfd f2, 128+(1*8)(r1) + stfd f3, 128+(2*8)(r1) + stfd f4, 128+(3*8)(r1) + stfd f5, 128+(4*8)(r1) + stfd f6, 128+(5*8)(r1) + stfd f7, 128+(6*8)(r1) + stfd f8, 128+(7*8)(r1) + stfd f9, 128+(8*8)(r1) + stfd f10, 128+(9*8)(r1) + stfd f11, 128+(10*8)(r1) + stfd f12, 128+(11*8)(r1) + stfd f13, 128+(12*8)(r1) + + /* set up registers for the routine that actually does the work */ + /* get the context pointer from the trampoline */ + mr r3, r11 + + /* now load up the pointer to the result storage */ + addi r4, r1, 112 + + /* now load up the pointer to the saved gpr registers */ + addi r5, r1, 288 + + /* now load up the pointer to the saved fpr registers */ + addi r6, r1, 128 + + /* make the call */ + bl .ffi_closure_helper_DARWIN + nop + + /* now r3 contains the return type */ + /* so use it to look up in a table */ + /* so we know how to deal with each type */ + + /* look up the proper starting point in table */ + /* by using return type as offset */ + lhz r3, 10(r3) /* load type from return type */ + ld r4, LC..60(2) /* get address of jump table */ + sldi r3, r3, 4 /* now multiply return type by 16 */ + ld r0, 240+16(r1) /* load return address */ + add r3, r3, r4 /* add contents of table to table address */ + mtctr r3 + bctr /* jump to it */ + +/* Each fragment must be exactly 16 bytes long (4 instructions). + Align to 16 byte boundary for cache and dispatch efficiency. */ + .align 4 + +L..60: +/* case FFI_TYPE_VOID */ + mtlr r0 + addi r1, r1, 240 + blr + nop + +/* case FFI_TYPE_INT */ + lwa r3, 112+4(r1) + mtlr r0 + addi r1, r1, 240 + blr + +/* case FFI_TYPE_FLOAT */ + lfs f1, 112+0(r1) + mtlr r0 + addi r1, r1, 240 + blr + +/* case FFI_TYPE_DOUBLE */ + lfd f1, 112+0(r1) + mtlr r0 + addi r1, r1, 240 + blr + +/* case FFI_TYPE_LONGDOUBLE */ + lfd f1, 112+0(r1) + mtlr r0 + lfd f2, 112+8(r1) + b L..finish + +/* case FFI_TYPE_UINT8 */ + lbz r3, 112+7(r1) + mtlr r0 + addi r1, r1, 240 + blr + +/* case FFI_TYPE_SINT8 */ + lbz r3, 112+7(r1) + mtlr r0 + extsb r3, r3 + b L..finish + +/* case FFI_TYPE_UINT16 */ + lhz r3, 112+6(r1) + mtlr r0 +L..finish: + addi r1, r1, 240 + blr + +/* case FFI_TYPE_SINT16 */ + lha r3, 112+6(r1) + mtlr r0 + addi r1, r1, 240 + blr + +/* case FFI_TYPE_UINT32 */ + lwz r3, 112+4(r1) + mtlr r0 + addi r1, r1, 240 + blr + +/* case FFI_TYPE_SINT32 */ + lwa r3, 112+4(r1) + mtlr r0 + addi r1, r1, 240 + blr + +/* case FFI_TYPE_UINT64 */ + ld r3, 112+0(r1) + mtlr r0 + addi r1, r1, 240 + blr + +/* case FFI_TYPE_SINT64 */ + ld r3, 112+0(r1) + mtlr r0 + addi r1, r1, 240 + blr + +/* case FFI_TYPE_STRUCT */ + mtlr r0 + addi r1, r1, 240 + blr + nop + +/* case FFI_TYPE_POINTER */ + ld r3, 112+0(r1) + mtlr r0 + addi r1, r1, 240 + blr + +#else /* ! __64BIT__ */ + + .long .ffi_closure_ASM, TOC[tc0], 0 + .csect .text[PR] +.ffi_closure_ASM: +/* we want to build up an area for the parameters passed */ +/* in registers (both floating point and integer) */ + + /* we store gpr 3 to gpr 10 (aligned to 4) + in the parents outgoing area */ + stw r3, 24+(0*4)(r1) + stw r4, 24+(1*4)(r1) + stw r5, 24+(2*4)(r1) + stw r6, 24+(3*4)(r1) + mflr r0 + + stw r7, 24+(4*4)(r1) + stw r8, 24+(5*4)(r1) + stw r9, 24+(6*4)(r1) + stw r10, 24+(7*4)(r1) + stw r0, 8(r1) + + /* 24 Bytes (Linkage Area) */ + /* 32 Bytes (params) */ + /* 16 Bytes (result) */ + /* 104 Bytes (13*8 from FPR) */ + /* 176 Bytes */ + + stwu r1, -176(r1) /* skip over caller save area + keep stack aligned to 16 */ + + /* next save fpr 1 to fpr 13 (aligned to 8) */ + stfd f1, 72+(0*8)(r1) + stfd f2, 72+(1*8)(r1) + stfd f3, 72+(2*8)(r1) + stfd f4, 72+(3*8)(r1) + stfd f5, 72+(4*8)(r1) + stfd f6, 72+(5*8)(r1) + stfd f7, 72+(6*8)(r1) + stfd f8, 72+(7*8)(r1) + stfd f9, 72+(8*8)(r1) + stfd f10, 72+(9*8)(r1) + stfd f11, 72+(10*8)(r1) + stfd f12, 72+(11*8)(r1) + stfd f13, 72+(12*8)(r1) + + /* set up registers for the routine that actually does the work */ + /* get the context pointer from the trampoline */ + mr r3, r11 + + /* now load up the pointer to the result storage */ + addi r4, r1, 56 + + /* now load up the pointer to the saved gpr registers */ + addi r5, r1, 200 + + /* now load up the pointer to the saved fpr registers */ + addi r6, r1, 72 + + /* make the call */ + bl .ffi_closure_helper_DARWIN + nop + + /* now r3 contains the return type */ + /* so use it to look up in a table */ + /* so we know how to deal with each type */ + + /* look up the proper starting point in table */ + /* by using return type as offset */ + lhz r3, 6(r3) /* load type from return type */ + lwz r4, LC..60(2) /* get address of jump table */ + slwi r3, r3, 4 /* now multiply return type by 16 */ + lwz r0, 176+8(r1) /* load return address */ + add r3, r3, r4 /* add contents of table to table address */ + mtctr r3 + bctr /* jump to it */ + +/* Each fragment must be exactly 16 bytes long (4 instructions). + Align to 16 byte boundary for cache and dispatch efficiency. */ + .align 4 + +L..60: +/* case FFI_TYPE_VOID */ + mtlr r0 + addi r1, r1, 176 + blr + nop + +/* case FFI_TYPE_INT */ + lwz r3, 56+0(r1) + mtlr r0 + addi r1, r1, 176 + blr + +/* case FFI_TYPE_FLOAT */ + lfs f1, 56+0(r1) + mtlr r0 + addi r1, r1, 176 + blr + +/* case FFI_TYPE_DOUBLE */ + lfd f1, 56+0(r1) + mtlr r0 + addi r1, r1, 176 + blr + +/* case FFI_TYPE_LONGDOUBLE */ + lfd f1, 56+0(r1) + mtlr r0 + lfd f2, 56+8(r1) + b L..finish + +/* case FFI_TYPE_UINT8 */ + lbz r3, 56+3(r1) + mtlr r0 + addi r1, r1, 176 + blr + +/* case FFI_TYPE_SINT8 */ + lbz r3, 56+3(r1) + mtlr r0 + extsb r3, r3 + b L..finish + +/* case FFI_TYPE_UINT16 */ + lhz r3, 56+2(r1) + mtlr r0 + addi r1, r1, 176 + blr + +/* case FFI_TYPE_SINT16 */ + lha r3, 56+2(r1) + mtlr r0 + addi r1, r1, 176 + blr + +/* case FFI_TYPE_UINT32 */ + lwz r3, 56+0(r1) + mtlr r0 + addi r1, r1, 176 + blr + +/* case FFI_TYPE_SINT32 */ + lwz r3, 56+0(r1) + mtlr r0 + addi r1, r1, 176 + blr + +/* case FFI_TYPE_UINT64 */ + lwz r3, 56+0(r1) + mtlr r0 + lwz r4, 56+4(r1) + b L..finish + +/* case FFI_TYPE_SINT64 */ + lwz r3, 56+0(r1) + mtlr r0 + lwz r4, 56+4(r1) + b L..finish + +/* case FFI_TYPE_STRUCT */ + mtlr r0 + addi r1, r1, 176 + blr + nop + +/* case FFI_TYPE_POINTER */ + lwz r3, 56+0(r1) + mtlr r0 +L..finish: + addi r1, r1, 176 + blr +#endif +/* END(ffi_closure_ASM) */ diff --git a/libffi/src/powerpc/asm.h b/libffi/src/powerpc/asm.h new file mode 100644 index 000000000..e86e6b091 --- /dev/null +++ b/libffi/src/powerpc/asm.h @@ -0,0 +1,125 @@ +/* ----------------------------------------------------------------------- + asm.h - Copyright (c) 1998 Geoffrey Keating + + PowerPC Assembly glue. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define ASM_GLOBAL_DIRECTIVE .globl + + +#define C_SYMBOL_NAME(name) name +/* Macro for a label. */ +#ifdef __STDC__ +#define C_LABEL(name) name##: +#else +#define C_LABEL(name) name/**/: +#endif + +/* This seems to always be the case on PPC. */ +#define ALIGNARG(log2) log2 +/* For ELF we need the `.type' directive to make shared libs work right. */ +#define ASM_TYPE_DIRECTIVE(name,typearg) .type name,typearg; +#define ASM_SIZE_DIRECTIVE(name) .size name,.-name + +/* If compiled for profiling, call `_mcount' at the start of each function. */ +#ifdef PROF +/* The mcount code relies on a the return address being on the stack + to locate our caller and so it can restore it; so store one just + for its benefit. */ +#ifdef PIC +#define CALL_MCOUNT \ + .pushsection; \ + .section ".data"; \ + .align ALIGNARG(2); \ +0:.long 0; \ + .previous; \ + mflr %r0; \ + stw %r0,4(%r1); \ + bl _GLOBAL_OFFSET_TABLE_@local-4; \ + mflr %r11; \ + lwz %r0,0b@got(%r11); \ + bl JUMPTARGET(_mcount); +#else /* PIC */ +#define CALL_MCOUNT \ + .section ".data"; \ + .align ALIGNARG(2); \ +0:.long 0; \ + .previous; \ + mflr %r0; \ + lis %r11,0b@ha; \ + stw %r0,4(%r1); \ + addi %r0,%r11,0b@l; \ + bl JUMPTARGET(_mcount); +#endif /* PIC */ +#else /* PROF */ +#define CALL_MCOUNT /* Do nothing. */ +#endif /* PROF */ + +#define ENTRY(name) \ + ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name); \ + ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function) \ + .align ALIGNARG(2); \ + C_LABEL(name) \ + CALL_MCOUNT + +#define EALIGN_W_0 /* No words to insert. */ +#define EALIGN_W_1 nop +#define EALIGN_W_2 nop;nop +#define EALIGN_W_3 nop;nop;nop +#define EALIGN_W_4 EALIGN_W_3;nop +#define EALIGN_W_5 EALIGN_W_4;nop +#define EALIGN_W_6 EALIGN_W_5;nop +#define EALIGN_W_7 EALIGN_W_6;nop + +/* EALIGN is like ENTRY, but does alignment to 'words'*4 bytes + past a 2^align boundary. */ +#ifdef PROF +#define EALIGN(name, alignt, words) \ + ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name); \ + ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function) \ + .align ALIGNARG(2); \ + C_LABEL(name) \ + CALL_MCOUNT \ + b 0f; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + 0: +#else /* PROF */ +#define EALIGN(name, alignt, words) \ + ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name); \ + ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(name) +#endif + +#define END(name) \ + ASM_SIZE_DIRECTIVE(name) + +#ifdef PIC +#define JUMPTARGET(name) name##@plt +#else +#define JUMPTARGET(name) name +#endif + +/* Local labels stripped out by the linker. */ +#define L(x) .L##x diff --git a/libffi/src/powerpc/darwin.S b/libffi/src/powerpc/darwin.S new file mode 100644 index 000000000..3b425da78 --- /dev/null +++ b/libffi/src/powerpc/darwin.S @@ -0,0 +1,383 @@ +/* ----------------------------------------------------------------------- + darwin.S - Copyright (c) 2000 John Hornkvist + Copyright (c) 2004, 2010 Free Software Foundation, Inc. + + PowerPC Assembly glue. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#if defined(__ppc64__) +#define MODE_CHOICE(x, y) y +#else +#define MODE_CHOICE(x, y) x +#endif + +#define machine_choice MODE_CHOICE(ppc7400,ppc64) + +; Define some pseudo-opcodes for size-independent load & store of GPRs ... +#define lgu MODE_CHOICE(lwzu, ldu) +#define lg MODE_CHOICE(lwz,ld) +#define sg MODE_CHOICE(stw,std) +#define sgu MODE_CHOICE(stwu,stdu) +#define sgux MODE_CHOICE(stwux,stdux) + +; ... and the size of GPRs and their storage indicator. +#define GPR_BYTES MODE_CHOICE(4,8) +#define LOG2_GPR_BYTES MODE_CHOICE(2,3) /* log2(GPR_BYTES) */ +#define g_long MODE_CHOICE(long, quad) /* usage is ".g_long" */ + +; From the ABI doc: "Mac OS X ABI Function Call Guide" Version 2009-02-04. +#define LINKAGE_SIZE MODE_CHOICE(24,48) +#define PARAM_AREA MODE_CHOICE(32,64) +#define SAVED_LR_OFFSET MODE_CHOICE(8,16) /* save position for lr */ + +/* If there is any FP stuff we make space for all of the regs. */ +#define SAVED_FPR_COUNT 13 +#define FPR_SIZE 8 +#define RESULT_BYTES 16 + +/* This should be kept in step with the same value in ffi_darwin.c. */ +#define ASM_NEEDS_REGISTERS 4 +#define SAVE_REGS_SIZE (ASM_NEEDS_REGISTERS * GPR_BYTES) + +#include <fficonfig.h> +#include <ffi.h> + +#define JUMPTARGET(name) name +#define L(x) x + + .text + .align 2 + .globl _ffi_prep_args + + .align 2 + .globl _ffi_call_DARWIN + + /* We arrive here with: + r3 = ptr to extended cif. + r4 = -bytes. + r5 = cif flags. + r6 = ptr to return value. + r7 = fn pointer (user func). + r8 = fn pointer (ffi_prep_args). + r9 = ffi_type* for the ret val. */ + +_ffi_call_DARWIN: +Lstartcode: + mr r12,r8 /* We only need r12 until the call, + so it does not have to be saved. */ +LFB1: + /* Save the old stack pointer as AP. */ + mr r8,r1 +LCFI0: + + /* Save the retval type in parents frame. */ + sg r9,(LINKAGE_SIZE+6*GPR_BYTES)(r8) + + /* Allocate the stack space we need. */ + sgux r1,r1,r4 + + /* Save registers we use. */ + mflr r9 + sg r9,SAVED_LR_OFFSET(r8) + + sg r28,-(4 * GPR_BYTES)(r8) + sg r29,-(3 * GPR_BYTES)(r8) + sg r30,-(2 * GPR_BYTES)(r8) + sg r31,-( GPR_BYTES)(r8) + +#if !defined(POWERPC_DARWIN) + /* The TOC slot is reserved in the Darwin ABI and r2 is volatile. */ + sg r2,(5 * GPR_BYTES)(r1) +#endif + +LCFI1: + + /* Save arguments over call. */ + mr r31,r5 /* flags, */ + mr r30,r6 /* rvalue, */ + mr r29,r7 /* function address, */ + mr r28,r8 /* our AP. */ +LCFI2: + /* Call ffi_prep_args. r3 = extended cif, r4 = stack ptr copy. */ + mr r4,r1 + li r9,0 + + mtctr r12 /* r12 holds address of _ffi_prep_args. */ + bctrl + +#if !defined(POWERPC_DARWIN) + /* The TOC slot is reserved in the Darwin ABI and r2 is volatile. */ + lg r2,(5 * GPR_BYTES)(r1) +#endif + /* Now do the call. + Set up cr1 with bits 4-7 of the flags. */ + mtcrf 0x40,r31 + /* Get the address to call into CTR. */ + mtctr r29 + /* Load all those argument registers. + We have set up a nice stack frame, just load it into registers. */ + lg r3, (LINKAGE_SIZE )(r1) + lg r4, (LINKAGE_SIZE + GPR_BYTES)(r1) + lg r5, (LINKAGE_SIZE + 2 * GPR_BYTES)(r1) + lg r6, (LINKAGE_SIZE + 3 * GPR_BYTES)(r1) + nop + lg r7, (LINKAGE_SIZE + 4 * GPR_BYTES)(r1) + lg r8, (LINKAGE_SIZE + 5 * GPR_BYTES)(r1) + lg r9, (LINKAGE_SIZE + 6 * GPR_BYTES)(r1) + lg r10,(LINKAGE_SIZE + 7 * GPR_BYTES)(r1) + +L1: + /* ... Load all the FP registers. */ + bf 6,L2 /* No floats to load. */ + lfd f1, -SAVE_REGS_SIZE-(13*FPR_SIZE)(r28) + lfd f2, -SAVE_REGS_SIZE-(12*FPR_SIZE)(r28) + lfd f3, -SAVE_REGS_SIZE-(11*FPR_SIZE)(r28) + lfd f4, -SAVE_REGS_SIZE-(10*FPR_SIZE)(r28) + nop + lfd f5, -SAVE_REGS_SIZE-( 9*FPR_SIZE)(r28) + lfd f6, -SAVE_REGS_SIZE-( 8*FPR_SIZE)(r28) + lfd f7, -SAVE_REGS_SIZE-( 7*FPR_SIZE)(r28) + lfd f8, -SAVE_REGS_SIZE-( 6*FPR_SIZE)(r28) + nop + lfd f9, -SAVE_REGS_SIZE-( 5*FPR_SIZE)(r28) + lfd f10,-SAVE_REGS_SIZE-( 4*FPR_SIZE)(r28) + lfd f11,-SAVE_REGS_SIZE-( 3*FPR_SIZE)(r28) + lfd f12,-SAVE_REGS_SIZE-( 2*FPR_SIZE)(r28) + nop + lfd f13,-SAVE_REGS_SIZE-( 1*FPR_SIZE)(r28) + +L2: + mr r12,r29 /* Put the target address in r12 as specified. */ + mtctr r12 + nop + nop + + /* Make the call. */ + bctrl + + /* Now, deal with the return value. */ + + /* m64 structure returns can occupy the same set of registers as + would be used to pass such a structure as arg0 - so take care + not to step on any possibly hot regs. */ + + /* Get the flags.. */ + mtcrf 0x03,r31 ; we need c6 & cr7 now. + ; FLAG_RETURNS_NOTHING also covers struct ret-by-ref. + bt 30,L(done_return_value) ; FLAG_RETURNS_NOTHING + bf 27,L(scalar_return_value) ; not FLAG_RETURNS_STRUCT + + /* OK, so we have a struct. */ +#if defined(__ppc64__) + bt 31,L(maybe_return_128) ; FLAG_RETURNS_128BITS, special case + + /* OK, we have to map the return back to a mem struct. + We are about to trample the parents param area, so recover the + return type. r29 is free, since the call is done. */ + lg r29,(LINKAGE_SIZE + 6 * GPR_BYTES)(r28) + + sg r3, (LINKAGE_SIZE )(r28) + sg r4, (LINKAGE_SIZE + GPR_BYTES)(r28) + sg r5, (LINKAGE_SIZE + 2 * GPR_BYTES)(r28) + sg r6, (LINKAGE_SIZE + 3 * GPR_BYTES)(r28) + nop + sg r7, (LINKAGE_SIZE + 4 * GPR_BYTES)(r28) + sg r8, (LINKAGE_SIZE + 5 * GPR_BYTES)(r28) + sg r9, (LINKAGE_SIZE + 6 * GPR_BYTES)(r28) + sg r10,(LINKAGE_SIZE + 7 * GPR_BYTES)(r28) + /* OK, so do the block move - we trust that memcpy will not trample + the fprs... */ + mr r3,r30 ; dest + addi r4,r28,LINKAGE_SIZE ; source + /* The size is a size_t, should be long. */ + lg r5,0(r29) + /* Figure out small structs */ + cmpi 0,r5,4 + bgt L3 ; 1, 2 and 4 bytes have special rules. + cmpi 0,r5,3 + beq L3 ; not 3 + addi r4,r4,8 + subf r4,r5,r4 +L3: + bl _memcpy + + /* ... do we need the FP registers? - recover the flags.. */ + mtcrf 0x03,r31 ; we need c6 & cr7 now. + bf 29,L(done_return_value) /* No floats in the struct. */ + stfd f1, -SAVE_REGS_SIZE-(13*FPR_SIZE)(r28) + stfd f2, -SAVE_REGS_SIZE-(12*FPR_SIZE)(r28) + stfd f3, -SAVE_REGS_SIZE-(11*FPR_SIZE)(r28) + stfd f4, -SAVE_REGS_SIZE-(10*FPR_SIZE)(r28) + nop + stfd f5, -SAVE_REGS_SIZE-( 9*FPR_SIZE)(r28) + stfd f6, -SAVE_REGS_SIZE-( 8*FPR_SIZE)(r28) + stfd f7, -SAVE_REGS_SIZE-( 7*FPR_SIZE)(r28) + stfd f8, -SAVE_REGS_SIZE-( 6*FPR_SIZE)(r28) + nop + stfd f9, -SAVE_REGS_SIZE-( 5*FPR_SIZE)(r28) + stfd f10,-SAVE_REGS_SIZE-( 4*FPR_SIZE)(r28) + stfd f11,-SAVE_REGS_SIZE-( 3*FPR_SIZE)(r28) + stfd f12,-SAVE_REGS_SIZE-( 2*FPR_SIZE)(r28) + nop + stfd f13,-SAVE_REGS_SIZE-( 1*FPR_SIZE)(r28) + + mr r3,r29 ; ffi_type * + mr r4,r30 ; dest + addi r5,r28,-SAVE_REGS_SIZE-(13*FPR_SIZE) ; fprs + xor r6,r6,r6 + sg r6,(LINKAGE_SIZE + 7 * GPR_BYTES)(r28) + addi r6,r28,(LINKAGE_SIZE + 7 * GPR_BYTES) ; point to a zeroed counter. + bl _darwin64_struct_floats_to_mem + + b L(done_return_value) +#else + stw r3,0(r30) ; m32 the only struct return in reg is 4 bytes. +#endif + b L(done_return_value) + +L(fp_return_value): + /* Do we have long double to store? */ + bf 31,L(fd_return_value) ; FLAG_RETURNS_128BITS + stfd f1,0(r30) + stfd f2,FPR_SIZE(r30) + b L(done_return_value) + +L(fd_return_value): + /* Do we have double to store? */ + bf 28,L(float_return_value) + stfd f1,0(r30) + b L(done_return_value) + +L(float_return_value): + /* We only have a float to store. */ + stfs f1,0(r30) + b L(done_return_value) + +L(scalar_return_value): + bt 29,L(fp_return_value) ; FLAG_RETURNS_FP + ; ffi_arg is defined as unsigned long. + sg r3,0(r30) ; Save the reg. + bf 28,L(done_return_value) ; not FLAG_RETURNS_64BITS + +#if defined(__ppc64__) +L(maybe_return_128): + std r3,0(r30) + bf 31,L(done_return_value) ; not FLAG_RETURNS_128BITS + std r4,8(r30) +#else + stw r4,4(r30) +#endif + + /* Fall through. */ + /* We want this at the end to simplify eh epilog computation. */ + +L(done_return_value): + /* Restore the registers we used and return. */ + lg r29,SAVED_LR_OFFSET(r28) + ; epilog + lg r31,-(1 * GPR_BYTES)(r28) + mtlr r29 + lg r30,-(2 * GPR_BYTES)(r28) + lg r29,-(3 * GPR_BYTES)(r28) + lg r28,-(4 * GPR_BYTES)(r28) + lg r1,0(r1) + blr +LFE1: + .align 1 +/* END(_ffi_call_DARWIN) */ + +/* Provide a null definition of _ffi_call_AIX. */ + .text + .globl _ffi_call_AIX + .align 2 +_ffi_call_AIX: + blr +/* END(_ffi_call_AIX) */ + +/* EH stuff. */ + +#define EH_DATA_ALIGN_FACT MODE_CHOICE(0x7c,0x78) + + .static_data + .align LOG2_GPR_BYTES +LLFB0$non_lazy_ptr: + .g_long Lstartcode + + .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EH_frame1: + .set L$set$0,LECIE1-LSCIE1 + .long L$set$0 ; Length of Common Information Entry +LSCIE1: + .long 0x0 ; CIE Identifier Tag + .byte 0x1 ; CIE Version + .ascii "zR\0" ; CIE Augmentation + .byte 0x1 ; uleb128 0x1; CIE Code Alignment Factor + .byte EH_DATA_ALIGN_FACT ; sleb128 -4; CIE Data Alignment Factor + .byte 0x41 ; CIE RA Column + .byte 0x1 ; uleb128 0x1; Augmentation size + .byte 0x90 ; FDE Encoding (indirect pcrel) + .byte 0xc ; DW_CFA_def_cfa + .byte 0x1 ; uleb128 0x1 + .byte 0x0 ; uleb128 0x0 + .align LOG2_GPR_BYTES +LECIE1: + + .globl _ffi_call_DARWIN.eh +_ffi_call_DARWIN.eh: +LSFDE1: + .set L$set$1,LEFDE1-LASFDE1 + .long L$set$1 ; FDE Length +LASFDE1: + .long LASFDE1-EH_frame1 ; FDE CIE offset + .g_long LLFB0$non_lazy_ptr-. ; FDE initial location + .set L$set$3,LFE1-Lstartcode + .g_long L$set$3 ; FDE address range + .byte 0x0 ; uleb128 0x0; Augmentation size + .byte 0x4 ; DW_CFA_advance_loc4 + .set L$set$4,LCFI0-Lstartcode + .long L$set$4 + .byte 0xd ; DW_CFA_def_cfa_register + .byte 0x08 ; uleb128 0x08 + .byte 0x4 ; DW_CFA_advance_loc4 + .set L$set$5,LCFI1-LCFI0 + .long L$set$5 + .byte 0x11 ; DW_CFA_offset_extended_sf + .byte 0x41 ; uleb128 0x41 + .byte 0x7e ; sleb128 -2 + .byte 0x9f ; DW_CFA_offset, column 0x1f + .byte 0x1 ; uleb128 0x1 + .byte 0x9e ; DW_CFA_offset, column 0x1e + .byte 0x2 ; uleb128 0x2 + .byte 0x9d ; DW_CFA_offset, column 0x1d + .byte 0x3 ; uleb128 0x3 + .byte 0x9c ; DW_CFA_offset, column 0x1c + .byte 0x4 ; uleb128 0x4 + .byte 0x4 ; DW_CFA_advance_loc4 + .set L$set$6,LCFI2-LCFI1 + .long L$set$6 + .byte 0xd ; DW_CFA_def_cfa_register + .byte 0x1c ; uleb128 0x1c + .align LOG2_GPR_BYTES +LEFDE1: + .align 1 + diff --git a/libffi/src/powerpc/darwin_closure.S b/libffi/src/powerpc/darwin_closure.S new file mode 100644 index 000000000..b43f9658f --- /dev/null +++ b/libffi/src/powerpc/darwin_closure.S @@ -0,0 +1,576 @@ +/* ----------------------------------------------------------------------- + darwin_closure.S - Copyright (c) 2002, 2003, 2004, 2010, + Free Software Foundation, Inc. + based on ppc_closure.S + + PowerPC Assembly glue. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#define L(x) x + +#if defined(__ppc64__) +#define MODE_CHOICE(x, y) y +#else +#define MODE_CHOICE(x, y) x +#endif + +#define machine_choice MODE_CHOICE(ppc7400,ppc64) + +; Define some pseudo-opcodes for size-independent load & store of GPRs ... +#define lgu MODE_CHOICE(lwzu, ldu) +#define lg MODE_CHOICE(lwz,ld) +#define sg MODE_CHOICE(stw,std) +#define sgu MODE_CHOICE(stwu,stdu) + +; ... and the size of GPRs and their storage indicator. +#define GPR_BYTES MODE_CHOICE(4,8) +#define LOG2_GPR_BYTES MODE_CHOICE(2,3) /* log2(GPR_BYTES) */ +#define g_long MODE_CHOICE(long, quad) /* usage is ".g_long" */ + +; From the ABI doc: "Mac OS X ABI Function Call Guide" Version 2009-02-04. +#define LINKAGE_SIZE MODE_CHOICE(24,48) +#define PARAM_AREA MODE_CHOICE(32,64) + +#define SAVED_CR_OFFSET MODE_CHOICE(4,8) /* save position for CR */ +#define SAVED_LR_OFFSET MODE_CHOICE(8,16) /* save position for lr */ + +/* WARNING: if ffi_type is changed... here be monsters. + Offsets of items within the result type. */ +#define FFI_TYPE_TYPE MODE_CHOICE(6,10) +#define FFI_TYPE_ELEM MODE_CHOICE(8,16) + +#define SAVED_FPR_COUNT 13 +#define FPR_SIZE 8 +/* biggest m64 struct ret is 8GPRS + 13FPRS = 168 bytes - rounded to 16bytes = 176. */ +#define RESULT_BYTES MODE_CHOICE(16,176) + +; The whole stack frame **MUST** be 16byte-aligned. +#define SAVE_SIZE (((LINKAGE_SIZE+PARAM_AREA+SAVED_FPR_COUNT*FPR_SIZE+RESULT_BYTES)+15) & -16LL) +#define PAD_SIZE (SAVE_SIZE-(LINKAGE_SIZE+PARAM_AREA+SAVED_FPR_COUNT*FPR_SIZE+RESULT_BYTES)) + +#define PARENT_PARM_BASE (SAVE_SIZE+LINKAGE_SIZE) +#define FP_SAVE_BASE (LINKAGE_SIZE+PARAM_AREA) + +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 +; We no longer need the pic symbol stub for Darwin >= 9. +#define BLCLS_HELP _ffi_closure_helper_DARWIN +#define STRUCT_RETVALUE_P _darwin64_struct_ret_by_value_p +#define PASS_STR_FLOATS _darwin64_pass_struct_floats +#undef WANT_STUB +#else +#define BLCLS_HELP L_ffi_closure_helper_DARWIN$stub +#define STRUCT_RETVALUE_P L_darwin64_struct_ret_by_value_p$stub +#define PASS_STR_FLOATS L_darwin64_pass_struct_floats$stub +#define WANT_STUB +#endif + +/* m32/m64 + + The stack layout looks like this: + + | Additional params... | | Higher address + ~ ~ ~ + | Parameters (at least 8*4/8=32/64) | | NUM_GPR_ARG_REGISTERS + |--------------------------------------------| | + | TOC=R2 (AIX) Reserved (Darwin) 4/8 | | + |--------------------------------------------| | + | Reserved 2*4/8 | | + |--------------------------------------------| | + | Space for callee`s LR 4/8 | | + |--------------------------------------------| | + | Saved CR [low word for m64] 4/8 | | + |--------------------------------------------| | + | Current backchain pointer 4/8 |-/ Parent`s frame. + |--------------------------------------------| <+ <<< on entry to + | Result Bytes 16/176 | | + |--------------------------------------------| | + ~ padding to 16-byte alignment ~ ~ + |--------------------------------------------| | + | NUM_FPR_ARG_REGISTERS slots | | + | here fp13 .. fp1 13*8 | | + |--------------------------------------------| | + | R3..R10 8*4/8=32/64 | | NUM_GPR_ARG_REGISTERS + |--------------------------------------------| | + | TOC=R2 (AIX) Reserved (Darwin) 4/8 | | + |--------------------------------------------| | stack | + | Reserved [compiler,binder] 2*4/8 | | grows | + |--------------------------------------------| | down V + | Space for callees LR 4/8 | | + |--------------------------------------------| | lower addresses + | Saved CR [low word for m64] 4/8 | | + |--------------------------------------------| | stack pointer here + | Current backchain pointer 4/8 |-/ during + |--------------------------------------------| <<< call. + +*/ + + .file "darwin_closure.S" + + .machine machine_choice + + .text + .globl _ffi_closure_ASM + .align LOG2_GPR_BYTES +_ffi_closure_ASM: +LFB1: +Lstartcode: + mflr r0 /* extract return address */ + sg r0,SAVED_LR_OFFSET(r1) /* save the return address */ +LCFI0: + sgu r1,-SAVE_SIZE(r1) /* skip over caller save area + keep stack aligned to 16. */ +LCFI1: + /* We want to build up an area for the parameters passed + in registers. (both floating point and integer) */ + + /* Put gpr 3 to gpr 10 in the parents outgoing area... + ... the remainder of any params that overflowed the regs will + follow here. */ + sg r3, (PARENT_PARM_BASE )(r1) + sg r4, (PARENT_PARM_BASE + GPR_BYTES )(r1) + sg r5, (PARENT_PARM_BASE + GPR_BYTES * 2)(r1) + sg r6, (PARENT_PARM_BASE + GPR_BYTES * 3)(r1) + sg r7, (PARENT_PARM_BASE + GPR_BYTES * 4)(r1) + sg r8, (PARENT_PARM_BASE + GPR_BYTES * 5)(r1) + sg r9, (PARENT_PARM_BASE + GPR_BYTES * 6)(r1) + sg r10,(PARENT_PARM_BASE + GPR_BYTES * 7)(r1) + + /* We save fpr 1 to fpr 14 in our own save frame. */ + stfd f1, (FP_SAVE_BASE )(r1) + stfd f2, (FP_SAVE_BASE + FPR_SIZE )(r1) + stfd f3, (FP_SAVE_BASE + FPR_SIZE * 2 )(r1) + stfd f4, (FP_SAVE_BASE + FPR_SIZE * 3 )(r1) + stfd f5, (FP_SAVE_BASE + FPR_SIZE * 4 )(r1) + stfd f6, (FP_SAVE_BASE + FPR_SIZE * 5 )(r1) + stfd f7, (FP_SAVE_BASE + FPR_SIZE * 6 )(r1) + stfd f8, (FP_SAVE_BASE + FPR_SIZE * 7 )(r1) + stfd f9, (FP_SAVE_BASE + FPR_SIZE * 8 )(r1) + stfd f10,(FP_SAVE_BASE + FPR_SIZE * 9 )(r1) + stfd f11,(FP_SAVE_BASE + FPR_SIZE * 10)(r1) + stfd f12,(FP_SAVE_BASE + FPR_SIZE * 11)(r1) + stfd f13,(FP_SAVE_BASE + FPR_SIZE * 12)(r1) + + /* Set up registers for the routine that actually does the work + get the context pointer from the trampoline. */ + mr r3,r11 + + /* Now load up the pointer to the result storage. */ + addi r4,r1,(SAVE_SIZE-RESULT_BYTES) + + /* Now load up the pointer to the saved gpr registers. */ + addi r5,r1,PARENT_PARM_BASE + + /* Now load up the pointer to the saved fpr registers. */ + addi r6,r1,FP_SAVE_BASE + + /* Make the call. */ + bl BLCLS_HELP + + /* r3 contains the rtype pointer... save it since we will need + it later. */ + sg r3,LINKAGE_SIZE(r1) ; ffi_type * result_type + lg r0,0(r3) ; size => r0 + lhz r3,FFI_TYPE_TYPE(r3) ; type => r3 + + /* The helper will have intercepted struture returns and inserted + the caller`s destination address for structs returned by ref. */ + + /* r3 contains the return type so use it to look up in a table + so we know how to deal with each type. */ + + addi r5,r1,(SAVE_SIZE-RESULT_BYTES) /* Otherwise, our return is here. */ + bl Lget_ret_type0_addr /* Get pointer to Lret_type0 into LR. */ + mflr r4 /* Move to r4. */ + slwi r3,r3,4 /* Now multiply return type by 16. */ + add r3,r3,r4 /* Add contents of table to table address. */ + mtctr r3 + bctr /* Jump to it. */ +LFE1: +/* Each of the ret_typeX code fragments has to be exactly 16 bytes long + (4 instructions). For cache effectiveness we align to a 16 byte boundary + first. */ + + .align 4 + + nop + nop + nop +Lget_ret_type0_addr: + blrl + +/* case FFI_TYPE_VOID */ +Lret_type0: + b Lfinish + nop + nop + nop + +/* case FFI_TYPE_INT */ +Lret_type1: + lg r3,0(r5) + b Lfinish + nop + nop + +/* case FFI_TYPE_FLOAT */ +Lret_type2: + lfs f1,0(r5) + b Lfinish + nop + nop + +/* case FFI_TYPE_DOUBLE */ +Lret_type3: + lfd f1,0(r5) + b Lfinish + nop + nop + +/* case FFI_TYPE_LONGDOUBLE */ +Lret_type4: + lfd f1,0(r5) + lfd f2,8(r5) + b Lfinish + nop + +/* case FFI_TYPE_UINT8 */ +Lret_type5: +#if defined(__ppc64__) + lbz r3,7(r5) +#else + lbz r3,3(r5) +#endif + b Lfinish + nop + nop + +/* case FFI_TYPE_SINT8 */ +Lret_type6: +#if defined(__ppc64__) + lbz r3,7(r5) +#else + lbz r3,3(r5) +#endif + extsb r3,r3 + b Lfinish + nop + +/* case FFI_TYPE_UINT16 */ +Lret_type7: +#if defined(__ppc64__) + lhz r3,6(r5) +#else + lhz r3,2(r5) +#endif + b Lfinish + nop + nop + +/* case FFI_TYPE_SINT16 */ +Lret_type8: +#if defined(__ppc64__) + lha r3,6(r5) +#else + lha r3,2(r5) +#endif + b Lfinish + nop + nop + +/* case FFI_TYPE_UINT32 */ +Lret_type9: +#if defined(__ppc64__) + lwz r3,4(r5) +#else + lwz r3,0(r5) +#endif + b Lfinish + nop + nop + +/* case FFI_TYPE_SINT32 */ +Lret_type10: +#if defined(__ppc64__) + lwz r3,4(r5) +#else + lwz r3,0(r5) +#endif + b Lfinish + nop + nop + +/* case FFI_TYPE_UINT64 */ +Lret_type11: +#if defined(__ppc64__) + lg r3,0(r5) + b Lfinish + nop +#else + lwz r3,0(r5) + lwz r4,4(r5) + b Lfinish +#endif + nop + +/* case FFI_TYPE_SINT64 */ +Lret_type12: +#if defined(__ppc64__) + lg r3,0(r5) + b Lfinish + nop +#else + lwz r3,0(r5) + lwz r4,4(r5) + b Lfinish +#endif + nop + +/* case FFI_TYPE_STRUCT */ +Lret_type13: +#if defined(__ppc64__) + lg r3,0(r5) ; we need at least this... + cmpi 0,r0,4 + bgt Lstructend ; not a special small case + b Lsmallstruct ; see if we need more. +#else + cmpi 0,r0,4 + bgt Lfinish ; not by value + lg r3,0(r5) + b Lfinish +#endif +/* case FFI_TYPE_POINTER */ +Lret_type14: + lg r3,0(r5) + b Lfinish + nop + nop + +#if defined(__ppc64__) +Lsmallstruct: + beq Lfour ; continuation of Lret13. + cmpi 0,r0,3 + beq Lfinish ; don`t adjust this - can`t be any floats here... + srdi r3,r3,48 + cmpi 0,r0,2 + beq Lfinish ; .. or here .. + srdi r3,r3,8 + b Lfinish ; .. or here. + +Lfour: + lg r6,LINKAGE_SIZE(r1) ; get the result type + lg r6,FFI_TYPE_ELEM(r6) ; elements array pointer + lg r6,0(r6) ; first element + lhz r0,FFI_TYPE_TYPE(r6) ; OK go the type + cmpi 0,r0,2 ; FFI_TYPE_FLOAT + bne Lfourint + lfs f1,0(r5) ; just one float in the struct. + b Lfinish + +Lfourint: + srdi r3,r3,32 ; four bytes. + b Lfinish + +Lstructend: + lg r3,LINKAGE_SIZE(r1) ; get the result type + bl STRUCT_RETVALUE_P + cmpi 0,r3,0 + beq Lfinish ; nope. + /* Recover a pointer to the results. */ + addi r11,r1,(SAVE_SIZE-RESULT_BYTES) + lg r3,0(r11) ; we need at least this... + lg r4,8(r11) + cmpi 0,r0,16 + beq Lfinish ; special case 16 bytes we don't consider floats. + + /* OK, frustratingly, the process of saving the struct to mem might have + messed with the FPRs, so we have to re-load them :(. + We`ll use our FPRs space again - calling: + void darwin64_pass_struct_floats (ffi_type *s, char *src, + unsigned *nfpr, double **fprs) + We`ll temporarily pinch the first two slots of the param area for local + vars used by the routine. */ + xor r6,r6,r6 + addi r5,r1,PARENT_PARM_BASE ; some space + sg r6,0(r5) ; *nfpr zeroed. + addi r6,r5,8 ; **fprs + addi r3,r1,FP_SAVE_BASE ; pointer to FPRs space + sg r3,0(r6) + mr r4,r11 ; the struct is here... + lg r3,LINKAGE_SIZE(r1) ; ffi_type * result_type. + bl PASS_STR_FLOATS ; get struct floats into FPR save space. + /* See if we used any floats */ + lwz r0,(SAVE_SIZE-RESULT_BYTES)(r1) + cmpi 0,r0,0 + beq Lstructints ; nope. + /* OK load `em up... */ + lfd f1, (FP_SAVE_BASE )(r1) + lfd f2, (FP_SAVE_BASE + FPR_SIZE )(r1) + lfd f3, (FP_SAVE_BASE + FPR_SIZE * 2 )(r1) + lfd f4, (FP_SAVE_BASE + FPR_SIZE * 3 )(r1) + lfd f5, (FP_SAVE_BASE + FPR_SIZE * 4 )(r1) + lfd f6, (FP_SAVE_BASE + FPR_SIZE * 5 )(r1) + lfd f7, (FP_SAVE_BASE + FPR_SIZE * 6 )(r1) + lfd f8, (FP_SAVE_BASE + FPR_SIZE * 7 )(r1) + lfd f9, (FP_SAVE_BASE + FPR_SIZE * 8 )(r1) + lfd f10,(FP_SAVE_BASE + FPR_SIZE * 9 )(r1) + lfd f11,(FP_SAVE_BASE + FPR_SIZE * 10)(r1) + lfd f12,(FP_SAVE_BASE + FPR_SIZE * 11)(r1) + lfd f13,(FP_SAVE_BASE + FPR_SIZE * 12)(r1) + + /* point back at our saved struct. */ +Lstructints: + addi r11,r1,(SAVE_SIZE-RESULT_BYTES) + lg r3,0(r11) ; we end up picking the + lg r4,8(r11) ; first two again. + lg r5,16(r11) + lg r6,24(r11) + lg r7,32(r11) + lg r8,40(r11) + lg r9,48(r11) + lg r10,56(r11) +#endif + +/* case done */ +Lfinish: + addi r1,r1,SAVE_SIZE /* Restore stack pointer. */ + lg r0,SAVED_LR_OFFSET(r1) /* Get return address. */ + mtlr r0 /* Reset link register. */ + blr +Lendcode: + .align 1 + +/* END(ffi_closure_ASM) */ + +/* EH frame stuff. */ +#define EH_DATA_ALIGN_FACT MODE_CHOICE(0x7c,0x78) +/* 176, 400 */ +#define EH_FRAME_OFFSETA MODE_CHOICE(176,0x90) +#define EH_FRAME_OFFSETB MODE_CHOICE(1,3) + + .static_data + .align LOG2_GPR_BYTES +LLFB1$non_lazy_ptr: + .g_long Lstartcode + + .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EH_frame1: + .set L$set$0,LECIE1-LSCIE1 + .long L$set$0 ; Length of Common Information Entry +LSCIE1: + .long 0x0 ; CIE Identifier Tag + .byte 0x1 ; CIE Version + .ascii "zR\0" ; CIE Augmentation + .byte 0x1 ; uleb128 0x1; CIE Code Alignment Factor + .byte EH_DATA_ALIGN_FACT ; sleb128 -4; CIE Data Alignment Factor + .byte 0x41 ; CIE RA Column + .byte 0x1 ; uleb128 0x1; Augmentation size + .byte 0x90 ; FDE Encoding (indirect pcrel) + .byte 0xc ; DW_CFA_def_cfa + .byte 0x1 ; uleb128 0x1 + .byte 0x0 ; uleb128 0x0 + .align LOG2_GPR_BYTES +LECIE1: + .globl _ffi_closure_ASM.eh +_ffi_closure_ASM.eh: +LSFDE1: + .set L$set$1,LEFDE1-LASFDE1 + .long L$set$1 ; FDE Length + +LASFDE1: + .long LASFDE1-EH_frame1 ; FDE CIE offset + .g_long LLFB1$non_lazy_ptr-. ; FDE initial location + .set L$set$3,LFE1-Lstartcode + .g_long L$set$3 ; FDE address range + .byte 0x0 ; uleb128 0x0; Augmentation size + .byte 0x4 ; DW_CFA_advance_loc4 + .set L$set$3,LCFI1-LCFI0 + .long L$set$3 + .byte 0xe ; DW_CFA_def_cfa_offset + .byte EH_FRAME_OFFSETA,EH_FRAME_OFFSETB ; uleb128 176,1/190,3 + .byte 0x4 ; DW_CFA_advance_loc4 + .set L$set$4,LCFI0-Lstartcode + .long L$set$4 + .byte 0x11 ; DW_CFA_offset_extended_sf + .byte 0x41 ; uleb128 0x41 + .byte 0x7e ; sleb128 -2 + .align LOG2_GPR_BYTES +LEFDE1: + .align 1 + +#ifdef WANT_STUB + .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 + .align 5 +L_ffi_closure_helper_DARWIN$stub: + .indirect_symbol _ffi_closure_helper_DARWIN + mflr r0 + bcl 20,31,"L00000000001$spb" +"L00000000001$spb": + mflr r11 + addis r11,r11,ha16(L_ffi_closure_helper_DARWIN$lazy_ptr-"L00000000001$spb") + mtlr r0 + lwzu r12,lo16(L_ffi_closure_helper_DARWIN$lazy_ptr-"L00000000001$spb")(r11) + mtctr r12 + bctr + .lazy_symbol_pointer +L_ffi_closure_helper_DARWIN$lazy_ptr: + .indirect_symbol _ffi_closure_helper_DARWIN + .g_long dyld_stub_binding_helper + +#if defined(__ppc64__) + .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 + .align 5 +L_darwin64_struct_ret_by_value_p$stub: + .indirect_symbol _darwin64_struct_ret_by_value_p + mflr r0 + bcl 20,31,"L00000000002$spb" +"L00000000002$spb": + mflr r11 + addis r11,r11,ha16(L_darwin64_struct_ret_by_value_p$lazy_ptr-"L00000000002$spb") + mtlr r0 + lwzu r12,lo16(L_darwin64_struct_ret_by_value_p$lazy_ptr-"L00000000002$spb")(r11) + mtctr r12 + bctr + .lazy_symbol_pointer +L_darwin64_struct_ret_by_value_p$lazy_ptr: + .indirect_symbol _darwin64_struct_ret_by_value_p + .g_long dyld_stub_binding_helper + + .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 + .align 5 +L_darwin64_pass_struct_floats$stub: + .indirect_symbol _darwin64_pass_struct_floats + mflr r0 + bcl 20,31,"L00000000003$spb" +"L00000000003$spb": + mflr r11 + addis r11,r11,ha16(L_darwin64_pass_struct_floats$lazy_ptr-"L00000000003$spb") + mtlr r0 + lwzu r12,lo16(L_darwin64_pass_struct_floats$lazy_ptr-"L00000000003$spb")(r11) + mtctr r12 + bctr + .lazy_symbol_pointer +L_darwin64_pass_struct_floats$lazy_ptr: + .indirect_symbol _darwin64_pass_struct_floats + .g_long dyld_stub_binding_helper +# endif +#endif diff --git a/libffi/src/powerpc/ffi.c b/libffi/src/powerpc/ffi.c new file mode 100644 index 000000000..75784a96d --- /dev/null +++ b/libffi/src/powerpc/ffi.c @@ -0,0 +1,1442 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 1998 Geoffrey Keating + Copyright (C) 2007, 2008 Free Software Foundation, Inc + Copyright (C) 2008 Red Hat, Inc + + PowerPC Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> +#include <stdio.h> + + +extern void ffi_closure_SYSV (void); +extern void FFI_HIDDEN ffi_closure_LINUX64 (void); + +enum { + /* The assembly depends on these exact flags. */ + FLAG_RETURNS_SMST = 1 << (31-31), /* Used for FFI_SYSV small structs. */ + FLAG_RETURNS_NOTHING = 1 << (31-30), /* These go in cr7 */ + FLAG_RETURNS_FP = 1 << (31-29), + FLAG_RETURNS_64BITS = 1 << (31-28), + + FLAG_RETURNS_128BITS = 1 << (31-27), /* cr6 */ + FLAG_SYSV_SMST_R4 = 1 << (31-26), /* use r4 for FFI_SYSV 8 byte + structs. */ + FLAG_SYSV_SMST_R3 = 1 << (31-25), /* use r3 for FFI_SYSV 4 byte + structs. */ + /* Bits (31-24) through (31-19) store shift value for SMST */ + + FLAG_ARG_NEEDS_COPY = 1 << (31- 7), + FLAG_FP_ARGUMENTS = 1 << (31- 6), /* cr1.eq; specified by ABI */ + FLAG_4_GPR_ARGUMENTS = 1 << (31- 5), + FLAG_RETVAL_REFERENCE = 1 << (31- 4) +}; + +/* About the SYSV ABI. */ +unsigned int NUM_GPR_ARG_REGISTERS = 8; +#ifndef __NO_FPRS__ +unsigned int NUM_FPR_ARG_REGISTERS = 8; +#else +unsigned int NUM_FPR_ARG_REGISTERS = 0; +#endif + +enum { ASM_NEEDS_REGISTERS = 4 }; + +/* ffi_prep_args_SYSV is called by the assembly routine once stack space + has been allocated for the function's arguments. + + The stack layout we want looks like this: + + | Return address from ffi_call_SYSV 4bytes | higher addresses + |--------------------------------------------| + | Previous backchain pointer 4 | stack pointer here + |--------------------------------------------|<+ <<< on entry to + | Saved r28-r31 4*4 | | ffi_call_SYSV + |--------------------------------------------| | + | GPR registers r3-r10 8*4 | | ffi_call_SYSV + |--------------------------------------------| | + | FPR registers f1-f8 (optional) 8*8 | | + |--------------------------------------------| | stack | + | Space for copied structures | | grows | + |--------------------------------------------| | down V + | Parameters that didn't fit in registers | | + |--------------------------------------------| | lower addresses + | Space for callee's LR 4 | | + |--------------------------------------------| | stack pointer here + | Current backchain pointer 4 |-/ during + |--------------------------------------------| <<< ffi_call_SYSV + +*/ + +void +ffi_prep_args_SYSV (extended_cif *ecif, unsigned *const stack) +{ + const unsigned bytes = ecif->cif->bytes; + const unsigned flags = ecif->cif->flags; + + typedef union { + char *c; + unsigned *u; + long long *ll; + float *f; + double *d; + } valp; + + /* 'stacktop' points at the previous backchain pointer. */ + valp stacktop; + + /* 'gpr_base' points at the space for gpr3, and grows upwards as + we use GPR registers. */ + valp gpr_base; + int intarg_count; + + /* 'fpr_base' points at the space for fpr1, and grows upwards as + we use FPR registers. */ + valp fpr_base; + int fparg_count; + + /* 'copy_space' grows down as we put structures in it. It should + stay 16-byte aligned. */ + valp copy_space; + + /* 'next_arg' grows up as we put parameters in it. */ + valp next_arg; + + int i, ii MAYBE_UNUSED; + ffi_type **ptr; + double double_tmp; + union { + void **v; + char **c; + signed char **sc; + unsigned char **uc; + signed short **ss; + unsigned short **us; + unsigned int **ui; + long long **ll; + float **f; + double **d; + } p_argv; + size_t struct_copy_size; + unsigned gprvalue; + + if (ecif->cif->abi == FFI_LINUX_SOFT_FLOAT) + NUM_FPR_ARG_REGISTERS = 0; + + stacktop.c = (char *) stack + bytes; + gpr_base.u = stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS; + intarg_count = 0; + fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS; + fparg_count = 0; + copy_space.c = ((flags & FLAG_FP_ARGUMENTS) ? fpr_base.c : gpr_base.c); + next_arg.u = stack + 2; + + /* Check that everything starts aligned properly. */ + FFI_ASSERT (((unsigned) (char *) stack & 0xF) == 0); + FFI_ASSERT (((unsigned) copy_space.c & 0xF) == 0); + FFI_ASSERT (((unsigned) stacktop.c & 0xF) == 0); + FFI_ASSERT ((bytes & 0xF) == 0); + FFI_ASSERT (copy_space.c >= next_arg.c); + + /* Deal with return values that are actually pass-by-reference. */ + if (flags & FLAG_RETVAL_REFERENCE) + { + *gpr_base.u++ = (unsigned long) (char *) ecif->rvalue; + intarg_count++; + } + + /* Now for the arguments. */ + p_argv.v = ecif->avalue; + for (ptr = ecif->cif->arg_types, i = ecif->cif->nargs; + i > 0; + i--, ptr++, p_argv.v++) + { + switch ((*ptr)->type) + { + case FFI_TYPE_FLOAT: + /* With FFI_LINUX_SOFT_FLOAT floats are handled like UINT32. */ + if (ecif->cif->abi == FFI_LINUX_SOFT_FLOAT) + goto soft_float_prep; + double_tmp = **p_argv.f; + if (fparg_count >= NUM_FPR_ARG_REGISTERS) + { + *next_arg.f = (float) double_tmp; + next_arg.u += 1; + intarg_count++; + } + else + *fpr_base.d++ = double_tmp; + fparg_count++; + FFI_ASSERT (flags & FLAG_FP_ARGUMENTS); + break; + + case FFI_TYPE_DOUBLE: + /* With FFI_LINUX_SOFT_FLOAT doubles are handled like UINT64. */ + if (ecif->cif->abi == FFI_LINUX_SOFT_FLOAT) + goto soft_double_prep; + double_tmp = **p_argv.d; + + if (fparg_count >= NUM_FPR_ARG_REGISTERS) + { + if (intarg_count >= NUM_GPR_ARG_REGISTERS + && intarg_count % 2 != 0) + { + intarg_count++; + next_arg.u++; + } + *next_arg.d = double_tmp; + next_arg.u += 2; + } + else + *fpr_base.d++ = double_tmp; + fparg_count++; + FFI_ASSERT (flags & FLAG_FP_ARGUMENTS); + break; + +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + if ((ecif->cif->abi != FFI_LINUX) + && (ecif->cif->abi != FFI_LINUX_SOFT_FLOAT)) + goto do_struct; + /* The soft float ABI for long doubles works like this, + a long double is passed in four consecutive gprs if available. + A maximum of 2 long doubles can be passed in gprs. + If we do not have 4 gprs left, the long double is passed on the + stack, 4-byte aligned. */ + if (ecif->cif->abi == FFI_LINUX_SOFT_FLOAT) + { + unsigned int int_tmp = (*p_argv.ui)[0]; + if (intarg_count >= NUM_GPR_ARG_REGISTERS - 3) + { + if (intarg_count < NUM_GPR_ARG_REGISTERS) + intarg_count += NUM_GPR_ARG_REGISTERS - intarg_count; + *next_arg.u = int_tmp; + next_arg.u++; + for (ii = 1; ii < 4; ii++) + { + int_tmp = (*p_argv.ui)[ii]; + *next_arg.u = int_tmp; + next_arg.u++; + } + } + else + { + *gpr_base.u++ = int_tmp; + for (ii = 1; ii < 4; ii++) + { + int_tmp = (*p_argv.ui)[ii]; + *gpr_base.u++ = int_tmp; + } + } + intarg_count +=4; + } + else + { + double_tmp = (*p_argv.d)[0]; + + if (fparg_count >= NUM_FPR_ARG_REGISTERS - 1) + { + if (intarg_count >= NUM_GPR_ARG_REGISTERS + && intarg_count % 2 != 0) + { + intarg_count++; + next_arg.u++; + } + *next_arg.d = double_tmp; + next_arg.u += 2; + double_tmp = (*p_argv.d)[1]; + *next_arg.d = double_tmp; + next_arg.u += 2; + } + else + { + *fpr_base.d++ = double_tmp; + double_tmp = (*p_argv.d)[1]; + *fpr_base.d++ = double_tmp; + } + + fparg_count += 2; + FFI_ASSERT (flags & FLAG_FP_ARGUMENTS); + } + break; +#endif + + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + soft_double_prep: + if (intarg_count == NUM_GPR_ARG_REGISTERS-1) + intarg_count++; + if (intarg_count >= NUM_GPR_ARG_REGISTERS) + { + if (intarg_count % 2 != 0) + { + intarg_count++; + next_arg.u++; + } + *next_arg.ll = **p_argv.ll; + next_arg.u += 2; + } + else + { + /* whoops: abi states only certain register pairs + * can be used for passing long long int + * specifically (r3,r4), (r5,r6), (r7,r8), + * (r9,r10) and if next arg is long long but + * not correct starting register of pair then skip + * until the proper starting register + */ + if (intarg_count % 2 != 0) + { + intarg_count ++; + gpr_base.u++; + } + *gpr_base.ll++ = **p_argv.ll; + } + intarg_count += 2; + break; + + case FFI_TYPE_STRUCT: +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + do_struct: +#endif + struct_copy_size = ((*ptr)->size + 15) & ~0xF; + copy_space.c -= struct_copy_size; + memcpy (copy_space.c, *p_argv.c, (*ptr)->size); + + gprvalue = (unsigned long) copy_space.c; + + FFI_ASSERT (copy_space.c > next_arg.c); + FFI_ASSERT (flags & FLAG_ARG_NEEDS_COPY); + goto putgpr; + + case FFI_TYPE_UINT8: + gprvalue = **p_argv.uc; + goto putgpr; + case FFI_TYPE_SINT8: + gprvalue = **p_argv.sc; + goto putgpr; + case FFI_TYPE_UINT16: + gprvalue = **p_argv.us; + goto putgpr; + case FFI_TYPE_SINT16: + gprvalue = **p_argv.ss; + goto putgpr; + + case FFI_TYPE_INT: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_POINTER: + soft_float_prep: + + gprvalue = **p_argv.ui; + + putgpr: + if (intarg_count >= NUM_GPR_ARG_REGISTERS) + *next_arg.u++ = gprvalue; + else + *gpr_base.u++ = gprvalue; + intarg_count++; + break; + } + } + + /* Check that we didn't overrun the stack... */ + FFI_ASSERT (copy_space.c >= next_arg.c); + FFI_ASSERT (gpr_base.u <= stacktop.u - ASM_NEEDS_REGISTERS); + FFI_ASSERT (fpr_base.u + <= stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS); + FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4); +} + +/* About the LINUX64 ABI. */ +enum { + NUM_GPR_ARG_REGISTERS64 = 8, + NUM_FPR_ARG_REGISTERS64 = 13 +}; +enum { ASM_NEEDS_REGISTERS64 = 4 }; + +/* ffi_prep_args64 is called by the assembly routine once stack space + has been allocated for the function's arguments. + + The stack layout we want looks like this: + + | Ret addr from ffi_call_LINUX64 8bytes | higher addresses + |--------------------------------------------| + | CR save area 8bytes | + |--------------------------------------------| + | Previous backchain pointer 8 | stack pointer here + |--------------------------------------------|<+ <<< on entry to + | Saved r28-r31 4*8 | | ffi_call_LINUX64 + |--------------------------------------------| | + | GPR registers r3-r10 8*8 | | + |--------------------------------------------| | + | FPR registers f1-f13 (optional) 13*8 | | + |--------------------------------------------| | + | Parameter save area | | + |--------------------------------------------| | + | TOC save area 8 | | + |--------------------------------------------| | stack | + | Linker doubleword 8 | | grows | + |--------------------------------------------| | down V + | Compiler doubleword 8 | | + |--------------------------------------------| | lower addresses + | Space for callee's LR 8 | | + |--------------------------------------------| | + | CR save area 8 | | + |--------------------------------------------| | stack pointer here + | Current backchain pointer 8 |-/ during + |--------------------------------------------| <<< ffi_call_LINUX64 + +*/ + +void FFI_HIDDEN +ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack) +{ + const unsigned long bytes = ecif->cif->bytes; + const unsigned long flags = ecif->cif->flags; + + typedef union { + char *c; + unsigned long *ul; + float *f; + double *d; + } valp; + + /* 'stacktop' points at the previous backchain pointer. */ + valp stacktop; + + /* 'next_arg' points at the space for gpr3, and grows upwards as + we use GPR registers, then continues at rest. */ + valp gpr_base; + valp gpr_end; + valp rest; + valp next_arg; + + /* 'fpr_base' points at the space for fpr3, and grows upwards as + we use FPR registers. */ + valp fpr_base; + int fparg_count; + + int i, words; + ffi_type **ptr; + double double_tmp; + union { + void **v; + char **c; + signed char **sc; + unsigned char **uc; + signed short **ss; + unsigned short **us; + signed int **si; + unsigned int **ui; + unsigned long **ul; + float **f; + double **d; + } p_argv; + unsigned long gprvalue; + + stacktop.c = (char *) stack + bytes; + gpr_base.ul = stacktop.ul - ASM_NEEDS_REGISTERS64 - NUM_GPR_ARG_REGISTERS64; + gpr_end.ul = gpr_base.ul + NUM_GPR_ARG_REGISTERS64; + rest.ul = stack + 6 + NUM_GPR_ARG_REGISTERS64; + fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64; + fparg_count = 0; + next_arg.ul = gpr_base.ul; + + /* Check that everything starts aligned properly. */ + FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0); + FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0); + FFI_ASSERT ((bytes & 0xF) == 0); + + /* Deal with return values that are actually pass-by-reference. */ + if (flags & FLAG_RETVAL_REFERENCE) + *next_arg.ul++ = (unsigned long) (char *) ecif->rvalue; + + /* Now for the arguments. */ + p_argv.v = ecif->avalue; + for (ptr = ecif->cif->arg_types, i = ecif->cif->nargs; + i > 0; + i--, ptr++, p_argv.v++) + { + switch ((*ptr)->type) + { + case FFI_TYPE_FLOAT: + double_tmp = **p_argv.f; + *next_arg.f = (float) double_tmp; + if (++next_arg.ul == gpr_end.ul) + next_arg.ul = rest.ul; + if (fparg_count < NUM_FPR_ARG_REGISTERS64) + *fpr_base.d++ = double_tmp; + fparg_count++; + FFI_ASSERT (flags & FLAG_FP_ARGUMENTS); + break; + + case FFI_TYPE_DOUBLE: + double_tmp = **p_argv.d; + *next_arg.d = double_tmp; + if (++next_arg.ul == gpr_end.ul) + next_arg.ul = rest.ul; + if (fparg_count < NUM_FPR_ARG_REGISTERS64) + *fpr_base.d++ = double_tmp; + fparg_count++; + FFI_ASSERT (flags & FLAG_FP_ARGUMENTS); + break; + +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + double_tmp = (*p_argv.d)[0]; + *next_arg.d = double_tmp; + if (++next_arg.ul == gpr_end.ul) + next_arg.ul = rest.ul; + if (fparg_count < NUM_FPR_ARG_REGISTERS64) + *fpr_base.d++ = double_tmp; + fparg_count++; + double_tmp = (*p_argv.d)[1]; + *next_arg.d = double_tmp; + if (++next_arg.ul == gpr_end.ul) + next_arg.ul = rest.ul; + if (fparg_count < NUM_FPR_ARG_REGISTERS64) + *fpr_base.d++ = double_tmp; + fparg_count++; + FFI_ASSERT (__LDBL_MANT_DIG__ == 106); + FFI_ASSERT (flags & FLAG_FP_ARGUMENTS); + break; +#endif + + case FFI_TYPE_STRUCT: + words = ((*ptr)->size + 7) / 8; + if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul) + { + size_t first = gpr_end.c - next_arg.c; + memcpy (next_arg.c, *p_argv.c, first); + memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first); + next_arg.c = rest.c + words * 8 - first; + } + else + { + char *where = next_arg.c; + + /* Structures with size less than eight bytes are passed + left-padded. */ + if ((*ptr)->size < 8) + where += 8 - (*ptr)->size; + + memcpy (where, *p_argv.c, (*ptr)->size); + next_arg.ul += words; + if (next_arg.ul == gpr_end.ul) + next_arg.ul = rest.ul; + } + break; + + case FFI_TYPE_UINT8: + gprvalue = **p_argv.uc; + goto putgpr; + case FFI_TYPE_SINT8: + gprvalue = **p_argv.sc; + goto putgpr; + case FFI_TYPE_UINT16: + gprvalue = **p_argv.us; + goto putgpr; + case FFI_TYPE_SINT16: + gprvalue = **p_argv.ss; + goto putgpr; + case FFI_TYPE_UINT32: + gprvalue = **p_argv.ui; + goto putgpr; + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + gprvalue = **p_argv.si; + goto putgpr; + + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_POINTER: + gprvalue = **p_argv.ul; + putgpr: + *next_arg.ul++ = gprvalue; + if (next_arg.ul == gpr_end.ul) + next_arg.ul = rest.ul; + break; + } + } + + FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS + || (next_arg.ul >= gpr_base.ul + && next_arg.ul <= gpr_base.ul + 4)); +} + + + +/* Perform machine dependent cif processing */ +ffi_status +ffi_prep_cif_machdep (ffi_cif *cif) +{ + /* All this is for the SYSV and LINUX64 ABI. */ + int i; + ffi_type **ptr; + unsigned bytes; + int fparg_count = 0, intarg_count = 0; + unsigned flags = 0; + unsigned struct_copy_size = 0; + unsigned type = cif->rtype->type; + unsigned size = cif->rtype->size; + + if (cif->abi == FFI_LINUX_SOFT_FLOAT) + NUM_FPR_ARG_REGISTERS = 0; + + if (cif->abi != FFI_LINUX64) + { + /* All the machine-independent calculation of cif->bytes will be wrong. + Redo the calculation for SYSV. */ + + /* Space for the frame pointer, callee's LR, and the asm's temp regs. */ + bytes = (2 + ASM_NEEDS_REGISTERS) * sizeof (int); + + /* Space for the GPR registers. */ + bytes += NUM_GPR_ARG_REGISTERS * sizeof (int); + } + else + { + /* 64-bit ABI. */ + + /* Space for backchain, CR, LR, cc/ld doubleword, TOC and the asm's temp + regs. */ + bytes = (6 + ASM_NEEDS_REGISTERS64) * sizeof (long); + + /* Space for the mandatory parm save area and general registers. */ + bytes += 2 * NUM_GPR_ARG_REGISTERS64 * sizeof (long); + } + + /* Return value handling. The rules for SYSV are as follows: + - 32-bit (or less) integer values are returned in gpr3; + - Structures of size <= 4 bytes also returned in gpr3; + - 64-bit integer values and structures between 5 and 8 bytes are returned + in gpr3 and gpr4; + - Single/double FP values are returned in fpr1; + - Larger structures are allocated space and a pointer is passed as + the first argument. + - long doubles (if not equivalent to double) are returned in + fpr1,fpr2 for Linux and as for large structs for SysV. + For LINUX64: + - integer values in gpr3; + - Structures/Unions by reference; + - Single/double FP values in fpr1, long double in fpr1,fpr2. + - soft-float float/doubles are treated as UINT32/UINT64 respectivley. + - soft-float long doubles are returned in gpr3-gpr6. */ + switch (type) + { +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX64 + && cif->abi != FFI_LINUX_SOFT_FLOAT) + goto byref; + flags |= FLAG_RETURNS_128BITS; + /* Fall through. */ +#endif + case FFI_TYPE_DOUBLE: + flags |= FLAG_RETURNS_64BITS; + /* Fall through. */ + case FFI_TYPE_FLOAT: + /* With FFI_LINUX_SOFT_FLOAT no fp registers are used. */ + if (cif->abi != FFI_LINUX_SOFT_FLOAT) + flags |= FLAG_RETURNS_FP; + break; + + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + flags |= FLAG_RETURNS_64BITS; + break; + + case FFI_TYPE_STRUCT: + if (cif->abi == FFI_SYSV) + { + /* The final SYSV ABI says that structures smaller or equal 8 bytes + are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them + in memory. */ + + /* Treat structs with size <= 8 bytes. */ + if (size <= 8) + { + flags |= FLAG_RETURNS_SMST; + /* These structs are returned in r3. We pack the type and the + precalculated shift value (needed in the sysv.S) into flags. + The same applies for the structs returned in r3/r4. */ + if (size <= 4) + { + flags |= FLAG_SYSV_SMST_R3; + flags |= 8 * (4 - size) << 8; + break; + } + /* These structs are returned in r3 and r4. See above. */ + if (size <= 8) + { + flags |= FLAG_SYSV_SMST_R3 | FLAG_SYSV_SMST_R4; + flags |= 8 * (8 - size) << 8; + break; + } + } + } +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + byref: +#endif + intarg_count++; + flags |= FLAG_RETVAL_REFERENCE; + /* Fall through. */ + case FFI_TYPE_VOID: + flags |= FLAG_RETURNS_NOTHING; + break; + + default: + /* Returns 32-bit integer, or similar. Nothing to do here. */ + break; + } + + if (cif->abi != FFI_LINUX64) + /* The first NUM_GPR_ARG_REGISTERS words of integer arguments, and the + first NUM_FPR_ARG_REGISTERS fp arguments, go in registers; the rest + goes on the stack. Structures and long doubles (if not equivalent + to double) are passed as a pointer to a copy of the structure. + Stuff on the stack needs to keep proper alignment. */ + for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) + { + switch ((*ptr)->type) + { + case FFI_TYPE_FLOAT: + /* With FFI_LINUX_SOFT_FLOAT floats are handled like UINT32. */ + if (cif->abi == FFI_LINUX_SOFT_FLOAT) + goto soft_float_cif; + fparg_count++; + /* floating singles are not 8-aligned on stack */ + break; + +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX_SOFT_FLOAT) + goto do_struct; + if (cif->abi == FFI_LINUX_SOFT_FLOAT) + { + if (intarg_count >= NUM_GPR_ARG_REGISTERS - 3 + || intarg_count < NUM_GPR_ARG_REGISTERS) + /* A long double in FFI_LINUX_SOFT_FLOAT can use only + a set of four consecutive gprs. If we have not enough, + we have to adjust the intarg_count value. */ + intarg_count += NUM_GPR_ARG_REGISTERS - intarg_count; + intarg_count += 4; + break; + } + else + fparg_count++; + /* Fall thru */ +#endif + case FFI_TYPE_DOUBLE: + /* With FFI_LINUX_SOFT_FLOAT doubles are handled like UINT64. */ + if (cif->abi == FFI_LINUX_SOFT_FLOAT) + goto soft_double_cif; + fparg_count++; + /* If this FP arg is going on the stack, it must be + 8-byte-aligned. */ + if (fparg_count > NUM_FPR_ARG_REGISTERS + && intarg_count >= NUM_GPR_ARG_REGISTERS + && intarg_count % 2 != 0) + intarg_count++; + break; + + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + soft_double_cif: + /* 'long long' arguments are passed as two words, but + either both words must fit in registers or both go + on the stack. If they go on the stack, they must + be 8-byte-aligned. + + Also, only certain register pairs can be used for + passing long long int -- specifically (r3,r4), (r5,r6), + (r7,r8), (r9,r10). + */ + if (intarg_count == NUM_GPR_ARG_REGISTERS-1 + || intarg_count % 2 != 0) + intarg_count++; + intarg_count += 2; + break; + + case FFI_TYPE_STRUCT: +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + do_struct: +#endif + /* We must allocate space for a copy of these to enforce + pass-by-value. Pad the space up to a multiple of 16 + bytes (the maximum alignment required for anything under + the SYSV ABI). */ + struct_copy_size += ((*ptr)->size + 15) & ~0xF; + /* Fall through (allocate space for the pointer). */ + + default: + soft_float_cif: + /* Everything else is passed as a 4-byte word in a GPR, either + the object itself or a pointer to it. */ + intarg_count++; + break; + } + } + else + for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) + { + switch ((*ptr)->type) + { +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + if (cif->abi == FFI_LINUX_SOFT_FLOAT) + intarg_count += 4; + else + { + fparg_count += 2; + intarg_count += 2; + } + break; +#endif + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + fparg_count++; + intarg_count++; + break; + + case FFI_TYPE_STRUCT: + intarg_count += ((*ptr)->size + 7) / 8; + break; + + default: + /* Everything else is passed as a 8-byte word in a GPR, either + the object itself or a pointer to it. */ + intarg_count++; + break; + } + } + + if (fparg_count != 0) + flags |= FLAG_FP_ARGUMENTS; + if (intarg_count > 4) + flags |= FLAG_4_GPR_ARGUMENTS; + if (struct_copy_size != 0) + flags |= FLAG_ARG_NEEDS_COPY; + + if (cif->abi != FFI_LINUX64) + { + /* Space for the FPR registers, if needed. */ + if (fparg_count != 0) + bytes += NUM_FPR_ARG_REGISTERS * sizeof (double); + + /* Stack space. */ + if (intarg_count > NUM_GPR_ARG_REGISTERS) + bytes += (intarg_count - NUM_GPR_ARG_REGISTERS) * sizeof (int); + if (fparg_count > NUM_FPR_ARG_REGISTERS) + bytes += (fparg_count - NUM_FPR_ARG_REGISTERS) * sizeof (double); + } + else + { + /* Space for the FPR registers, if needed. */ + if (fparg_count != 0) + bytes += NUM_FPR_ARG_REGISTERS64 * sizeof (double); + + /* Stack space. */ + if (intarg_count > NUM_GPR_ARG_REGISTERS64) + bytes += (intarg_count - NUM_GPR_ARG_REGISTERS64) * sizeof (long); + } + + /* The stack space allocated needs to be a multiple of 16 bytes. */ + bytes = (bytes + 15) & ~0xF; + + /* Add in the space for the copied structures. */ + bytes += struct_copy_size; + + cif->flags = flags; + cif->bytes = bytes; + + return FFI_OK; +} + +extern void ffi_call_SYSV(extended_cif *, unsigned, unsigned, unsigned *, + void (*fn)(void)); +extern void FFI_HIDDEN ffi_call_LINUX64(extended_cif *, unsigned long, + unsigned long, unsigned long *, + void (*fn)(void)); + +void +ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + extended_cif ecif; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return */ + /* value address then we need to make one */ + + if ((rvalue == NULL) && (cif->rtype->type == FFI_TYPE_STRUCT)) + { + ecif.rvalue = alloca(cif->rtype->size); + } + else + ecif.rvalue = rvalue; + + + switch (cif->abi) + { +#ifndef POWERPC64 + case FFI_SYSV: + case FFI_GCC_SYSV: + case FFI_LINUX: + case FFI_LINUX_SOFT_FLOAT: + ffi_call_SYSV (&ecif, -cif->bytes, cif->flags, ecif.rvalue, fn); + break; +#else + case FFI_LINUX64: + ffi_call_LINUX64 (&ecif, -(long) cif->bytes, cif->flags, ecif.rvalue, fn); + break; +#endif + default: + FFI_ASSERT (0); + break; + } +} + + +#ifndef POWERPC64 +#define MIN_CACHE_LINE_SIZE 8 + +static void +flush_icache (char *wraddr, char *xaddr, int size) +{ + int i; + for (i = 0; i < size; i += MIN_CACHE_LINE_SIZE) + __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;" + : : "r" (xaddr + i), "r" (wraddr + i) : "memory"); + __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;" "sync;" "isync;" + : : "r"(xaddr + size - 1), "r"(wraddr + size - 1) + : "memory"); +} +#endif + +ffi_status +ffi_prep_closure_loc (ffi_closure *closure, + ffi_cif *cif, + void (*fun) (ffi_cif *, void *, void **, void *), + void *user_data, + void *codeloc) +{ +#ifdef POWERPC64 + void **tramp = (void **) &closure->tramp[0]; + + FFI_ASSERT (cif->abi == FFI_LINUX64); + /* Copy function address and TOC from ffi_closure_LINUX64. */ + memcpy (tramp, (char *) ffi_closure_LINUX64, 16); + tramp[2] = codeloc; +#else + unsigned int *tramp; + + FFI_ASSERT (cif->abi == FFI_GCC_SYSV || cif->abi == FFI_SYSV); + + tramp = (unsigned int *) &closure->tramp[0]; + tramp[0] = 0x7c0802a6; /* mflr r0 */ + tramp[1] = 0x4800000d; /* bl 10 <trampoline_initial+0x10> */ + tramp[4] = 0x7d6802a6; /* mflr r11 */ + tramp[5] = 0x7c0803a6; /* mtlr r0 */ + tramp[6] = 0x800b0000; /* lwz r0,0(r11) */ + tramp[7] = 0x816b0004; /* lwz r11,4(r11) */ + tramp[8] = 0x7c0903a6; /* mtctr r0 */ + tramp[9] = 0x4e800420; /* bctr */ + *(void **) &tramp[2] = (void *) ffi_closure_SYSV; /* function */ + *(void **) &tramp[3] = codeloc; /* context */ + + /* Flush the icache. */ + flush_icache ((char *)tramp, (char *)codeloc, FFI_TRAMPOLINE_SIZE); +#endif + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + return FFI_OK; +} + +typedef union +{ + float f; + double d; +} ffi_dblfl; + +int ffi_closure_helper_SYSV (ffi_closure *, void *, unsigned long *, + ffi_dblfl *, unsigned long *); + +/* Basically the trampoline invokes ffi_closure_SYSV, and on + * entry, r11 holds the address of the closure. + * After storing the registers that could possibly contain + * parameters to be passed into the stack frame and setting + * up space for a return value, ffi_closure_SYSV invokes the + * following helper function to do most of the work + */ + +int +ffi_closure_helper_SYSV (ffi_closure *closure, void *rvalue, + unsigned long *pgr, ffi_dblfl *pfr, + unsigned long *pst) +{ + /* rvalue is the pointer to space for return value in closure assembly */ + /* pgr is the pointer to where r3-r10 are stored in ffi_closure_SYSV */ + /* pfr is the pointer to where f1-f8 are stored in ffi_closure_SYSV */ + /* pst is the pointer to outgoing parameter stack in original caller */ + + void ** avalue; + ffi_type ** arg_types; + long i, avn; + long nf; /* number of floating registers already used */ + long ng; /* number of general registers already used */ + ffi_cif * cif; + double temp; + unsigned size; + + cif = closure->cif; + avalue = alloca (cif->nargs * sizeof (void *)); + size = cif->rtype->size; + + nf = 0; + ng = 0; + + /* Copy the caller's structure return value address so that the closure + returns the data directly to the caller. + For FFI_SYSV the result is passed in r3/r4 if the struct size is less + or equal 8 bytes. */ + + if ((cif->rtype->type == FFI_TYPE_STRUCT + && !((cif->abi == FFI_SYSV) && (size <= 8))) +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + || (cif->rtype->type == FFI_TYPE_LONGDOUBLE + && cif->abi != FFI_LINUX && cif->abi != FFI_LINUX_SOFT_FLOAT) +#endif + ) + { + rvalue = (void *) *pgr; + ng++; + pgr++; + } + + i = 0; + avn = cif->nargs; + arg_types = cif->arg_types; + + /* Grab the addresses of the arguments from the stack frame. */ + while (i < avn) + { + switch (arg_types[i]->type) + { + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + /* there are 8 gpr registers used to pass values */ + if (ng < 8) + { + avalue[i] = (char *) pgr + 3; + ng++; + pgr++; + } + else + { + avalue[i] = (char *) pst + 3; + pst++; + } + break; + + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + /* there are 8 gpr registers used to pass values */ + if (ng < 8) + { + avalue[i] = (char *) pgr + 2; + ng++; + pgr++; + } + else + { + avalue[i] = (char *) pst + 2; + pst++; + } + break; + + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_POINTER: + soft_float_closure: + /* there are 8 gpr registers used to pass values */ + if (ng < 8) + { + avalue[i] = pgr; + ng++; + pgr++; + } + else + { + avalue[i] = pst; + pst++; + } + break; + + case FFI_TYPE_STRUCT: +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + do_struct: +#endif + /* Structs are passed by reference. The address will appear in a + gpr if it is one of the first 8 arguments. */ + if (ng < 8) + { + avalue[i] = (void *) *pgr; + ng++; + pgr++; + } + else + { + avalue[i] = (void *) *pst; + pst++; + } + break; + + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + soft_double_closure: + /* passing long long ints are complex, they must + * be passed in suitable register pairs such as + * (r3,r4) or (r5,r6) or (r6,r7), or (r7,r8) or (r9,r10) + * and if the entire pair aren't available then the outgoing + * parameter stack is used for both but an alignment of 8 + * must will be kept. So we must either look in pgr + * or pst to find the correct address for this type + * of parameter. + */ + if (ng < 7) + { + if (ng & 0x01) + { + /* skip r4, r6, r8 as starting points */ + ng++; + pgr++; + } + avalue[i] = pgr; + ng += 2; + pgr += 2; + } + else + { + if (((long) pst) & 4) + pst++; + avalue[i] = pst; + pst += 2; + ng = 8; + } + break; + + case FFI_TYPE_FLOAT: + /* With FFI_LINUX_SOFT_FLOAT floats are handled like UINT32. */ + if (cif->abi == FFI_LINUX_SOFT_FLOAT) + goto soft_float_closure; + /* unfortunately float values are stored as doubles + * in the ffi_closure_SYSV code (since we don't check + * the type in that routine). + */ + + /* there are 8 64bit floating point registers */ + + if (nf < 8) + { + temp = pfr->d; + pfr->f = (float) temp; + avalue[i] = pfr; + nf++; + pfr++; + } + else + { + /* FIXME? here we are really changing the values + * stored in the original calling routines outgoing + * parameter stack. This is probably a really + * naughty thing to do but... + */ + avalue[i] = pst; + pst += 1; + } + break; + + case FFI_TYPE_DOUBLE: + /* With FFI_LINUX_SOFT_FLOAT doubles are handled like UINT64. */ + if (cif->abi == FFI_LINUX_SOFT_FLOAT) + goto soft_double_closure; + /* On the outgoing stack all values are aligned to 8 */ + /* there are 8 64bit floating point registers */ + + if (nf < 8) + { + avalue[i] = pfr; + nf++; + pfr++; + } + else + { + if (((long) pst) & 4) + pst++; + avalue[i] = pst; + pst += 2; + } + break; + +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX_SOFT_FLOAT) + goto do_struct; + if (cif->abi == FFI_LINUX_SOFT_FLOAT) + { /* Test if for the whole long double, 4 gprs are available. + otherwise the stuff ends up on the stack. */ + if (ng < 5) + { + avalue[i] = pgr; + pgr += 4; + ng += 4; + } + else + { + avalue[i] = pst; + pst += 4; + ng = 8; + } + break; + } + if (nf < 7) + { + avalue[i] = pfr; + pfr += 2; + nf += 2; + } + else + { + if (((long) pst) & 4) + pst++; + avalue[i] = pst; + pst += 4; + nf = 8; + } + break; +#endif + + default: + FFI_ASSERT (0); + } + + i++; + } + + + (closure->fun) (cif, rvalue, avalue, closure->user_data); + + /* Tell ffi_closure_SYSV how to perform return type promotions. + Because the FFI_SYSV ABI returns the structures <= 8 bytes in r3/r4 + we have to tell ffi_closure_SYSV how to treat them. We combine the base + type FFI_SYSV_TYPE_SMALL_STRUCT - 1 with the size of the struct. + So a one byte struct gets the return type 16. Return type 1 to 15 are + already used and we never have a struct with size zero. That is the reason + for the subtraction of 1. See the comment in ffitarget.h about ordering. + */ + if (cif->abi == FFI_SYSV && cif->rtype->type == FFI_TYPE_STRUCT + && size <= 8) + return (FFI_SYSV_TYPE_SMALL_STRUCT - 1) + size; +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + else if (cif->rtype->type == FFI_TYPE_LONGDOUBLE + && cif->abi != FFI_LINUX && cif->abi != FFI_LINUX_SOFT_FLOAT) + return FFI_TYPE_STRUCT; +#endif + /* With FFI_LINUX_SOFT_FLOAT floats and doubles are handled like UINT32 + respectivley UINT64. */ + if (cif->abi == FFI_LINUX_SOFT_FLOAT) + { + switch (cif->rtype->type) + { + case FFI_TYPE_FLOAT: + return FFI_TYPE_UINT32; + break; + case FFI_TYPE_DOUBLE: + return FFI_TYPE_UINT64; + break; +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + return FFI_TYPE_UINT128; + break; +#endif + default: + return cif->rtype->type; + } + } + else + { + return cif->rtype->type; + } +} + +int FFI_HIDDEN ffi_closure_helper_LINUX64 (ffi_closure *, void *, + unsigned long *, ffi_dblfl *); + +int FFI_HIDDEN +ffi_closure_helper_LINUX64 (ffi_closure *closure, void *rvalue, + unsigned long *pst, ffi_dblfl *pfr) +{ + /* rvalue is the pointer to space for return value in closure assembly */ + /* pst is the pointer to parameter save area + (r3-r10 are stored into its first 8 slots by ffi_closure_LINUX64) */ + /* pfr is the pointer to where f1-f13 are stored in ffi_closure_LINUX64 */ + + void **avalue; + ffi_type **arg_types; + long i, avn; + ffi_cif *cif; + ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS64; + + cif = closure->cif; + avalue = alloca (cif->nargs * sizeof (void *)); + + /* Copy the caller's structure return value address so that the closure + returns the data directly to the caller. */ + if (cif->rtype->type == FFI_TYPE_STRUCT) + { + rvalue = (void *) *pst; + pst++; + } + + i = 0; + avn = cif->nargs; + arg_types = cif->arg_types; + + /* Grab the addresses of the arguments from the stack frame. */ + while (i < avn) + { + switch (arg_types[i]->type) + { + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + avalue[i] = (char *) pst + 7; + pst++; + break; + + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + avalue[i] = (char *) pst + 6; + pst++; + break; + + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + avalue[i] = (char *) pst + 4; + pst++; + break; + + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_POINTER: + avalue[i] = pst; + pst++; + break; + + case FFI_TYPE_STRUCT: + /* Structures with size less than eight bytes are passed + left-padded. */ + if (arg_types[i]->size < 8) + avalue[i] = (char *) pst + 8 - arg_types[i]->size; + else + avalue[i] = pst; + pst += (arg_types[i]->size + 7) / 8; + break; + + case FFI_TYPE_FLOAT: + /* unfortunately float values are stored as doubles + * in the ffi_closure_LINUX64 code (since we don't check + * the type in that routine). + */ + + /* there are 13 64bit floating point registers */ + + if (pfr < end_pfr) + { + double temp = pfr->d; + pfr->f = (float) temp; + avalue[i] = pfr; + pfr++; + } + else + avalue[i] = pst; + pst++; + break; + + case FFI_TYPE_DOUBLE: + /* On the outgoing stack all values are aligned to 8 */ + /* there are 13 64bit floating point registers */ + + if (pfr < end_pfr) + { + avalue[i] = pfr; + pfr++; + } + else + avalue[i] = pst; + pst++; + break; + +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + if (pfr + 1 < end_pfr) + { + avalue[i] = pfr; + pfr += 2; + } + else + { + if (pfr < end_pfr) + { + /* Passed partly in f13 and partly on the stack. + Move it all to the stack. */ + *pst = *(unsigned long *) pfr; + pfr++; + } + avalue[i] = pst; + } + pst += 2; + break; +#endif + + default: + FFI_ASSERT (0); + } + + i++; + } + + + (closure->fun) (cif, rvalue, avalue, closure->user_data); + + /* Tell ffi_closure_LINUX64 how to perform return type promotions. */ + return cif->rtype->type; +} diff --git a/libffi/src/powerpc/ffi_darwin.c b/libffi/src/powerpc/ffi_darwin.c new file mode 100644 index 000000000..ee03dab69 --- /dev/null +++ b/libffi/src/powerpc/ffi_darwin.c @@ -0,0 +1,1359 @@ +/* ----------------------------------------------------------------------- + ffi_darwin.c + + Copyright (C) 1998 Geoffrey Keating + Copyright (C) 2001 John Hornkvist + Copyright (C) 2002, 2006, 2007, 2009, 2010 Free Software Foundation, Inc. + + FFI support for Darwin and AIX. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> + +extern void ffi_closure_ASM (void); + +enum { + /* The assembly depends on these exact flags. + For Darwin64 (when FLAG_RETURNS_STRUCT is set): + FLAG_RETURNS_FP indicates that the structure embeds FP data. + FLAG_RETURNS_128BITS signals a special struct size that is not + expanded for float content. */ + FLAG_RETURNS_128BITS = 1 << (31-31), /* These go in cr7 */ + FLAG_RETURNS_NOTHING = 1 << (31-30), + FLAG_RETURNS_FP = 1 << (31-29), + FLAG_RETURNS_64BITS = 1 << (31-28), + + FLAG_RETURNS_STRUCT = 1 << (31-27), /* This goes in cr6 */ + + FLAG_ARG_NEEDS_COPY = 1 << (31- 7), + FLAG_FP_ARGUMENTS = 1 << (31- 6), /* cr1.eq; specified by ABI */ + FLAG_4_GPR_ARGUMENTS = 1 << (31- 5), + FLAG_RETVAL_REFERENCE = 1 << (31- 4) +}; + +/* About the DARWIN ABI. */ +enum { + NUM_GPR_ARG_REGISTERS = 8, + NUM_FPR_ARG_REGISTERS = 13, + LINKAGE_AREA_GPRS = 6 +}; + +enum { ASM_NEEDS_REGISTERS = 4 }; /* r28-r31 */ + +/* ffi_prep_args is called by the assembly routine once stack space + has been allocated for the function's arguments. + + m32/m64 + + The stack layout we want looks like this: + + | Return address from ffi_call_DARWIN | higher addresses + |--------------------------------------------| + | Previous backchain pointer 4/8 | stack pointer here + |--------------------------------------------|<+ <<< on entry to + | ASM_NEEDS_REGISTERS=r28-r31 4*(4/8) | | ffi_call_DARWIN + |--------------------------------------------| | + | When we have any FP activity... the | | + | FPRs occupy NUM_FPR_ARG_REGISTERS slots | | + | here fp13 .. fp1 from high to low addr. | | + ~ ~ ~ + | Parameters (at least 8*4/8=32/64) | | NUM_GPR_ARG_REGISTERS + |--------------------------------------------| | + | TOC=R2 (AIX) Reserved (Darwin) 4/8 | | + |--------------------------------------------| | stack | + | Reserved 2*4/8 | | grows | + |--------------------------------------------| | down V + | Space for callee's LR 4/8 | | + |--------------------------------------------| | lower addresses + | Saved CR [low word for m64] 4/8 | | + |--------------------------------------------| | stack pointer here + | Current backchain pointer 4/8 |-/ during + |--------------------------------------------| <<< ffi_call_DARWIN + + */ + +#if defined(POWERPC_DARWIN64) +static void +darwin64_pass_struct_by_value + (ffi_type *, char *, unsigned, unsigned *, double **, unsigned long **); +#endif + +/* This depends on GPR_SIZE = sizeof (unsigned long) */ + +void +ffi_prep_args (extended_cif *ecif, unsigned long *const stack) +{ + const unsigned bytes = ecif->cif->bytes; + const unsigned flags = ecif->cif->flags; + const unsigned nargs = ecif->cif->nargs; +#if !defined(POWERPC_DARWIN64) + const ffi_abi abi = ecif->cif->abi; +#endif + + /* 'stacktop' points at the previous backchain pointer. */ + unsigned long *const stacktop = stack + (bytes / sizeof(unsigned long)); + + /* 'fpr_base' points at the space for fpr1, and grows upwards as + we use FPR registers. */ + double *fpr_base = (double *) (stacktop - ASM_NEEDS_REGISTERS) - NUM_FPR_ARG_REGISTERS; + int gp_count = 0, fparg_count = 0; + + /* 'next_arg' grows up as we put parameters in it. */ + unsigned long *next_arg = stack + LINKAGE_AREA_GPRS; /* 6 reserved positions. */ + + int i; + double double_tmp; + void **p_argv = ecif->avalue; + unsigned long gprvalue; + ffi_type** ptr = ecif->cif->arg_types; +#if !defined(POWERPC_DARWIN64) + char *dest_cpy; +#endif + unsigned size_al = 0; + + /* Check that everything starts aligned properly. */ + FFI_ASSERT(((unsigned) (char *) stack & 0xF) == 0); + FFI_ASSERT(((unsigned) (char *) stacktop & 0xF) == 0); + FFI_ASSERT((bytes & 0xF) == 0); + + /* Deal with return values that are actually pass-by-reference. + Rule: + Return values are referenced by r3, so r4 is the first parameter. */ + + if (flags & FLAG_RETVAL_REFERENCE) + *next_arg++ = (unsigned long) (char *) ecif->rvalue; + + /* Now for the arguments. */ + for (i = nargs; i > 0; i--, ptr++, p_argv++) + { + switch ((*ptr)->type) + { + /* If a floating-point parameter appears before all of the general- + purpose registers are filled, the corresponding GPRs that match + the size of the floating-point parameter are skipped. */ + case FFI_TYPE_FLOAT: + double_tmp = *(float *) *p_argv; + if (fparg_count < NUM_FPR_ARG_REGISTERS) + *fpr_base++ = double_tmp; +#if defined(POWERPC_DARWIN) + *(float *)next_arg = *(float *) *p_argv; +#else + *(double *)next_arg = double_tmp; +#endif + next_arg++; + gp_count++; + fparg_count++; + FFI_ASSERT(flags & FLAG_FP_ARGUMENTS); + break; + + case FFI_TYPE_DOUBLE: + double_tmp = *(double *) *p_argv; + if (fparg_count < NUM_FPR_ARG_REGISTERS) + *fpr_base++ = double_tmp; + *(double *)next_arg = double_tmp; +#ifdef POWERPC64 + next_arg++; + gp_count++; +#else + next_arg += 2; + gp_count += 2; +#endif + fparg_count++; + FFI_ASSERT(flags & FLAG_FP_ARGUMENTS); + break; + +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + + case FFI_TYPE_LONGDOUBLE: +# if defined(POWERPC64) && !defined(POWERPC_DARWIN64) + /* ??? This will exceed the regs count when the value starts at fp13 + and it will not put the extra bit on the stack. */ + if (fparg_count < NUM_FPR_ARG_REGISTERS) + *(long double *) fpr_base++ = *(long double *) *p_argv; + else + *(long double *) next_arg = *(long double *) *p_argv; + next_arg += 2; + fparg_count += 2; +# else + double_tmp = ((double *) *p_argv)[0]; + if (fparg_count < NUM_FPR_ARG_REGISTERS) + *fpr_base++ = double_tmp; + *(double *) next_arg = double_tmp; +# if defined(POWERPC_DARWIN64) + next_arg++; + gp_count++; +# else + next_arg += 2; + gp_count += 2; +# endif + fparg_count++; + double_tmp = ((double *) *p_argv)[1]; + if (fparg_count < NUM_FPR_ARG_REGISTERS) + *fpr_base++ = double_tmp; + *(double *) next_arg = double_tmp; +# if defined(POWERPC_DARWIN64) + next_arg++; + gp_count++; +# else + next_arg += 2; + gp_count += 2; +# endif + fparg_count++; +# endif + FFI_ASSERT(flags & FLAG_FP_ARGUMENTS); + break; +#endif + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: +#ifdef POWERPC64 + gprvalue = *(long long *) *p_argv; + goto putgpr; +#else + *(long long *) next_arg = *(long long *) *p_argv; + next_arg += 2; + gp_count += 2; +#endif + break; + case FFI_TYPE_POINTER: + gprvalue = *(unsigned long *) *p_argv; + goto putgpr; + case FFI_TYPE_UINT8: + gprvalue = *(unsigned char *) *p_argv; + goto putgpr; + case FFI_TYPE_SINT8: + gprvalue = *(signed char *) *p_argv; + goto putgpr; + case FFI_TYPE_UINT16: + gprvalue = *(unsigned short *) *p_argv; + goto putgpr; + case FFI_TYPE_SINT16: + gprvalue = *(signed short *) *p_argv; + goto putgpr; + + case FFI_TYPE_STRUCT: + size_al = (*ptr)->size; +#if defined(POWERPC_DARWIN64) + next_arg = (unsigned long *)ALIGN((char *)next_arg, (*ptr)->alignment); + darwin64_pass_struct_by_value (*ptr, (char *) *p_argv, + (unsigned) size_al, + (unsigned int *) &fparg_count, + &fpr_base, &next_arg); +#else + dest_cpy = (char *) next_arg; + + /* If the first member of the struct is a double, then include enough + padding in the struct size to align it to double-word. */ + if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE) + size_al = ALIGN((*ptr)->size, 8); + +# if defined(POWERPC64) + FFI_ASSERT (abi != FFI_DARWIN); + memcpy ((char *) dest_cpy, (char *) *p_argv, size_al); + next_arg += (size_al + 7) / 8; +# else + /* Structures that match the basic modes (QI 1 byte, HI 2 bytes, + SI 4 bytes) are aligned as if they were those modes. + Structures with 3 byte in size are padded upwards. */ + if (size_al < 3 && abi == FFI_DARWIN) + dest_cpy += 4 - size_al; + + memcpy((char *) dest_cpy, (char *) *p_argv, size_al); + next_arg += (size_al + 3) / 4; +# endif +#endif + break; + + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + gprvalue = *(signed int *) *p_argv; + goto putgpr; + + case FFI_TYPE_UINT32: + gprvalue = *(unsigned int *) *p_argv; + putgpr: + *next_arg++ = gprvalue; + gp_count++; + break; + default: + break; + } + } + + /* Check that we didn't overrun the stack... */ + //FFI_ASSERT(gpr_base <= stacktop - ASM_NEEDS_REGISTERS); + //FFI_ASSERT((unsigned *)fpr_base + // <= stacktop - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS); + //FFI_ASSERT(flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4); +} + +#if defined(POWERPC_DARWIN64) + +/* See if we can put some of the struct into fprs. + This should not be called for structures of size 16 bytes, since these are not + broken out this way. */ +static void +darwin64_scan_struct_for_floats (ffi_type *s, unsigned *nfpr) +{ + int i; + + FFI_ASSERT (s->type == FFI_TYPE_STRUCT) + + for (i = 0; s->elements[i] != NULL; i++) + { + ffi_type *p = s->elements[i]; + switch (p->type) + { + case FFI_TYPE_STRUCT: + darwin64_scan_struct_for_floats (p, nfpr); + break; + case FFI_TYPE_LONGDOUBLE: + (*nfpr) += 2; + break; + case FFI_TYPE_DOUBLE: + case FFI_TYPE_FLOAT: + (*nfpr) += 1; + break; + default: + break; + } + } +} + +static int +darwin64_struct_size_exceeds_gprs_p (ffi_type *s, char *src, unsigned *nfpr) +{ + unsigned struct_offset=0, i; + + for (i = 0; s->elements[i] != NULL; i++) + { + char *item_base; + ffi_type *p = s->elements[i]; + /* Find the start of this item (0 for the first one). */ + if (i > 0) + struct_offset = ALIGN(struct_offset, p->alignment); + + item_base = src + struct_offset; + + switch (p->type) + { + case FFI_TYPE_STRUCT: + if (darwin64_struct_size_exceeds_gprs_p (p, item_base, nfpr)) + return 1; + break; + case FFI_TYPE_LONGDOUBLE: + if (*nfpr >= NUM_FPR_ARG_REGISTERS) + return 1; + (*nfpr) += 1; + item_base += 8; + /* FALL THROUGH */ + case FFI_TYPE_DOUBLE: + if (*nfpr >= NUM_FPR_ARG_REGISTERS) + return 1; + (*nfpr) += 1; + break; + case FFI_TYPE_FLOAT: + if (*nfpr >= NUM_FPR_ARG_REGISTERS) + return 1; + (*nfpr) += 1; + break; + default: + /* If we try and place any item, that is non-float, once we've + exceeded the 8 GPR mark, then we can't fit the struct. */ + if ((unsigned long)item_base >= 8*8) + return 1; + break; + } + /* now count the size of what we just used. */ + struct_offset += p->size; + } + return 0; +} + +/* Can this struct be returned by value? */ +int +darwin64_struct_ret_by_value_p (ffi_type *s) +{ + unsigned nfp = 0; + + FFI_ASSERT (s && s->type == FFI_TYPE_STRUCT); + + /* The largest structure we can return is 8long + 13 doubles. */ + if (s->size > 168) + return 0; + + /* We can't pass more than 13 floats. */ + darwin64_scan_struct_for_floats (s, &nfp); + if (nfp > 13) + return 0; + + /* If there are not too many floats, and the struct is + small enough to accommodate in the GPRs, then it must be OK. */ + if (s->size <= 64) + return 1; + + /* Well, we have to look harder. */ + nfp = 0; + if (darwin64_struct_size_exceeds_gprs_p (s, NULL, &nfp)) + return 0; + + return 1; +} + +void +darwin64_pass_struct_floats (ffi_type *s, char *src, + unsigned *nfpr, double **fprs) +{ + int i; + double *fpr_base = *fprs; + unsigned struct_offset = 0; + + /* We don't assume anything about the alignment of the source. */ + for (i = 0; s->elements[i] != NULL; i++) + { + char *item_base; + ffi_type *p = s->elements[i]; + /* Find the start of this item (0 for the first one). */ + if (i > 0) + struct_offset = ALIGN(struct_offset, p->alignment); + item_base = src + struct_offset; + + switch (p->type) + { + case FFI_TYPE_STRUCT: + darwin64_pass_struct_floats (p, item_base, nfpr, + &fpr_base); + break; + case FFI_TYPE_LONGDOUBLE: + if (*nfpr < NUM_FPR_ARG_REGISTERS) + *fpr_base++ = *(double *)item_base; + (*nfpr) += 1; + item_base += 8; + /* FALL THROUGH */ + case FFI_TYPE_DOUBLE: + if (*nfpr < NUM_FPR_ARG_REGISTERS) + *fpr_base++ = *(double *)item_base; + (*nfpr) += 1; + break; + case FFI_TYPE_FLOAT: + if (*nfpr < NUM_FPR_ARG_REGISTERS) + *fpr_base++ = (double) *(float *)item_base; + (*nfpr) += 1; + break; + default: + break; + } + /* now count the size of what we just used. */ + struct_offset += p->size; + } + /* Update the scores. */ + *fprs = fpr_base; +} + +/* Darwin64 special rules. + Break out a struct into params and float registers. */ +static void +darwin64_pass_struct_by_value (ffi_type *s, char *src, unsigned size, + unsigned *nfpr, double **fprs, unsigned long **arg) +{ + unsigned long *next_arg = *arg; + char *dest_cpy = (char *)next_arg; + + FFI_ASSERT (s->type == FFI_TYPE_STRUCT) + + if (!size) + return; + + /* First... special cases. */ + if (size < 3 + || (size == 4 + && s->elements[0] + && s->elements[0]->type != FFI_TYPE_FLOAT)) + { + /* Must be at least one GPR, padding is unspecified in value, + let's make it zero. */ + *next_arg = 0UL; + dest_cpy += 8 - size; + memcpy ((char *) dest_cpy, src, size); + next_arg++; + } + else if (size == 16) + { + memcpy ((char *) dest_cpy, src, size); + next_arg += 2; + } + else + { + /* now the general case, we consider embedded floats. */ + memcpy ((char *) dest_cpy, src, size); + darwin64_pass_struct_floats (s, src, nfpr, fprs); + next_arg += (size+7)/8; + } + + *arg = next_arg; +} + +double * +darwin64_struct_floats_to_mem (ffi_type *s, char *dest, double *fprs, unsigned *nf) +{ + int i; + unsigned struct_offset = 0; + + /* We don't assume anything about the alignment of the source. */ + for (i = 0; s->elements[i] != NULL; i++) + { + char *item_base; + ffi_type *p = s->elements[i]; + /* Find the start of this item (0 for the first one). */ + if (i > 0) + struct_offset = ALIGN(struct_offset, p->alignment); + item_base = dest + struct_offset; + + switch (p->type) + { + case FFI_TYPE_STRUCT: + fprs = darwin64_struct_floats_to_mem (p, item_base, fprs, nf); + break; + case FFI_TYPE_LONGDOUBLE: + if (*nf < NUM_FPR_ARG_REGISTERS) + { + *(double *)item_base = *fprs++ ; + (*nf) += 1; + } + item_base += 8; + /* FALL THROUGH */ + case FFI_TYPE_DOUBLE: + if (*nf < NUM_FPR_ARG_REGISTERS) + { + *(double *)item_base = *fprs++ ; + (*nf) += 1; + } + break; + case FFI_TYPE_FLOAT: + if (*nf < NUM_FPR_ARG_REGISTERS) + { + *(float *)item_base = (float) *fprs++ ; + (*nf) += 1; + } + break; + default: + break; + } + /* now count the size of what we just used. */ + struct_offset += p->size; + } + return fprs; +} + +#endif + +/* Adjust the size of S to be correct for Darwin. + On Darwin m32, the first field of a structure has natural alignment. + On Darwin m64, all fields have natural alignment. */ + +static void +darwin_adjust_aggregate_sizes (ffi_type *s) +{ + int i; + + if (s->type != FFI_TYPE_STRUCT) + return; + + s->size = 0; + for (i = 0; s->elements[i] != NULL; i++) + { + ffi_type *p; + int align; + + p = s->elements[i]; + if (p->type == FFI_TYPE_STRUCT) + darwin_adjust_aggregate_sizes (p); +#if defined(POWERPC_DARWIN64) + /* Natural alignment for all items. */ + align = p->alignment; +#else + /* Natrual alignment for the first item... */ + if (i == 0) + align = p->alignment; + else if (p->alignment == 16 || p->alignment < 4) + /* .. subsequent items with vector or align < 4 have natural align. */ + align = p->alignment; + else + /* .. or align is 4. */ + align = 4; +#endif + /* Pad, if necessary, before adding the current item. */ + s->size = ALIGN(s->size, align) + p->size; + } + + s->size = ALIGN(s->size, s->alignment); + + /* This should not be necessary on m64, but harmless. */ + if (s->elements[0]->type == FFI_TYPE_UINT64 + || s->elements[0]->type == FFI_TYPE_SINT64 + || s->elements[0]->type == FFI_TYPE_DOUBLE + || s->elements[0]->alignment == 8) + s->alignment = s->alignment > 8 ? s->alignment : 8; + /* Do not add additional tail padding. */ +} + +/* Adjust the size of S to be correct for AIX. + Word-align double unless it is the first member of a structure. */ + +static void +aix_adjust_aggregate_sizes (ffi_type *s) +{ + int i; + + if (s->type != FFI_TYPE_STRUCT) + return; + + s->size = 0; + for (i = 0; s->elements[i] != NULL; i++) + { + ffi_type *p; + int align; + + p = s->elements[i]; + aix_adjust_aggregate_sizes (p); + align = p->alignment; + if (i != 0 && p->type == FFI_TYPE_DOUBLE) + align = 4; + s->size = ALIGN(s->size, align) + p->size; + } + + s->size = ALIGN(s->size, s->alignment); + + if (s->elements[0]->type == FFI_TYPE_UINT64 + || s->elements[0]->type == FFI_TYPE_SINT64 + || s->elements[0]->type == FFI_TYPE_DOUBLE + || s->elements[0]->alignment == 8) + s->alignment = s->alignment > 8 ? s->alignment : 8; + /* Do not add additional tail padding. */ +} + +/* Perform machine dependent cif processing. */ +ffi_status +ffi_prep_cif_machdep (ffi_cif *cif) +{ + /* All this is for the DARWIN ABI. */ + unsigned i; + ffi_type **ptr; + unsigned bytes; + unsigned fparg_count = 0, intarg_count = 0; + unsigned flags = 0; + unsigned size_al = 0; + + /* All the machine-independent calculation of cif->bytes will be wrong. + All the calculation of structure sizes will also be wrong. + Redo the calculation for DARWIN. */ + + if (cif->abi == FFI_DARWIN) + { + darwin_adjust_aggregate_sizes (cif->rtype); + for (i = 0; i < cif->nargs; i++) + darwin_adjust_aggregate_sizes (cif->arg_types[i]); + } + + if (cif->abi == FFI_AIX) + { + aix_adjust_aggregate_sizes (cif->rtype); + for (i = 0; i < cif->nargs; i++) + aix_adjust_aggregate_sizes (cif->arg_types[i]); + } + + /* Space for the frame pointer, callee's LR, CR, etc, and for + the asm's temp regs. */ + + bytes = (LINKAGE_AREA_GPRS + ASM_NEEDS_REGISTERS) * sizeof(unsigned long); + + /* Return value handling. + The rules m32 are as follows: + - 32-bit (or less) integer values are returned in gpr3; + - structures of size <= 4 bytes also returned in gpr3; + - 64-bit integer values [??? and structures between 5 and 8 bytes] are + returned in gpr3 and gpr4; + - Single/double FP values are returned in fpr1; + - Long double FP (if not equivalent to double) values are returned in + fpr1 and fpr2; + m64: + - 64-bit or smaller integral values are returned in GPR3 + - Single/double FP values are returned in fpr1; + - Long double FP values are returned in fpr1 and fpr2; + m64 Structures: + - If the structure could be accommodated in registers were it to be the + first argument to a routine, then it is returned in those registers. + m32/m64 structures otherwise: + - Larger structures values are allocated space and a pointer is passed + as the first argument. */ + switch (cif->rtype->type) + { + +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + flags |= FLAG_RETURNS_128BITS; + flags |= FLAG_RETURNS_FP; + break; +#endif + + case FFI_TYPE_DOUBLE: + flags |= FLAG_RETURNS_64BITS; + /* Fall through. */ + case FFI_TYPE_FLOAT: + flags |= FLAG_RETURNS_FP; + break; + + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: +#ifdef POWERPC64 + case FFI_TYPE_POINTER: +#endif + flags |= FLAG_RETURNS_64BITS; + break; + + case FFI_TYPE_STRUCT: +#if defined(POWERPC_DARWIN64) + { + /* Can we fit the struct into regs? */ + if (darwin64_struct_ret_by_value_p (cif->rtype)) + { + unsigned nfpr = 0; + flags |= FLAG_RETURNS_STRUCT; + if (cif->rtype->size != 16) + darwin64_scan_struct_for_floats (cif->rtype, &nfpr) ; + else + flags |= FLAG_RETURNS_128BITS; + /* Will be 0 for 16byte struct. */ + if (nfpr) + flags |= FLAG_RETURNS_FP; + } + else /* By ref. */ + { + flags |= FLAG_RETVAL_REFERENCE; + flags |= FLAG_RETURNS_NOTHING; + intarg_count++; + } + } +#elif defined(DARWIN_PPC) + if (cif->rtype->size <= 4) + flags |= FLAG_RETURNS_STRUCT; + else /* else by reference. */ + { + flags |= FLAG_RETVAL_REFERENCE; + flags |= FLAG_RETURNS_NOTHING; + intarg_count++; + } +#else /* assume we pass by ref. */ + flags |= FLAG_RETVAL_REFERENCE; + flags |= FLAG_RETURNS_NOTHING; + intarg_count++; +#endif + break; + case FFI_TYPE_VOID: + flags |= FLAG_RETURNS_NOTHING; + break; + + default: + /* Returns 32-bit integer, or similar. Nothing to do here. */ + break; + } + + /* The first NUM_GPR_ARG_REGISTERS words of integer arguments, and the + first NUM_FPR_ARG_REGISTERS fp arguments, go in registers; the rest + goes on the stack. + ??? Structures are passed as a pointer to a copy of the structure. + Stuff on the stack needs to keep proper alignment. + For m64 the count is effectively of half-GPRs. */ + for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) + { + unsigned align_words; + switch ((*ptr)->type) + { + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + fparg_count++; +#if !defined(POWERPC_DARWIN64) + /* If this FP arg is going on the stack, it must be + 8-byte-aligned. */ + if (fparg_count > NUM_FPR_ARG_REGISTERS + && (intarg_count & 0x01) != 0) + intarg_count++; +#endif + break; + +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + fparg_count += 2; + /* If this FP arg is going on the stack, it must be + 16-byte-aligned. */ + if (fparg_count >= NUM_FPR_ARG_REGISTERS) +#if defined (POWERPC64) + intarg_count = ALIGN(intarg_count, 2); +#else + intarg_count = ALIGN(intarg_count, 4); +#endif + break; +#endif + + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: +#if defined(POWERPC64) + intarg_count++; +#else + /* 'long long' arguments are passed as two words, but + either both words must fit in registers or both go + on the stack. If they go on the stack, they must + be 8-byte-aligned. */ + if (intarg_count == NUM_GPR_ARG_REGISTERS-1 + || (intarg_count >= NUM_GPR_ARG_REGISTERS + && (intarg_count & 0x01) != 0)) + intarg_count++; + intarg_count += 2; +#endif + break; + + case FFI_TYPE_STRUCT: + size_al = (*ptr)->size; +#if defined(POWERPC_DARWIN64) + align_words = (*ptr)->alignment >> 3; + if (align_words) + intarg_count = ALIGN(intarg_count, align_words); + /* Base size of the struct. */ + intarg_count += (size_al + 7) / 8; + /* If 16 bytes then don't worry about floats. */ + if (size_al != 16) + /* Scan through for floats to be placed in regs. */ + darwin64_scan_struct_for_floats (*ptr, &fparg_count) ; +#else + align_words = (*ptr)->alignment >> 2; + if (align_words) + intarg_count = ALIGN(intarg_count, align_words); + /* If the first member of the struct is a double, then align + the struct to double-word. + if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE) + size_al = ALIGN((*ptr)->size, 8); */ +# ifdef POWERPC64 + intarg_count += (size_al + 7) / 8; +# else + intarg_count += (size_al + 3) / 4; +# endif +#endif + break; + + default: + /* Everything else is passed as a 4-byte word in a GPR, either + the object itself or a pointer to it. */ + intarg_count++; + break; + } + } + + if (fparg_count != 0) + flags |= FLAG_FP_ARGUMENTS; + +#if defined(POWERPC_DARWIN64) + /* Space to image the FPR registers, if needed - which includes when they might be + used in a struct return. */ + if (fparg_count != 0 + || ((flags & FLAG_RETURNS_STRUCT) + && (flags & FLAG_RETURNS_FP))) + bytes += NUM_FPR_ARG_REGISTERS * sizeof(double); +#else + /* Space for the FPR registers, if needed. */ + if (fparg_count != 0) + bytes += NUM_FPR_ARG_REGISTERS * sizeof(double); +#endif + + /* Stack space. */ +#ifdef POWERPC64 + if ((intarg_count + fparg_count) > NUM_GPR_ARG_REGISTERS) + bytes += (intarg_count + fparg_count) * sizeof(long); +#else + if ((intarg_count + 2 * fparg_count) > NUM_GPR_ARG_REGISTERS) + bytes += (intarg_count + 2 * fparg_count) * sizeof(long); +#endif + else + bytes += NUM_GPR_ARG_REGISTERS * sizeof(long); + + /* The stack space allocated needs to be a multiple of 16 bytes. */ + bytes = ALIGN(bytes, 16) ; + + cif->flags = flags; + cif->bytes = bytes; + + return FFI_OK; +} + +extern void ffi_call_AIX(extended_cif *, long, unsigned, unsigned *, + void (*fn)(void), void (*fn2)(void)); + +extern void ffi_call_DARWIN(extended_cif *, long, unsigned, unsigned *, + void (*fn)(void), void (*fn2)(void), ffi_type*); + +void +ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + extended_cif ecif; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return + value address then we need to make one. */ + + if ((rvalue == NULL) && + (cif->rtype->type == FFI_TYPE_STRUCT)) + { + ecif.rvalue = alloca (cif->rtype->size); + } + else + ecif.rvalue = rvalue; + + switch (cif->abi) + { + case FFI_AIX: + ffi_call_AIX(&ecif, -(long)cif->bytes, cif->flags, ecif.rvalue, fn, + FFI_FN(ffi_prep_args)); + break; + case FFI_DARWIN: + ffi_call_DARWIN(&ecif, -(long)cif->bytes, cif->flags, ecif.rvalue, fn, + FFI_FN(ffi_prep_args), cif->rtype); + break; + default: + FFI_ASSERT(0); + break; + } +} + +static void flush_icache(char *); +static void flush_range(char *, int); + +/* The layout of a function descriptor. A C function pointer really + points to one of these. */ + +typedef struct aix_fd_struct { + void *code_pointer; + void *toc; +} aix_fd; + +/* here I'd like to add the stack frame layout we use in darwin_closure.S + and aix_closure.S + + m32/m64 + + The stack layout looks like this: + + | Additional params... | | Higher address + ~ ~ ~ + | Parameters (at least 8*4/8=32/64) | | NUM_GPR_ARG_REGISTERS + |--------------------------------------------| | + | TOC=R2 (AIX) Reserved (Darwin) 4/8 | | + |--------------------------------------------| | + | Reserved 2*4/8 | | + |--------------------------------------------| | + | Space for callee's LR 4/8 | | + |--------------------------------------------| | + | Saved CR [low word for m64] 4/8 | | + |--------------------------------------------| | + | Current backchain pointer 4/8 |-/ Parent's frame. + |--------------------------------------------| <+ <<< on entry to ffi_closure_ASM + | Result Bytes 16 | | + |--------------------------------------------| | + ~ padding to 16-byte alignment ~ ~ + |--------------------------------------------| | + | NUM_FPR_ARG_REGISTERS slots | | + | here fp13 .. fp1 13*8 | | + |--------------------------------------------| | + | R3..R10 8*4/8=32/64 | | NUM_GPR_ARG_REGISTERS + |--------------------------------------------| | + | TOC=R2 (AIX) Reserved (Darwin) 4/8 | | + |--------------------------------------------| | stack | + | Reserved [compiler,binder] 2*4/8 | | grows | + |--------------------------------------------| | down V + | Space for callee's LR 4/8 | | + |--------------------------------------------| | lower addresses + | Saved CR [low word for m64] 4/8 | | + |--------------------------------------------| | stack pointer here + | Current backchain pointer 4/8 |-/ during + |--------------------------------------------| <<< ffi_closure_ASM. + +*/ + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *codeloc) +{ + unsigned int *tramp; + struct ffi_aix_trampoline_struct *tramp_aix; + aix_fd *fd; + + switch (cif->abi) + { + case FFI_DARWIN: + + FFI_ASSERT (cif->abi == FFI_DARWIN); + + tramp = (unsigned int *) &closure->tramp[0]; +#if defined(POWERPC_DARWIN64) + tramp[0] = 0x7c0802a6; /* mflr r0 */ + tramp[1] = 0x429f0015; /* bcl- 20,4*cr7+so, +0x18 (L1) */ + /* We put the addresses here. */ + tramp[6] = 0x7d6802a6; /*L1: mflr r11 */ + tramp[7] = 0xe98b0000; /* ld r12,0(r11) function address */ + tramp[8] = 0x7c0803a6; /* mtlr r0 */ + tramp[9] = 0x7d8903a6; /* mtctr r12 */ + tramp[10] = 0xe96b0008; /* lwz r11,8(r11) static chain */ + tramp[11] = 0x4e800420; /* bctr */ + + *((unsigned long *)&tramp[2]) = (unsigned long) ffi_closure_ASM; /* function */ + *((unsigned long *)&tramp[4]) = (unsigned long) codeloc; /* context */ +#else + tramp[0] = 0x7c0802a6; /* mflr r0 */ + tramp[1] = 0x429f000d; /* bcl- 20,4*cr7+so,0x10 */ + tramp[4] = 0x7d6802a6; /* mflr r11 */ + tramp[5] = 0x818b0000; /* lwz r12,0(r11) function address */ + tramp[6] = 0x7c0803a6; /* mtlr r0 */ + tramp[7] = 0x7d8903a6; /* mtctr r12 */ + tramp[8] = 0x816b0004; /* lwz r11,4(r11) static chain */ + tramp[9] = 0x4e800420; /* bctr */ + tramp[2] = (unsigned long) ffi_closure_ASM; /* function */ + tramp[3] = (unsigned long) codeloc; /* context */ +#endif + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + /* Flush the icache. Only necessary on Darwin. */ + flush_range(codeloc, FFI_TRAMPOLINE_SIZE); + + break; + + case FFI_AIX: + + tramp_aix = (struct ffi_aix_trampoline_struct *) (closure->tramp); + fd = (aix_fd *)(void *)ffi_closure_ASM; + + FFI_ASSERT (cif->abi == FFI_AIX); + + tramp_aix->code_pointer = fd->code_pointer; + tramp_aix->toc = fd->toc; + tramp_aix->static_chain = codeloc; + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + default: + + FFI_ASSERT(0); + break; + } + return FFI_OK; +} + +static void +flush_icache(char *addr) +{ +#ifndef _AIX + __asm__ volatile ( + "dcbf 0,%0\n" + "\tsync\n" + "\ticbi 0,%0\n" + "\tsync\n" + "\tisync" + : : "r"(addr) : "memory"); +#endif +} + +static void +flush_range(char * addr1, int size) +{ +#define MIN_LINE_SIZE 32 + int i; + for (i = 0; i < size; i += MIN_LINE_SIZE) + flush_icache(addr1+i); + flush_icache(addr1+size-1); +} + +typedef union +{ + float f; + double d; +} ffi_dblfl; + +ffi_type * +ffi_closure_helper_DARWIN (ffi_closure *, void *, + unsigned long *, ffi_dblfl *); + +/* Basically the trampoline invokes ffi_closure_ASM, and on + entry, r11 holds the address of the closure. + After storing the registers that could possibly contain + parameters to be passed into the stack frame and setting + up space for a return value, ffi_closure_ASM invokes the + following helper function to do most of the work. */ + +ffi_type * +ffi_closure_helper_DARWIN (ffi_closure *closure, void *rvalue, + unsigned long *pgr, ffi_dblfl *pfr) +{ + /* rvalue is the pointer to space for return value in closure assembly + pgr is the pointer to where r3-r10 are stored in ffi_closure_ASM + pfr is the pointer to where f1-f13 are stored in ffi_closure_ASM. */ + + typedef double ldbits[2]; + + union ldu + { + ldbits lb; + long double ld; + }; + + void ** avalue; + ffi_type ** arg_types; + long i, avn; + ffi_cif * cif; + ffi_dblfl * end_pfr = pfr + NUM_FPR_ARG_REGISTERS; + unsigned size_al; +#if defined(POWERPC_DARWIN64) + unsigned fpsused = 0; +#endif + + cif = closure->cif; + avalue = alloca (cif->nargs * sizeof(void *)); + + if (cif->rtype->type == FFI_TYPE_STRUCT) + { +#if defined(POWERPC_DARWIN64) + if (!darwin64_struct_ret_by_value_p (cif->rtype)) + { + /* Won't fit into the regs - return by ref. */ + rvalue = (void *) *pgr; + pgr++; + } +#elif defined(DARWIN_PPC) + if (cif->rtype->size > 4) + { + rvalue = (void *) *pgr; + pgr++; + } +#else /* assume we return by ref. */ + rvalue = (void *) *pgr; + pgr++; +#endif + } + + i = 0; + avn = cif->nargs; + arg_types = cif->arg_types; + + /* Grab the addresses of the arguments from the stack frame. */ + while (i < avn) + { + switch (arg_types[i]->type) + { + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: +#if defined(POWERPC64) + avalue[i] = (char *) pgr + 7; +#else + avalue[i] = (char *) pgr + 3; +#endif + pgr++; + break; + + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: +#if defined(POWERPC64) + avalue[i] = (char *) pgr + 6; +#else + avalue[i] = (char *) pgr + 2; +#endif + pgr++; + break; + + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: +#if defined(POWERPC64) + avalue[i] = (char *) pgr + 4; +#else + case FFI_TYPE_POINTER: + avalue[i] = pgr; +#endif + pgr++; + break; + + case FFI_TYPE_STRUCT: + size_al = arg_types[i]->size; +#if defined(POWERPC_DARWIN64) + pgr = (unsigned long *)ALIGN((char *)pgr, arg_types[i]->alignment); + if (size_al < 3 || size_al == 4) + { + avalue[i] = ((char *)pgr)+8-size_al; + if (arg_types[i]->elements[0]->type == FFI_TYPE_FLOAT + && fpsused < NUM_FPR_ARG_REGISTERS) + { + *(float *)pgr = (float) *(double *)pfr; + pfr++; + fpsused++; + } + } + else + { + if (size_al != 16) + pfr = (ffi_dblfl *) + darwin64_struct_floats_to_mem (arg_types[i], (char *)pgr, + (double *)pfr, &fpsused); + avalue[i] = pgr; + } + pgr += (size_al + 7) / 8; +#else + /* If the first member of the struct is a double, then align + the struct to double-word. */ + if (arg_types[i]->elements[0]->type == FFI_TYPE_DOUBLE) + size_al = ALIGN(arg_types[i]->size, 8); +# if defined(POWERPC64) + FFI_ASSERT (cif->abi != FFI_DARWIN) + avalue[i] = pgr; + pgr += (size_al + 7) / 8; +# else + /* Structures that match the basic modes (QI 1 byte, HI 2 bytes, + SI 4 bytes) are aligned as if they were those modes. */ + if (size_al < 3 && cif->abi == FFI_DARWIN) + avalue[i] = (char*) pgr + 4 - size_al; + else + avalue[i] = pgr; + pgr += (size_al + 3) / 4; +# endif +#endif + break; + + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: +#if defined(POWERPC64) + case FFI_TYPE_POINTER: + avalue[i] = pgr; + pgr++; + break; +#else + /* Long long ints are passed in two gpr's. */ + avalue[i] = pgr; + pgr += 2; + break; +#endif + + case FFI_TYPE_FLOAT: + /* A float value consumes a GPR. + There are 13 64bit floating point registers. */ + if (pfr < end_pfr) + { + double temp = pfr->d; + pfr->f = (float) temp; + avalue[i] = pfr; + pfr++; + } + else + { + avalue[i] = pgr; + } + pgr++; + break; + + case FFI_TYPE_DOUBLE: + /* A double value consumes two GPRs. + There are 13 64bit floating point registers. */ + if (pfr < end_pfr) + { + avalue[i] = pfr; + pfr++; + } + else + { + avalue[i] = pgr; + } +#ifdef POWERPC64 + pgr++; +#else + pgr += 2; +#endif + break; + +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + + case FFI_TYPE_LONGDOUBLE: +#ifdef POWERPC64 + if (pfr + 1 < end_pfr) + { + avalue[i] = pfr; + pfr += 2; + } + else + { + if (pfr < end_pfr) + { + *pgr = *(unsigned long *) pfr; + pfr++; + } + avalue[i] = pgr; + } + pgr += 2; +#else /* POWERPC64 */ + /* A long double value consumes four GPRs and two FPRs. + There are 13 64bit floating point registers. */ + if (pfr + 1 < end_pfr) + { + avalue[i] = pfr; + pfr += 2; + } + /* Here we have the situation where one part of the long double + is stored in fpr13 and the other part is already on the stack. + We use a union to pass the long double to avalue[i]. */ + else if (pfr + 1 == end_pfr) + { + union ldu temp_ld; + memcpy (&temp_ld.lb[0], pfr, sizeof(ldbits)); + memcpy (&temp_ld.lb[1], pgr + 2, sizeof(ldbits)); + avalue[i] = &temp_ld.ld; + pfr++; + } + else + { + avalue[i] = pgr; + } + pgr += 4; +#endif /* POWERPC64 */ + break; +#endif + default: + FFI_ASSERT(0); + } + i++; + } + + (closure->fun) (cif, rvalue, avalue, closure->user_data); + + /* Tell ffi_closure_ASM to perform return type promotions. */ + return cif->rtype; +} diff --git a/libffi/src/powerpc/ffitarget.h b/libffi/src/powerpc/ffitarget.h new file mode 100644 index 000000000..d17f7312d --- /dev/null +++ b/libffi/src/powerpc/ffitarget.h @@ -0,0 +1,139 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 1996-2003 Red Hat, Inc. + Copyright (C) 2007, 2008, 2010 Free Software Foundation, Inc + Target configuration macros for PowerPC. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +/* ---- System specific configurations ----------------------------------- */ + +#if defined (POWERPC) && defined (__powerpc64__) /* linux64 */ +#ifndef POWERPC64 +#define POWERPC64 +#endif +#elif defined (POWERPC_DARWIN) && defined (__ppc64__) /* Darwin64 */ +#ifndef POWERPC64 +#define POWERPC64 +#endif +#ifndef POWERPC_DARWIN64 +#define POWERPC_DARWIN64 +#endif +#elif defined (POWERPC_AIX) && defined (__64BIT__) /* AIX64 */ +#ifndef POWERPC64 +#define POWERPC64 +#endif +#endif + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + +#ifdef POWERPC + FFI_SYSV, + FFI_GCC_SYSV, + FFI_LINUX64, + FFI_LINUX, + FFI_LINUX_SOFT_FLOAT, +# ifdef POWERPC64 + FFI_DEFAULT_ABI = FFI_LINUX64, +# else +# if (!defined(__NO_FPRS__) && (__LDBL_MANT_DIG__ == 106)) + FFI_DEFAULT_ABI = FFI_LINUX, +# else +# ifdef __NO_FPRS__ + FFI_DEFAULT_ABI = FFI_LINUX_SOFT_FLOAT, +# else + FFI_DEFAULT_ABI = FFI_GCC_SYSV, +# endif +# endif +# endif +#endif + +#ifdef POWERPC_AIX + FFI_AIX, + FFI_DARWIN, + FFI_DEFAULT_ABI = FFI_AIX, +#endif + +#ifdef POWERPC_DARWIN + FFI_AIX, + FFI_DARWIN, + FFI_DEFAULT_ABI = FFI_DARWIN, +#endif + +#ifdef POWERPC_FREEBSD + FFI_SYSV, + FFI_GCC_SYSV, + FFI_LINUX64, + FFI_LINUX, + FFI_LINUX_SOFT_FLOAT, + FFI_DEFAULT_ABI = FFI_SYSV, +#endif + + FFI_LAST_ABI +} ffi_abi; +#endif + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_NATIVE_RAW_API 0 + +/* For additional types like the below, take care about the order in + ppc_closures.S. They must follow after the FFI_TYPE_LAST. */ + +/* Needed for soft-float long-double-128 support. */ +#define FFI_TYPE_UINT128 (FFI_TYPE_LAST + 1) + +/* Needed for FFI_SYSV small structure returns. + We use two flag bits, (FLAG_SYSV_SMST_R3, FLAG_SYSV_SMST_R4) which are + defined in ffi.c, to determine the exact return type and its size. */ +#define FFI_SYSV_TYPE_SMALL_STRUCT (FFI_TYPE_LAST + 2) + +#if defined(POWERPC64) || defined(POWERPC_AIX) +# if defined(POWERPC_DARWIN64) +# define FFI_TRAMPOLINE_SIZE 48 +# else +# define FFI_TRAMPOLINE_SIZE 24 +# endif +#else /* POWERPC || POWERPC_AIX */ +# define FFI_TRAMPOLINE_SIZE 40 +#endif + +#ifndef LIBFFI_ASM +#if defined(POWERPC_DARWIN) || defined(POWERPC_AIX) +struct ffi_aix_trampoline_struct { + void * code_pointer; /* Pointer to ffi_closure_ASM */ + void * toc; /* TOC */ + void * static_chain; /* Pointer to closure */ +}; +#endif +#endif + +#endif diff --git a/libffi/src/powerpc/linux64.S b/libffi/src/powerpc/linux64.S new file mode 100644 index 000000000..57b56cbb2 --- /dev/null +++ b/libffi/src/powerpc/linux64.S @@ -0,0 +1,187 @@ +/* ----------------------------------------------------------------------- + sysv.h - Copyright (c) 2003 Jakub Jelinek <jakub@redhat.com> + Copyright (c) 2008 Red Hat, Inc. + + PowerPC64 Assembly glue. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +#ifdef __powerpc64__ + .hidden ffi_call_LINUX64, .ffi_call_LINUX64 + .globl ffi_call_LINUX64, .ffi_call_LINUX64 + .section ".opd","aw" + .align 3 +ffi_call_LINUX64: + .quad .ffi_call_LINUX64,.TOC.@tocbase,0 + .size ffi_call_LINUX64,24 + .type .ffi_call_LINUX64,@function + .text +.ffi_call_LINUX64: +.LFB1: + mflr %r0 + std %r28, -32(%r1) + std %r29, -24(%r1) + std %r30, -16(%r1) + std %r31, -8(%r1) + std %r0, 16(%r1) + + mr %r28, %r1 /* our AP. */ +.LCFI0: + stdux %r1, %r1, %r4 + mr %r31, %r5 /* flags, */ + mr %r30, %r6 /* rvalue, */ + mr %r29, %r7 /* function address. */ + std %r2, 40(%r1) + + /* Call ffi_prep_args64. */ + mr %r4, %r1 + bl .ffi_prep_args64 + + ld %r0, 0(%r29) + ld %r2, 8(%r29) + ld %r11, 16(%r29) + + /* Now do the call. */ + /* Set up cr1 with bits 4-7 of the flags. */ + mtcrf 0x40, %r31 + + /* Get the address to call into CTR. */ + mtctr %r0 + /* Load all those argument registers. */ + ld %r3, -32-(8*8)(%r28) + ld %r4, -32-(7*8)(%r28) + ld %r5, -32-(6*8)(%r28) + ld %r6, -32-(5*8)(%r28) + bf- 5, 1f + ld %r7, -32-(4*8)(%r28) + ld %r8, -32-(3*8)(%r28) + ld %r9, -32-(2*8)(%r28) + ld %r10, -32-(1*8)(%r28) +1: + + /* Load all the FP registers. */ + bf- 6, 2f + lfd %f1, -32-(21*8)(%r28) + lfd %f2, -32-(20*8)(%r28) + lfd %f3, -32-(19*8)(%r28) + lfd %f4, -32-(18*8)(%r28) + lfd %f5, -32-(17*8)(%r28) + lfd %f6, -32-(16*8)(%r28) + lfd %f7, -32-(15*8)(%r28) + lfd %f8, -32-(14*8)(%r28) + lfd %f9, -32-(13*8)(%r28) + lfd %f10, -32-(12*8)(%r28) + lfd %f11, -32-(11*8)(%r28) + lfd %f12, -32-(10*8)(%r28) + lfd %f13, -32-(9*8)(%r28) +2: + + /* Make the call. */ + bctrl + + /* This must follow the call immediately, the unwinder + uses this to find out if r2 has been saved or not. */ + ld %r2, 40(%r1) + + /* Now, deal with the return value. */ + mtcrf 0x01, %r31 + bt- 30, .Ldone_return_value + bt- 29, .Lfp_return_value + std %r3, 0(%r30) + /* Fall through... */ + +.Ldone_return_value: + /* Restore the registers we used and return. */ + mr %r1, %r28 + ld %r0, 16(%r28) + ld %r28, -32(%r1) + mtlr %r0 + ld %r29, -24(%r1) + ld %r30, -16(%r1) + ld %r31, -8(%r1) + blr + +.Lfp_return_value: + bf 28, .Lfloat_return_value + stfd %f1, 0(%r30) + mtcrf 0x02, %r31 /* cr6 */ + bf 27, .Ldone_return_value + stfd %f2, 8(%r30) + b .Ldone_return_value +.Lfloat_return_value: + stfs %f1, 0(%r30) + b .Ldone_return_value +.LFE1: + .long 0 + .byte 0,12,0,1,128,4,0,0 + .size .ffi_call_LINUX64,.-.ffi_call_LINUX64 + + .section .eh_frame,EH_FRAME_FLAGS,@progbits +.Lframe1: + .4byte .LECIE1-.LSCIE1 # Length of Common Information Entry +.LSCIE1: + .4byte 0x0 # CIE Identifier Tag + .byte 0x1 # CIE Version + .ascii "zR\0" # CIE Augmentation + .uleb128 0x1 # CIE Code Alignment Factor + .sleb128 -8 # CIE Data Alignment Factor + .byte 0x41 # CIE RA Column + .uleb128 0x1 # Augmentation size + .byte 0x14 # FDE Encoding (pcrel udata8) + .byte 0xc # DW_CFA_def_cfa + .uleb128 0x1 + .uleb128 0x0 + .align 3 +.LECIE1: +.LSFDE1: + .4byte .LEFDE1-.LASFDE1 # FDE Length +.LASFDE1: + .4byte .LASFDE1-.Lframe1 # FDE CIE offset + .8byte .LFB1-. # FDE initial location + .8byte .LFE1-.LFB1 # FDE address range + .uleb128 0x0 # Augmentation size + .byte 0x2 # DW_CFA_advance_loc1 + .byte .LCFI0-.LFB1 + .byte 0xd # DW_CFA_def_cfa_register + .uleb128 0x1c + .byte 0x11 # DW_CFA_offset_extended_sf + .uleb128 0x41 + .sleb128 -2 + .byte 0x9f # DW_CFA_offset, column 0x1f + .uleb128 0x1 + .byte 0x9e # DW_CFA_offset, column 0x1e + .uleb128 0x2 + .byte 0x9d # DW_CFA_offset, column 0x1d + .uleb128 0x3 + .byte 0x9c # DW_CFA_offset, column 0x1c + .uleb128 0x4 + .align 3 +.LEFDE1: +#endif + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libffi/src/powerpc/linux64_closure.S b/libffi/src/powerpc/linux64_closure.S new file mode 100644 index 000000000..f7aa2c98e --- /dev/null +++ b/libffi/src/powerpc/linux64_closure.S @@ -0,0 +1,236 @@ +/* ----------------------------------------------------------------------- + sysv.h - Copyright (c) 2003 Jakub Jelinek <jakub@redhat.com> + Copyright (c) 2008 Red Hat, Inc. + + PowerPC64 Assembly glue. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + + .file "linux64_closure.S" + +#ifdef __powerpc64__ + FFI_HIDDEN (ffi_closure_LINUX64) + FFI_HIDDEN (.ffi_closure_LINUX64) + .globl ffi_closure_LINUX64, .ffi_closure_LINUX64 + .section ".opd","aw" + .align 3 +ffi_closure_LINUX64: + .quad .ffi_closure_LINUX64,.TOC.@tocbase,0 + .size ffi_closure_LINUX64,24 + .type .ffi_closure_LINUX64,@function + .text +.ffi_closure_LINUX64: +.LFB1: + # save general regs into parm save area + std %r3, 48(%r1) + std %r4, 56(%r1) + std %r5, 64(%r1) + std %r6, 72(%r1) + mflr %r0 + + std %r7, 80(%r1) + std %r8, 88(%r1) + std %r9, 96(%r1) + std %r10, 104(%r1) + std %r0, 16(%r1) + + # mandatory 48 bytes special reg save area + 64 bytes parm save area + # + 16 bytes retval area + 13*8 bytes fpr save area + round to 16 + stdu %r1, -240(%r1) +.LCFI0: + + # next save fpr 1 to fpr 13 + stfd %f1, 128+(0*8)(%r1) + stfd %f2, 128+(1*8)(%r1) + stfd %f3, 128+(2*8)(%r1) + stfd %f4, 128+(3*8)(%r1) + stfd %f5, 128+(4*8)(%r1) + stfd %f6, 128+(5*8)(%r1) + stfd %f7, 128+(6*8)(%r1) + stfd %f8, 128+(7*8)(%r1) + stfd %f9, 128+(8*8)(%r1) + stfd %f10, 128+(9*8)(%r1) + stfd %f11, 128+(10*8)(%r1) + stfd %f12, 128+(11*8)(%r1) + stfd %f13, 128+(12*8)(%r1) + + # set up registers for the routine that actually does the work + # get the context pointer from the trampoline + mr %r3, %r11 + + # now load up the pointer to the result storage + addi %r4, %r1, 112 + + # now load up the pointer to the parameter save area + # in the previous frame + addi %r5, %r1, 240 + 48 + + # now load up the pointer to the saved fpr registers */ + addi %r6, %r1, 128 + + # make the call + bl .ffi_closure_helper_LINUX64 +.Lret: + + # now r3 contains the return type + # so use it to look up in a table + # so we know how to deal with each type + + # look up the proper starting point in table + # by using return type as offset + mflr %r4 # move address of .Lret to r4 + sldi %r3, %r3, 4 # now multiply return type by 16 + addi %r4, %r4, .Lret_type0 - .Lret + ld %r0, 240+16(%r1) + add %r3, %r3, %r4 # add contents of table to table address + mtctr %r3 + bctr # jump to it + +# Each of the ret_typeX code fragments has to be exactly 16 bytes long +# (4 instructions). For cache effectiveness we align to a 16 byte boundary +# first. + .align 4 + +.Lret_type0: +# case FFI_TYPE_VOID + mtlr %r0 + addi %r1, %r1, 240 + blr + nop +# case FFI_TYPE_INT + lwa %r3, 112+4(%r1) + mtlr %r0 + addi %r1, %r1, 240 + blr +# case FFI_TYPE_FLOAT + lfs %f1, 112+0(%r1) + mtlr %r0 + addi %r1, %r1, 240 + blr +# case FFI_TYPE_DOUBLE + lfd %f1, 112+0(%r1) + mtlr %r0 + addi %r1, %r1, 240 + blr +# case FFI_TYPE_LONGDOUBLE + lfd %f1, 112+0(%r1) + mtlr %r0 + lfd %f2, 112+8(%r1) + b .Lfinish +# case FFI_TYPE_UINT8 + lbz %r3, 112+7(%r1) + mtlr %r0 + addi %r1, %r1, 240 + blr +# case FFI_TYPE_SINT8 + lbz %r3, 112+7(%r1) + extsb %r3,%r3 + mtlr %r0 + b .Lfinish +# case FFI_TYPE_UINT16 + lhz %r3, 112+6(%r1) + mtlr %r0 +.Lfinish: + addi %r1, %r1, 240 + blr +# case FFI_TYPE_SINT16 + lha %r3, 112+6(%r1) + mtlr %r0 + addi %r1, %r1, 240 + blr +# case FFI_TYPE_UINT32 + lwz %r3, 112+4(%r1) + mtlr %r0 + addi %r1, %r1, 240 + blr +# case FFI_TYPE_SINT32 + lwa %r3, 112+4(%r1) + mtlr %r0 + addi %r1, %r1, 240 + blr +# case FFI_TYPE_UINT64 + ld %r3, 112+0(%r1) + mtlr %r0 + addi %r1, %r1, 240 + blr +# case FFI_TYPE_SINT64 + ld %r3, 112+0(%r1) + mtlr %r0 + addi %r1, %r1, 240 + blr +# case FFI_TYPE_STRUCT + mtlr %r0 + addi %r1, %r1, 240 + blr + nop +# case FFI_TYPE_POINTER + ld %r3, 112+0(%r1) + mtlr %r0 + addi %r1, %r1, 240 + blr +# esac +.LFE1: + .long 0 + .byte 0,12,0,1,128,0,0,0 + .size .ffi_closure_LINUX64,.-.ffi_closure_LINUX64 + + .section .eh_frame,EH_FRAME_FLAGS,@progbits +.Lframe1: + .4byte .LECIE1-.LSCIE1 # Length of Common Information Entry +.LSCIE1: + .4byte 0x0 # CIE Identifier Tag + .byte 0x1 # CIE Version + .ascii "zR\0" # CIE Augmentation + .uleb128 0x1 # CIE Code Alignment Factor + .sleb128 -8 # CIE Data Alignment Factor + .byte 0x41 # CIE RA Column + .uleb128 0x1 # Augmentation size + .byte 0x14 # FDE Encoding (pcrel udata8) + .byte 0xc # DW_CFA_def_cfa + .uleb128 0x1 + .uleb128 0x0 + .align 3 +.LECIE1: +.LSFDE1: + .4byte .LEFDE1-.LASFDE1 # FDE Length +.LASFDE1: + .4byte .LASFDE1-.Lframe1 # FDE CIE offset + .8byte .LFB1-. # FDE initial location + .8byte .LFE1-.LFB1 # FDE address range + .uleb128 0x0 # Augmentation size + .byte 0x2 # DW_CFA_advance_loc1 + .byte .LCFI0-.LFB1 + .byte 0xe # DW_CFA_def_cfa_offset + .uleb128 240 + .byte 0x11 # DW_CFA_offset_extended_sf + .uleb128 0x41 + .sleb128 -2 + .align 3 +.LEFDE1: +#endif + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libffi/src/powerpc/ppc_closure.S b/libffi/src/powerpc/ppc_closure.S new file mode 100644 index 000000000..56f7d1af2 --- /dev/null +++ b/libffi/src/powerpc/ppc_closure.S @@ -0,0 +1,327 @@ +/* ----------------------------------------------------------------------- + sysv.h - Copyright (c) 2003 Jakub Jelinek <jakub@redhat.com> + Copyright (c) 2008 Red Hat, Inc. + + PowerPC Assembly glue. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> +#include <powerpc/asm.h> + + .file "ppc_closure.S" + +#ifndef __powerpc64__ + +ENTRY(ffi_closure_SYSV) +.LFB1: + stwu %r1,-144(%r1) +.LCFI0: + mflr %r0 +.LCFI1: + stw %r0,148(%r1) + +# we want to build up an areas for the parameters passed +# in registers (both floating point and integer) + + # so first save gpr 3 to gpr 10 (aligned to 4) + stw %r3, 16(%r1) + stw %r4, 20(%r1) + stw %r5, 24(%r1) + stw %r6, 28(%r1) + stw %r7, 32(%r1) + stw %r8, 36(%r1) + stw %r9, 40(%r1) + stw %r10,44(%r1) + +#ifndef __NO_FPRS__ + # next save fpr 1 to fpr 8 (aligned to 8) + stfd %f1, 48(%r1) + stfd %f2, 56(%r1) + stfd %f3, 64(%r1) + stfd %f4, 72(%r1) + stfd %f5, 80(%r1) + stfd %f6, 88(%r1) + stfd %f7, 96(%r1) + stfd %f8, 104(%r1) +#endif + + # set up registers for the routine that actually does the work + # get the context pointer from the trampoline + mr %r3,%r11 + + # now load up the pointer to the result storage + addi %r4,%r1,112 + + # now load up the pointer to the saved gpr registers + addi %r5,%r1,16 + + # now load up the pointer to the saved fpr registers */ + addi %r6,%r1,48 + + # now load up the pointer to the outgoing parameter + # stack in the previous frame + # i.e. the previous frame pointer + 8 + addi %r7,%r1,152 + + # make the call + bl ffi_closure_helper_SYSV@local +.Lret: + # now r3 contains the return type + # so use it to look up in a table + # so we know how to deal with each type + + # look up the proper starting point in table + # by using return type as offset + + mflr %r4 # move address of .Lret to r4 + slwi %r3,%r3,4 # now multiply return type by 16 + addi %r4, %r4, .Lret_type0 - .Lret + lwz %r0,148(%r1) + add %r3,%r3,%r4 # add contents of table to table address + mtctr %r3 + bctr # jump to it +.LFE1: + +# Each of the ret_typeX code fragments has to be exactly 16 bytes long +# (4 instructions). For cache effectiveness we align to a 16 byte boundary +# first. + .align 4 +# case FFI_TYPE_VOID +.Lret_type0: + mtlr %r0 + addi %r1,%r1,144 + blr + nop + +# case FFI_TYPE_INT + lwz %r3,112+0(%r1) + mtlr %r0 +.Lfinish: + addi %r1,%r1,144 + blr + +# case FFI_TYPE_FLOAT + lfs %f1,112+0(%r1) + mtlr %r0 + addi %r1,%r1,144 + blr + +# case FFI_TYPE_DOUBLE + lfd %f1,112+0(%r1) + mtlr %r0 + addi %r1,%r1,144 + blr + +# case FFI_TYPE_LONGDOUBLE + lfd %f1,112+0(%r1) + lfd %f2,112+8(%r1) + mtlr %r0 + b .Lfinish + +# case FFI_TYPE_UINT8 + lbz %r3,112+3(%r1) + mtlr %r0 + addi %r1,%r1,144 + blr + +# case FFI_TYPE_SINT8 + lbz %r3,112+3(%r1) + extsb %r3,%r3 + mtlr %r0 + b .Lfinish + +# case FFI_TYPE_UINT16 + lhz %r3,112+2(%r1) + mtlr %r0 + addi %r1,%r1,144 + blr + +# case FFI_TYPE_SINT16 + lha %r3,112+2(%r1) + mtlr %r0 + addi %r1,%r1,144 + blr + +# case FFI_TYPE_UINT32 + lwz %r3,112+0(%r1) + mtlr %r0 + addi %r1,%r1,144 + blr + +# case FFI_TYPE_SINT32 + lwz %r3,112+0(%r1) + mtlr %r0 + addi %r1,%r1,144 + blr + +# case FFI_TYPE_UINT64 + lwz %r3,112+0(%r1) + lwz %r4,112+4(%r1) + mtlr %r0 + b .Lfinish + +# case FFI_TYPE_SINT64 + lwz %r3,112+0(%r1) + lwz %r4,112+4(%r1) + mtlr %r0 + b .Lfinish + +# case FFI_TYPE_STRUCT + mtlr %r0 + addi %r1,%r1,144 + blr + nop + +# case FFI_TYPE_POINTER + lwz %r3,112+0(%r1) + mtlr %r0 + addi %r1,%r1,144 + blr + +# case FFI_TYPE_UINT128 + lwz %r3,112+0(%r1) + lwz %r4,112+4(%r1) + lwz %r5,112+8(%r1) + bl .Luint128 + +# The return types below are only used when the ABI type is FFI_SYSV. +# case FFI_SYSV_TYPE_SMALL_STRUCT + 1. One byte struct. + lbz %r3,112+0(%r1) + mtlr %r0 + addi %r1,%r1,144 + blr + +# case FFI_SYSV_TYPE_SMALL_STRUCT + 2. Two byte struct. + lhz %r3,112+0(%r1) + mtlr %r0 + addi %r1,%r1,144 + blr + +# case FFI_SYSV_TYPE_SMALL_STRUCT + 3. Three byte struct. + lwz %r3,112+0(%r1) + srwi %r3,%r3,8 + mtlr %r0 + b .Lfinish + +# case FFI_SYSV_TYPE_SMALL_STRUCT + 4. Four byte struct. + lwz %r3,112+0(%r1) + mtlr %r0 + addi %r1,%r1,144 + blr + +# case FFI_SYSV_TYPE_SMALL_STRUCT + 5. Five byte struct. + lwz %r3,112+0(%r1) + lwz %r4,112+4(%r1) + li %r5,24 + b .Lstruct567 + +# case FFI_SYSV_TYPE_SMALL_STRUCT + 6. Six byte struct. + lwz %r3,112+0(%r1) + lwz %r4,112+4(%r1) + li %r5,16 + b .Lstruct567 + +# case FFI_SYSV_TYPE_SMALL_STRUCT + 7. Seven byte struct. + lwz %r3,112+0(%r1) + lwz %r4,112+4(%r1) + li %r5,8 + b .Lstruct567 + +# case FFI_SYSV_TYPE_SMALL_STRUCT + 8. Eight byte struct. + lwz %r3,112+0(%r1) + lwz %r4,112+4(%r1) + mtlr %r0 + b .Lfinish + +.Lstruct567: + subfic %r6,%r5,32 + srw %r4,%r4,%r5 + slw %r6,%r3,%r6 + srw %r3,%r3,%r5 + or %r4,%r6,%r4 + mtlr %r0 + addi %r1,%r1,144 + blr + +.Luint128: + lwz %r6,112+12(%r1) + mtlr %r0 + addi %r1,%r1,144 + blr + +END(ffi_closure_SYSV) + + .section ".eh_frame",EH_FRAME_FLAGS,@progbits +.Lframe1: + .4byte .LECIE1-.LSCIE1 # Length of Common Information Entry +.LSCIE1: + .4byte 0x0 # CIE Identifier Tag + .byte 0x1 # CIE Version +#if defined _RELOCATABLE || defined __PIC__ + .ascii "zR\0" # CIE Augmentation +#else + .ascii "\0" # CIE Augmentation +#endif + .uleb128 0x1 # CIE Code Alignment Factor + .sleb128 -4 # CIE Data Alignment Factor + .byte 0x41 # CIE RA Column +#if defined _RELOCATABLE || defined __PIC__ + .uleb128 0x1 # Augmentation size + .byte 0x1b # FDE Encoding (pcrel sdata4) +#endif + .byte 0xc # DW_CFA_def_cfa + .uleb128 0x1 + .uleb128 0x0 + .align 2 +.LECIE1: +.LSFDE1: + .4byte .LEFDE1-.LASFDE1 # FDE Length +.LASFDE1: + .4byte .LASFDE1-.Lframe1 # FDE CIE offset +#if defined _RELOCATABLE || defined __PIC__ + .4byte .LFB1-. # FDE initial location +#else + .4byte .LFB1 # FDE initial location +#endif + .4byte .LFE1-.LFB1 # FDE address range +#if defined _RELOCATABLE || defined __PIC__ + .uleb128 0x0 # Augmentation size +#endif + .byte 0x4 # DW_CFA_advance_loc4 + .4byte .LCFI0-.LFB1 + .byte 0xe # DW_CFA_def_cfa_offset + .uleb128 144 + .byte 0x4 # DW_CFA_advance_loc4 + .4byte .LCFI1-.LCFI0 + .byte 0x11 # DW_CFA_offset_extended_sf + .uleb128 0x41 + .sleb128 -1 + .align 2 +.LEFDE1: + +#endif + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libffi/src/powerpc/sysv.S b/libffi/src/powerpc/sysv.S new file mode 100644 index 000000000..96ea22b0b --- /dev/null +++ b/libffi/src/powerpc/sysv.S @@ -0,0 +1,219 @@ +/* ----------------------------------------------------------------------- + sysv.S - Copyright (c) 1998 Geoffrey Keating + Copyright (C) 2007 Free Software Foundation, Inc + + PowerPC Assembly glue. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> +#include <powerpc/asm.h> + +#ifndef __powerpc64__ + .globl ffi_prep_args_SYSV +ENTRY(ffi_call_SYSV) +.LFB1: + /* Save the old stack pointer as AP. */ + mr %r8,%r1 + +.LCFI0: + /* Allocate the stack space we need. */ + stwux %r1,%r1,%r4 + /* Save registers we use. */ + mflr %r9 + stw %r28,-16(%r8) +.LCFI1: + stw %r29,-12(%r8) +.LCFI2: + stw %r30, -8(%r8) +.LCFI3: + stw %r31, -4(%r8) +.LCFI4: + stw %r9, 4(%r8) +.LCFI5: + + /* Save arguments over call... */ + mr %r31,%r5 /* flags, */ + mr %r30,%r6 /* rvalue, */ + mr %r29,%r7 /* function address, */ + mr %r28,%r8 /* our AP. */ +.LCFI6: + + /* Call ffi_prep_args_SYSV. */ + mr %r4,%r1 + bl ffi_prep_args_SYSV@local + + /* Now do the call. */ + /* Set up cr1 with bits 4-7 of the flags. */ + mtcrf 0x40,%r31 + /* Get the address to call into CTR. */ + mtctr %r29 + /* Load all those argument registers. */ + lwz %r3,-16-(8*4)(%r28) + lwz %r4,-16-(7*4)(%r28) + lwz %r5,-16-(6*4)(%r28) + lwz %r6,-16-(5*4)(%r28) + bf- 5,1f + nop + lwz %r7,-16-(4*4)(%r28) + lwz %r8,-16-(3*4)(%r28) + lwz %r9,-16-(2*4)(%r28) + lwz %r10,-16-(1*4)(%r28) + nop +1: + + /* Load all the FP registers. */ + bf- 6,2f + lfd %f1,-16-(8*4)-(8*8)(%r28) + lfd %f2,-16-(8*4)-(7*8)(%r28) + lfd %f3,-16-(8*4)-(6*8)(%r28) + lfd %f4,-16-(8*4)-(5*8)(%r28) + nop + lfd %f5,-16-(8*4)-(4*8)(%r28) + lfd %f6,-16-(8*4)-(3*8)(%r28) + lfd %f7,-16-(8*4)-(2*8)(%r28) + lfd %f8,-16-(8*4)-(1*8)(%r28) +2: + + /* Make the call. */ + bctrl + + /* Now, deal with the return value. */ + mtcrf 0x01,%r31 /* cr7 */ + bt- 31,L(small_struct_return_value) + bt- 30,L(done_return_value) + bt- 29,L(fp_return_value) + stw %r3,0(%r30) + bf+ 28,L(done_return_value) + stw %r4,4(%r30) + mtcrf 0x02,%r31 /* cr6 */ + bf 27,L(done_return_value) + stw %r5,8(%r30) + stw %r6,12(%r30) + /* Fall through... */ + +L(done_return_value): + /* Restore the registers we used and return. */ + lwz %r9, 4(%r28) + lwz %r31, -4(%r28) + mtlr %r9 + lwz %r30, -8(%r28) + lwz %r29,-12(%r28) + lwz %r28,-16(%r28) + lwz %r1,0(%r1) + blr + +L(fp_return_value): + bf 28,L(float_return_value) + stfd %f1,0(%r30) + mtcrf 0x02,%r31 /* cr6 */ + bf 27,L(done_return_value) + stfd %f2,8(%r30) + b L(done_return_value) +L(float_return_value): + stfs %f1,0(%r30) + b L(done_return_value) + +L(small_struct_return_value): + extrwi %r6,%r31,2,19 /* number of bytes padding = shift/8 */ + mtcrf 0x02,%r31 /* copy flags to cr[24:27] (cr6) */ + extrwi %r5,%r31,5,19 /* r5 <- number of bits of padding */ + subfic %r6,%r6,4 /* r6 <- number of useful bytes in r3 */ + bf- 25,L(done_return_value) /* struct in r3 ? if not, done. */ +/* smst_one_register: */ + slw %r3,%r3,%r5 /* Left-justify value in r3 */ + mtxer %r6 /* move byte count to XER ... */ + stswx %r3,0,%r30 /* ... and store that many bytes */ + bf+ 26,L(done_return_value) /* struct in r3:r4 ? */ + add %r6,%r6,%r30 /* adjust pointer */ + stswi %r4,%r6,4 /* store last four bytes */ + b L(done_return_value) + +.LFE1: +END(ffi_call_SYSV) + + .section ".eh_frame",EH_FRAME_FLAGS,@progbits +.Lframe1: + .4byte .LECIE1-.LSCIE1 /* Length of Common Information Entry */ +.LSCIE1: + .4byte 0x0 /* CIE Identifier Tag */ + .byte 0x1 /* CIE Version */ +#if defined _RELOCATABLE || defined __PIC__ + .ascii "zR\0" /* CIE Augmentation */ +#else + .ascii "\0" /* CIE Augmentation */ +#endif + .uleb128 0x1 /* CIE Code Alignment Factor */ + .sleb128 -4 /* CIE Data Alignment Factor */ + .byte 0x41 /* CIE RA Column */ +#if defined _RELOCATABLE || defined __PIC__ + .uleb128 0x1 /* Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ +#endif + .byte 0xc /* DW_CFA_def_cfa */ + .uleb128 0x1 + .uleb128 0x0 + .align 2 +.LECIE1: +.LSFDE1: + .4byte .LEFDE1-.LASFDE1 /* FDE Length */ +.LASFDE1: + .4byte .LASFDE1-.Lframe1 /* FDE CIE offset */ +#if defined _RELOCATABLE || defined __PIC__ + .4byte .LFB1-. /* FDE initial location */ +#else + .4byte .LFB1 /* FDE initial location */ +#endif + .4byte .LFE1-.LFB1 /* FDE address range */ +#if defined _RELOCATABLE || defined __PIC__ + .uleb128 0x0 /* Augmentation size */ +#endif + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFI0-.LFB1 + .byte 0xd /* DW_CFA_def_cfa_register */ + .uleb128 0x08 + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFI5-.LCFI0 + .byte 0x11 /* DW_CFA_offset_extended_sf */ + .uleb128 0x41 + .sleb128 -1 + .byte 0x9f /* DW_CFA_offset, column 0x1f */ + .uleb128 0x1 + .byte 0x9e /* DW_CFA_offset, column 0x1e */ + .uleb128 0x2 + .byte 0x9d /* DW_CFA_offset, column 0x1d */ + .uleb128 0x3 + .byte 0x9c /* DW_CFA_offset, column 0x1c */ + .uleb128 0x4 + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFI6-.LCFI5 + .byte 0xd /* DW_CFA_def_cfa_register */ + .uleb128 0x1c + .align 2 +.LEFDE1: +#endif + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libffi/src/prep_cif.c b/libffi/src/prep_cif.c new file mode 100644 index 000000000..c1c3b9a6c --- /dev/null +++ b/libffi/src/prep_cif.c @@ -0,0 +1,171 @@ +/* ----------------------------------------------------------------------- + prep_cif.c - Copyright (c) 1996, 1998, 2007 Red Hat, Inc. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> +#include <stdlib.h> + +/* Round up to FFI_SIZEOF_ARG. */ + +#define STACK_ARG_SIZE(x) ALIGN(x, FFI_SIZEOF_ARG) + +/* Perform machine independent initialization of aggregate type + specifications. */ + +static ffi_status initialize_aggregate(ffi_type *arg) +{ + ffi_type **ptr; + + FFI_ASSERT(arg != NULL); + + FFI_ASSERT(arg->elements != NULL); + FFI_ASSERT(arg->size == 0); + FFI_ASSERT(arg->alignment == 0); + + ptr = &(arg->elements[0]); + + while ((*ptr) != NULL) + { + if (((*ptr)->size == 0) && (initialize_aggregate((*ptr)) != FFI_OK)) + return FFI_BAD_TYPEDEF; + + /* Perform a sanity check on the argument type */ + FFI_ASSERT_VALID_TYPE(*ptr); + + arg->size = ALIGN(arg->size, (*ptr)->alignment); + arg->size += (*ptr)->size; + + arg->alignment = (arg->alignment > (*ptr)->alignment) ? + arg->alignment : (*ptr)->alignment; + + ptr++; + } + + /* Structure size includes tail padding. This is important for + structures that fit in one register on ABIs like the PowerPC64 + Linux ABI that right justify small structs in a register. + It's also needed for nested structure layout, for example + struct A { long a; char b; }; struct B { struct A x; char y; }; + should find y at an offset of 2*sizeof(long) and result in a + total size of 3*sizeof(long). */ + arg->size = ALIGN (arg->size, arg->alignment); + + if (arg->size == 0) + return FFI_BAD_TYPEDEF; + else + return FFI_OK; +} + +#ifndef __CRIS__ +/* The CRIS ABI specifies structure elements to have byte + alignment only, so it completely overrides this functions, + which assumes "natural" alignment and padding. */ + +/* Perform machine independent ffi_cif preparation, then call + machine dependent routine. */ + +ffi_status ffi_prep_cif(ffi_cif *cif, ffi_abi abi, unsigned int nargs, + ffi_type *rtype, ffi_type **atypes) +{ + unsigned bytes = 0; + unsigned int i; + ffi_type **ptr; + + FFI_ASSERT(cif != NULL); + FFI_ASSERT((abi > FFI_FIRST_ABI) && (abi <= FFI_DEFAULT_ABI)); + + cif->abi = abi; + cif->arg_types = atypes; + cif->nargs = nargs; + cif->rtype = rtype; + + cif->flags = 0; + + /* Initialize the return type if necessary */ + if ((cif->rtype->size == 0) && (initialize_aggregate(cif->rtype) != FFI_OK)) + return FFI_BAD_TYPEDEF; + + /* Perform a sanity check on the return type */ + FFI_ASSERT_VALID_TYPE(cif->rtype); + + /* x86, x86-64 and s390 stack space allocation is handled in prep_machdep. */ +#if !defined M68K && !defined __i386__ && !defined __x86_64__ && !defined S390 && !defined PA + /* Make space for the return structure pointer */ + if (cif->rtype->type == FFI_TYPE_STRUCT +#ifdef SPARC + && (cif->abi != FFI_V9 || cif->rtype->size > 32) +#endif + ) + bytes = STACK_ARG_SIZE(sizeof(void*)); +#endif + + for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) + { + + /* Initialize any uninitialized aggregate type definitions */ + if (((*ptr)->size == 0) && (initialize_aggregate((*ptr)) != FFI_OK)) + return FFI_BAD_TYPEDEF; + + /* Perform a sanity check on the argument type, do this + check after the initialization. */ + FFI_ASSERT_VALID_TYPE(*ptr); + +#if !defined __i386__ && !defined __x86_64__ && !defined S390 && !defined PA +#ifdef SPARC + if (((*ptr)->type == FFI_TYPE_STRUCT + && ((*ptr)->size > 16 || cif->abi != FFI_V9)) + || ((*ptr)->type == FFI_TYPE_LONGDOUBLE + && cif->abi != FFI_V9)) + bytes += sizeof(void*); + else +#endif + { + /* Add any padding if necessary */ + if (((*ptr)->alignment - 1) & bytes) + bytes = ALIGN(bytes, (*ptr)->alignment); + + bytes += STACK_ARG_SIZE((*ptr)->size); + } +#endif + } + + cif->bytes = bytes; + + /* Perform machine dependent cif processing */ + return ffi_prep_cif_machdep(cif); +} +#endif /* not __CRIS__ */ + +#if FFI_CLOSURES + +ffi_status +ffi_prep_closure (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data) +{ + return ffi_prep_closure_loc (closure, cif, fun, user_data, closure); +} + +#endif diff --git a/libffi/src/raw_api.c b/libffi/src/raw_api.c new file mode 100644 index 000000000..ce21372e2 --- /dev/null +++ b/libffi/src/raw_api.c @@ -0,0 +1,254 @@ +/* ----------------------------------------------------------------------- + raw_api.c - Copyright (c) 1999, 2008 Red Hat, Inc. + + Author: Kresten Krab Thorup <krab@gnu.org> + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +/* This file defines generic functions for use with the raw api. */ + +#include <ffi.h> +#include <ffi_common.h> + +#if !FFI_NO_RAW_API + +size_t +ffi_raw_size (ffi_cif *cif) +{ + size_t result = 0; + int i; + + ffi_type **at = cif->arg_types; + + for (i = cif->nargs-1; i >= 0; i--, at++) + { +#if !FFI_NO_STRUCTS + if ((*at)->type == FFI_TYPE_STRUCT) + result += ALIGN (sizeof (void*), FFI_SIZEOF_ARG); + else +#endif + result += ALIGN ((*at)->size, FFI_SIZEOF_ARG); + } + + return result; +} + + +void +ffi_raw_to_ptrarray (ffi_cif *cif, ffi_raw *raw, void **args) +{ + unsigned i; + ffi_type **tp = cif->arg_types; + +#if WORDS_BIGENDIAN + + for (i = 0; i < cif->nargs; i++, tp++, args++) + { + switch ((*tp)->type) + { + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 1); + break; + + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 2); + break; + +#if FFI_SIZEOF_ARG >= 4 + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + *args = (void*) ((char*)(raw++) + FFI_SIZEOF_ARG - 4); + break; +#endif + +#if !FFI_NO_STRUCTS + case FFI_TYPE_STRUCT: + *args = (raw++)->ptr; + break; +#endif + + case FFI_TYPE_POINTER: + *args = (void*) &(raw++)->ptr; + break; + + default: + *args = raw; + raw += ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG; + } + } + +#else /* WORDS_BIGENDIAN */ + +#if !PDP + + /* then assume little endian */ + for (i = 0; i < cif->nargs; i++, tp++, args++) + { +#if !FFI_NO_STRUCTS + if ((*tp)->type == FFI_TYPE_STRUCT) + { + *args = (raw++)->ptr; + } + else +#endif + { + *args = (void*) raw; + raw += ALIGN ((*tp)->size, sizeof (void*)) / sizeof (void*); + } + } + +#else +#error "pdp endian not supported" +#endif /* ! PDP */ + +#endif /* WORDS_BIGENDIAN */ +} + +void +ffi_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_raw *raw) +{ + unsigned i; + ffi_type **tp = cif->arg_types; + + for (i = 0; i < cif->nargs; i++, tp++, args++) + { + switch ((*tp)->type) + { + case FFI_TYPE_UINT8: + (raw++)->uint = *(UINT8*) (*args); + break; + + case FFI_TYPE_SINT8: + (raw++)->sint = *(SINT8*) (*args); + break; + + case FFI_TYPE_UINT16: + (raw++)->uint = *(UINT16*) (*args); + break; + + case FFI_TYPE_SINT16: + (raw++)->sint = *(SINT16*) (*args); + break; + +#if FFI_SIZEOF_ARG >= 4 + case FFI_TYPE_UINT32: + (raw++)->uint = *(UINT32*) (*args); + break; + + case FFI_TYPE_SINT32: + (raw++)->sint = *(SINT32*) (*args); + break; +#endif + +#if !FFI_NO_STRUCTS + case FFI_TYPE_STRUCT: + (raw++)->ptr = *args; + break; +#endif + + case FFI_TYPE_POINTER: + (raw++)->ptr = **(void***) args; + break; + + default: + memcpy ((void*) raw->data, (void*)*args, (*tp)->size); + raw += ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG; + } + } +} + +#if !FFI_NATIVE_RAW_API + + +/* This is a generic definition of ffi_raw_call, to be used if the + * native system does not provide a machine-specific implementation. + * Having this, allows code to be written for the raw API, without + * the need for system-specific code to handle input in that format; + * these following couple of functions will handle the translation forth + * and back automatically. */ + +void ffi_raw_call (ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *raw) +{ + void **avalue = (void**) alloca (cif->nargs * sizeof (void*)); + ffi_raw_to_ptrarray (cif, raw, avalue); + ffi_call (cif, fn, rvalue, avalue); +} + +#if FFI_CLOSURES /* base system provides closures */ + +static void +ffi_translate_args (ffi_cif *cif, void *rvalue, + void **avalue, void *user_data) +{ + ffi_raw *raw = (ffi_raw*)alloca (ffi_raw_size (cif)); + ffi_raw_closure *cl = (ffi_raw_closure*)user_data; + + ffi_ptrarray_to_raw (cif, avalue, raw); + (*cl->fun) (cif, rvalue, raw, cl->user_data); +} + +ffi_status +ffi_prep_raw_closure_loc (ffi_raw_closure* cl, + ffi_cif *cif, + void (*fun)(ffi_cif*,void*,ffi_raw*,void*), + void *user_data, + void *codeloc) +{ + ffi_status status; + + status = ffi_prep_closure_loc ((ffi_closure*) cl, + cif, + &ffi_translate_args, + codeloc, + codeloc); + if (status == FFI_OK) + { + cl->fun = fun; + cl->user_data = user_data; + } + + return status; +} + +#endif /* FFI_CLOSURES */ +#endif /* !FFI_NATIVE_RAW_API */ + +#if FFI_CLOSURES + +/* Again, here is the generic version of ffi_prep_raw_closure, which + * will install an intermediate "hub" for translation of arguments from + * the pointer-array format, to the raw format */ + +ffi_status +ffi_prep_raw_closure (ffi_raw_closure* cl, + ffi_cif *cif, + void (*fun)(ffi_cif*,void*,ffi_raw*,void*), + void *user_data) +{ + return ffi_prep_raw_closure_loc (cl, cif, fun, user_data, cl); +} + +#endif /* FFI_CLOSURES */ + +#endif /* !FFI_NO_RAW_API */ diff --git a/libffi/src/s390/ffi.c b/libffi/src/s390/ffi.c new file mode 100644 index 000000000..ca2675bc8 --- /dev/null +++ b/libffi/src/s390/ffi.c @@ -0,0 +1,780 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 2000, 2007 Software AG + Copyright (c) 2008 Red Hat, Inc + + S390 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ +/*====================================================================*/ +/* Includes */ +/* -------- */ +/*====================================================================*/ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> +#include <stdio.h> + +/*====================== End of Includes =============================*/ + +/*====================================================================*/ +/* Defines */ +/* ------- */ +/*====================================================================*/ + +/* Maximum number of GPRs available for argument passing. */ +#define MAX_GPRARGS 5 + +/* Maximum number of FPRs available for argument passing. */ +#ifdef __s390x__ +#define MAX_FPRARGS 4 +#else +#define MAX_FPRARGS 2 +#endif + +/* Round to multiple of 16. */ +#define ROUND_SIZE(size) (((size) + 15) & ~15) + +/* If these values change, sysv.S must be adapted! */ +#define FFI390_RET_VOID 0 +#define FFI390_RET_STRUCT 1 +#define FFI390_RET_FLOAT 2 +#define FFI390_RET_DOUBLE 3 +#define FFI390_RET_INT32 4 +#define FFI390_RET_INT64 5 + +/*===================== End of Defines ===============================*/ + +/*====================================================================*/ +/* Prototypes */ +/* ---------- */ +/*====================================================================*/ + +static void ffi_prep_args (unsigned char *, extended_cif *); +void +#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) +__attribute__ ((visibility ("hidden"))) +#endif +ffi_closure_helper_SYSV (ffi_closure *, unsigned long *, + unsigned long long *, unsigned long *); + +/*====================== End of Prototypes ===========================*/ + +/*====================================================================*/ +/* Externals */ +/* --------- */ +/*====================================================================*/ + +extern void ffi_call_SYSV(unsigned, + extended_cif *, + void (*)(unsigned char *, extended_cif *), + unsigned, + void *, + void (*fn)(void)); + +extern void ffi_closure_SYSV(void); + +/*====================== End of Externals ============================*/ + +/*====================================================================*/ +/* */ +/* Name - ffi_check_struct_type. */ +/* */ +/* Function - Determine if a structure can be passed within a */ +/* general purpose or floating point register. */ +/* */ +/*====================================================================*/ + +static int +ffi_check_struct_type (ffi_type *arg) +{ + size_t size = arg->size; + + /* If the struct has just one element, look at that element + to find out whether to consider the struct as floating point. */ + while (arg->type == FFI_TYPE_STRUCT + && arg->elements[0] && !arg->elements[1]) + arg = arg->elements[0]; + + /* Structs of size 1, 2, 4, and 8 are passed in registers, + just like the corresponding int/float types. */ + switch (size) + { + case 1: + return FFI_TYPE_UINT8; + + case 2: + return FFI_TYPE_UINT16; + + case 4: + if (arg->type == FFI_TYPE_FLOAT) + return FFI_TYPE_FLOAT; + else + return FFI_TYPE_UINT32; + + case 8: + if (arg->type == FFI_TYPE_DOUBLE) + return FFI_TYPE_DOUBLE; + else + return FFI_TYPE_UINT64; + + default: + break; + } + + /* Other structs are passed via a pointer to the data. */ + return FFI_TYPE_POINTER; +} + +/*======================== End of Routine ============================*/ + +/*====================================================================*/ +/* */ +/* Name - ffi_prep_args. */ +/* */ +/* Function - Prepare parameters for call to function. */ +/* */ +/* ffi_prep_args is called by the assembly routine once stack space */ +/* has been allocated for the function's arguments. */ +/* */ +/*====================================================================*/ + +static void +ffi_prep_args (unsigned char *stack, extended_cif *ecif) +{ + /* The stack space will be filled with those areas: + + FPR argument register save area (highest addresses) + GPR argument register save area + temporary struct copies + overflow argument area (lowest addresses) + + We set up the following pointers: + + p_fpr: bottom of the FPR area (growing upwards) + p_gpr: bottom of the GPR area (growing upwards) + p_ov: bottom of the overflow area (growing upwards) + p_struct: top of the struct copy area (growing downwards) + + All areas are kept aligned to twice the word size. */ + + int gpr_off = ecif->cif->bytes; + int fpr_off = gpr_off + ROUND_SIZE (MAX_GPRARGS * sizeof (long)); + + unsigned long long *p_fpr = (unsigned long long *)(stack + fpr_off); + unsigned long *p_gpr = (unsigned long *)(stack + gpr_off); + unsigned char *p_struct = (unsigned char *)p_gpr; + unsigned long *p_ov = (unsigned long *)stack; + + int n_fpr = 0; + int n_gpr = 0; + int n_ov = 0; + + ffi_type **ptr; + void **p_argv = ecif->avalue; + int i; + + /* If we returning a structure then we set the first parameter register + to the address of where we are returning this structure. */ + + if (ecif->cif->flags == FFI390_RET_STRUCT) + p_gpr[n_gpr++] = (unsigned long) ecif->rvalue; + + /* Now for the arguments. */ + + for (ptr = ecif->cif->arg_types, i = ecif->cif->nargs; + i > 0; + i--, ptr++, p_argv++) + { + void *arg = *p_argv; + int type = (*ptr)->type; + +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + /* 16-byte long double is passed like a struct. */ + if (type == FFI_TYPE_LONGDOUBLE) + type = FFI_TYPE_STRUCT; +#endif + + /* Check how a structure type is passed. */ + if (type == FFI_TYPE_STRUCT) + { + type = ffi_check_struct_type (*ptr); + + /* If we pass the struct via pointer, copy the data. */ + if (type == FFI_TYPE_POINTER) + { + p_struct -= ROUND_SIZE ((*ptr)->size); + memcpy (p_struct, (char *)arg, (*ptr)->size); + arg = &p_struct; + } + } + + /* Now handle all primitive int/pointer/float data types. */ + switch (type) + { + case FFI_TYPE_DOUBLE: + if (n_fpr < MAX_FPRARGS) + p_fpr[n_fpr++] = *(unsigned long long *) arg; + else +#ifdef __s390x__ + p_ov[n_ov++] = *(unsigned long *) arg; +#else + p_ov[n_ov++] = ((unsigned long *) arg)[0], + p_ov[n_ov++] = ((unsigned long *) arg)[1]; +#endif + break; + + case FFI_TYPE_FLOAT: + if (n_fpr < MAX_FPRARGS) + p_fpr[n_fpr++] = (long long) *(unsigned int *) arg << 32; + else + p_ov[n_ov++] = *(unsigned int *) arg; + break; + + case FFI_TYPE_POINTER: + if (n_gpr < MAX_GPRARGS) + p_gpr[n_gpr++] = (unsigned long)*(unsigned char **) arg; + else + p_ov[n_ov++] = (unsigned long)*(unsigned char **) arg; + break; + + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: +#ifdef __s390x__ + if (n_gpr < MAX_GPRARGS) + p_gpr[n_gpr++] = *(unsigned long *) arg; + else + p_ov[n_ov++] = *(unsigned long *) arg; +#else + if (n_gpr == MAX_GPRARGS-1) + n_gpr = MAX_GPRARGS; + if (n_gpr < MAX_GPRARGS) + p_gpr[n_gpr++] = ((unsigned long *) arg)[0], + p_gpr[n_gpr++] = ((unsigned long *) arg)[1]; + else + p_ov[n_ov++] = ((unsigned long *) arg)[0], + p_ov[n_ov++] = ((unsigned long *) arg)[1]; +#endif + break; + + case FFI_TYPE_UINT32: + if (n_gpr < MAX_GPRARGS) + p_gpr[n_gpr++] = *(unsigned int *) arg; + else + p_ov[n_ov++] = *(unsigned int *) arg; + break; + + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + if (n_gpr < MAX_GPRARGS) + p_gpr[n_gpr++] = *(signed int *) arg; + else + p_ov[n_ov++] = *(signed int *) arg; + break; + + case FFI_TYPE_UINT16: + if (n_gpr < MAX_GPRARGS) + p_gpr[n_gpr++] = *(unsigned short *) arg; + else + p_ov[n_ov++] = *(unsigned short *) arg; + break; + + case FFI_TYPE_SINT16: + if (n_gpr < MAX_GPRARGS) + p_gpr[n_gpr++] = *(signed short *) arg; + else + p_ov[n_ov++] = *(signed short *) arg; + break; + + case FFI_TYPE_UINT8: + if (n_gpr < MAX_GPRARGS) + p_gpr[n_gpr++] = *(unsigned char *) arg; + else + p_ov[n_ov++] = *(unsigned char *) arg; + break; + + case FFI_TYPE_SINT8: + if (n_gpr < MAX_GPRARGS) + p_gpr[n_gpr++] = *(signed char *) arg; + else + p_ov[n_ov++] = *(signed char *) arg; + break; + + default: + FFI_ASSERT (0); + break; + } + } +} + +/*======================== End of Routine ============================*/ + +/*====================================================================*/ +/* */ +/* Name - ffi_prep_cif_machdep. */ +/* */ +/* Function - Perform machine dependent CIF processing. */ +/* */ +/*====================================================================*/ + +ffi_status +ffi_prep_cif_machdep(ffi_cif *cif) +{ + size_t struct_size = 0; + int n_gpr = 0; + int n_fpr = 0; + int n_ov = 0; + + ffi_type **ptr; + int i; + + /* Determine return value handling. */ + + switch (cif->rtype->type) + { + /* Void is easy. */ + case FFI_TYPE_VOID: + cif->flags = FFI390_RET_VOID; + break; + + /* Structures are returned via a hidden pointer. */ + case FFI_TYPE_STRUCT: + cif->flags = FFI390_RET_STRUCT; + n_gpr++; /* We need one GPR to pass the pointer. */ + break; + + /* Floating point values are returned in fpr 0. */ + case FFI_TYPE_FLOAT: + cif->flags = FFI390_RET_FLOAT; + break; + + case FFI_TYPE_DOUBLE: + cif->flags = FFI390_RET_DOUBLE; + break; + +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + cif->flags = FFI390_RET_STRUCT; + n_gpr++; + break; +#endif + /* Integer values are returned in gpr 2 (and gpr 3 + for 64-bit values on 31-bit machines). */ + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + cif->flags = FFI390_RET_INT64; + break; + + case FFI_TYPE_POINTER: + case FFI_TYPE_INT: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + /* These are to be extended to word size. */ +#ifdef __s390x__ + cif->flags = FFI390_RET_INT64; +#else + cif->flags = FFI390_RET_INT32; +#endif + break; + + default: + FFI_ASSERT (0); + break; + } + + /* Now for the arguments. */ + + for (ptr = cif->arg_types, i = cif->nargs; + i > 0; + i--, ptr++) + { + int type = (*ptr)->type; + +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + /* 16-byte long double is passed like a struct. */ + if (type == FFI_TYPE_LONGDOUBLE) + type = FFI_TYPE_STRUCT; +#endif + + /* Check how a structure type is passed. */ + if (type == FFI_TYPE_STRUCT) + { + type = ffi_check_struct_type (*ptr); + + /* If we pass the struct via pointer, we must reserve space + to copy its data for proper call-by-value semantics. */ + if (type == FFI_TYPE_POINTER) + struct_size += ROUND_SIZE ((*ptr)->size); + } + + /* Now handle all primitive int/float data types. */ + switch (type) + { + /* The first MAX_FPRARGS floating point arguments + go in FPRs, the rest overflow to the stack. */ + + case FFI_TYPE_DOUBLE: + if (n_fpr < MAX_FPRARGS) + n_fpr++; + else + n_ov += sizeof (double) / sizeof (long); + break; + + case FFI_TYPE_FLOAT: + if (n_fpr < MAX_FPRARGS) + n_fpr++; + else + n_ov++; + break; + + /* On 31-bit machines, 64-bit integers are passed in GPR pairs, + if one is still available, or else on the stack. If only one + register is free, skip the register (it won't be used for any + subsequent argument either). */ + +#ifndef __s390x__ + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + if (n_gpr == MAX_GPRARGS-1) + n_gpr = MAX_GPRARGS; + if (n_gpr < MAX_GPRARGS) + n_gpr += 2; + else + n_ov += 2; + break; +#endif + + /* Everything else is passed in GPRs (until MAX_GPRARGS + have been used) or overflows to the stack. */ + + default: + if (n_gpr < MAX_GPRARGS) + n_gpr++; + else + n_ov++; + break; + } + } + + /* Total stack space as required for overflow arguments + and temporary structure copies. */ + + cif->bytes = ROUND_SIZE (n_ov * sizeof (long)) + struct_size; + + return FFI_OK; +} + +/*======================== End of Routine ============================*/ + +/*====================================================================*/ +/* */ +/* Name - ffi_call. */ +/* */ +/* Function - Call the FFI routine. */ +/* */ +/*====================================================================*/ + +void +ffi_call(ffi_cif *cif, + void (*fn)(void), + void *rvalue, + void **avalue) +{ + int ret_type = cif->flags; + extended_cif ecif; + + ecif.cif = cif; + ecif.avalue = avalue; + ecif.rvalue = rvalue; + + /* If we don't have a return value, we need to fake one. */ + if (rvalue == NULL) + { + if (ret_type == FFI390_RET_STRUCT) + ecif.rvalue = alloca (cif->rtype->size); + else + ret_type = FFI390_RET_VOID; + } + + switch (cif->abi) + { + case FFI_SYSV: + ffi_call_SYSV (cif->bytes, &ecif, ffi_prep_args, + ret_type, ecif.rvalue, fn); + break; + + default: + FFI_ASSERT (0); + break; + } +} + +/*======================== End of Routine ============================*/ + +/*====================================================================*/ +/* */ +/* Name - ffi_closure_helper_SYSV. */ +/* */ +/* Function - Call a FFI closure target function. */ +/* */ +/*====================================================================*/ + +void +ffi_closure_helper_SYSV (ffi_closure *closure, + unsigned long *p_gpr, + unsigned long long *p_fpr, + unsigned long *p_ov) +{ + unsigned long long ret_buffer; + + void *rvalue = &ret_buffer; + void **avalue; + void **p_arg; + + int n_gpr = 0; + int n_fpr = 0; + int n_ov = 0; + + ffi_type **ptr; + int i; + + /* Allocate buffer for argument list pointers. */ + + p_arg = avalue = alloca (closure->cif->nargs * sizeof (void *)); + + /* If we returning a structure, pass the structure address + directly to the target function. Otherwise, have the target + function store the return value to the GPR save area. */ + + if (closure->cif->flags == FFI390_RET_STRUCT) + rvalue = (void *) p_gpr[n_gpr++]; + + /* Now for the arguments. */ + + for (ptr = closure->cif->arg_types, i = closure->cif->nargs; + i > 0; + i--, p_arg++, ptr++) + { + int deref_struct_pointer = 0; + int type = (*ptr)->type; + +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + /* 16-byte long double is passed like a struct. */ + if (type == FFI_TYPE_LONGDOUBLE) + type = FFI_TYPE_STRUCT; +#endif + + /* Check how a structure type is passed. */ + if (type == FFI_TYPE_STRUCT) + { + type = ffi_check_struct_type (*ptr); + + /* If we pass the struct via pointer, remember to + retrieve the pointer later. */ + if (type == FFI_TYPE_POINTER) + deref_struct_pointer = 1; + } + + /* Pointers are passed like UINTs of the same size. */ + if (type == FFI_TYPE_POINTER) +#ifdef __s390x__ + type = FFI_TYPE_UINT64; +#else + type = FFI_TYPE_UINT32; +#endif + + /* Now handle all primitive int/float data types. */ + switch (type) + { + case FFI_TYPE_DOUBLE: + if (n_fpr < MAX_FPRARGS) + *p_arg = &p_fpr[n_fpr++]; + else + *p_arg = &p_ov[n_ov], + n_ov += sizeof (double) / sizeof (long); + break; + + case FFI_TYPE_FLOAT: + if (n_fpr < MAX_FPRARGS) + *p_arg = &p_fpr[n_fpr++]; + else + *p_arg = (char *)&p_ov[n_ov++] + sizeof (long) - 4; + break; + + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: +#ifdef __s390x__ + if (n_gpr < MAX_GPRARGS) + *p_arg = &p_gpr[n_gpr++]; + else + *p_arg = &p_ov[n_ov++]; +#else + if (n_gpr == MAX_GPRARGS-1) + n_gpr = MAX_GPRARGS; + if (n_gpr < MAX_GPRARGS) + *p_arg = &p_gpr[n_gpr], n_gpr += 2; + else + *p_arg = &p_ov[n_ov], n_ov += 2; +#endif + break; + + case FFI_TYPE_INT: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + if (n_gpr < MAX_GPRARGS) + *p_arg = (char *)&p_gpr[n_gpr++] + sizeof (long) - 4; + else + *p_arg = (char *)&p_ov[n_ov++] + sizeof (long) - 4; + break; + + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + if (n_gpr < MAX_GPRARGS) + *p_arg = (char *)&p_gpr[n_gpr++] + sizeof (long) - 2; + else + *p_arg = (char *)&p_ov[n_ov++] + sizeof (long) - 2; + break; + + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + if (n_gpr < MAX_GPRARGS) + *p_arg = (char *)&p_gpr[n_gpr++] + sizeof (long) - 1; + else + *p_arg = (char *)&p_ov[n_ov++] + sizeof (long) - 1; + break; + + default: + FFI_ASSERT (0); + break; + } + + /* If this is a struct passed via pointer, we need to + actually retrieve that pointer. */ + if (deref_struct_pointer) + *p_arg = *(void **)*p_arg; + } + + + /* Call the target function. */ + (closure->fun) (closure->cif, rvalue, avalue, closure->user_data); + + /* Convert the return value. */ + switch (closure->cif->rtype->type) + { + /* Void is easy, and so is struct. */ + case FFI_TYPE_VOID: + case FFI_TYPE_STRUCT: +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: +#endif + break; + + /* Floating point values are returned in fpr 0. */ + case FFI_TYPE_FLOAT: + p_fpr[0] = (long long) *(unsigned int *) rvalue << 32; + break; + + case FFI_TYPE_DOUBLE: + p_fpr[0] = *(unsigned long long *) rvalue; + break; + + /* Integer values are returned in gpr 2 (and gpr 3 + for 64-bit values on 31-bit machines). */ + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: +#ifdef __s390x__ + p_gpr[0] = *(unsigned long *) rvalue; +#else + p_gpr[0] = ((unsigned long *) rvalue)[0], + p_gpr[1] = ((unsigned long *) rvalue)[1]; +#endif + break; + + case FFI_TYPE_POINTER: + case FFI_TYPE_UINT32: + case FFI_TYPE_UINT16: + case FFI_TYPE_UINT8: + p_gpr[0] = *(unsigned long *) rvalue; + break; + + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + case FFI_TYPE_SINT16: + case FFI_TYPE_SINT8: + p_gpr[0] = *(signed long *) rvalue; + break; + + default: + FFI_ASSERT (0); + break; + } +} + +/*======================== End of Routine ============================*/ + +/*====================================================================*/ +/* */ +/* Name - ffi_prep_closure_loc. */ +/* */ +/* Function - Prepare a FFI closure. */ +/* */ +/*====================================================================*/ + +ffi_status +ffi_prep_closure_loc (ffi_closure *closure, + ffi_cif *cif, + void (*fun) (ffi_cif *, void *, void **, void *), + void *user_data, + void *codeloc) +{ + FFI_ASSERT (cif->abi == FFI_SYSV); + +#ifndef __s390x__ + *(short *)&closure->tramp [0] = 0x0d10; /* basr %r1,0 */ + *(short *)&closure->tramp [2] = 0x9801; /* lm %r0,%r1,6(%r1) */ + *(short *)&closure->tramp [4] = 0x1006; + *(short *)&closure->tramp [6] = 0x07f1; /* br %r1 */ + *(long *)&closure->tramp [8] = (long)codeloc; + *(long *)&closure->tramp[12] = (long)&ffi_closure_SYSV; +#else + *(short *)&closure->tramp [0] = 0x0d10; /* basr %r1,0 */ + *(short *)&closure->tramp [2] = 0xeb01; /* lmg %r0,%r1,14(%r1) */ + *(short *)&closure->tramp [4] = 0x100e; + *(short *)&closure->tramp [6] = 0x0004; + *(short *)&closure->tramp [8] = 0x07f1; /* br %r1 */ + *(long *)&closure->tramp[16] = (long)codeloc; + *(long *)&closure->tramp[24] = (long)&ffi_closure_SYSV; +#endif + + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + + return FFI_OK; +} + +/*======================== End of Routine ============================*/ + diff --git a/libffi/src/s390/ffitarget.h b/libffi/src/s390/ffitarget.h new file mode 100644 index 000000000..386273897 --- /dev/null +++ b/libffi/src/s390/ffitarget.h @@ -0,0 +1,62 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 1996-2003 Red Hat, Inc. + Target configuration macros for S390. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#if defined (__s390x__) +#ifndef S390X +#define S390X +#endif +#endif + +/* ---- System specific configurations ----------------------------------- */ + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + FFI_SYSV, + FFI_DEFAULT_ABI = FFI_SYSV, + FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 +} ffi_abi; +#endif + + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#ifdef S390X +#define FFI_TRAMPOLINE_SIZE 32 +#else +#define FFI_TRAMPOLINE_SIZE 16 +#endif +#define FFI_NATIVE_RAW_API 0 + +#endif + diff --git a/libffi/src/s390/sysv.S b/libffi/src/s390/sysv.S new file mode 100644 index 000000000..4731a3177 --- /dev/null +++ b/libffi/src/s390/sysv.S @@ -0,0 +1,434 @@ +/* ----------------------------------------------------------------------- + sysv.S - Copyright (c) 2000 Software AG + Copyright (c) 2008 Red Hat, Inc. + + S390 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +#ifndef __s390x__ + +.text + + # r2: cif->bytes + # r3: &ecif + # r4: ffi_prep_args + # r5: ret_type + # r6: ecif.rvalue + # ov: fn + + # This assumes we are using gas. + .globl ffi_call_SYSV + .type ffi_call_SYSV,%function +ffi_call_SYSV: +.LFB1: + stm %r6,%r15,24(%r15) # Save registers +.LCFI0: + basr %r13,0 # Set up base register +.Lbase: + lr %r11,%r15 # Set up frame pointer +.LCFI1: + sr %r15,%r2 + ahi %r15,-96-48 # Allocate stack + lr %r8,%r6 # Save ecif.rvalue + sr %r9,%r9 + ic %r9,.Ltable-.Lbase(%r13,%r5) # Load epilog address + l %r7,96(%r11) # Load function address + st %r11,0(%r15) # Set up back chain + ahi %r11,-48 # Register save area +.LCFI2: + + la %r2,96(%r15) # Save area + # r3 already holds &ecif + basr %r14,%r4 # Call ffi_prep_args + + lm %r2,%r6,0(%r11) # Load arguments + ld %f0,32(%r11) + ld %f2,40(%r11) + la %r14,0(%r13,%r9) # Set return address + br %r7 # ... and call function + +.LretNone: # Return void + l %r4,48+56(%r11) + lm %r6,%r15,48+24(%r11) + br %r4 + +.LretFloat: + l %r4,48+56(%r11) + ste %f0,0(%r8) # Return float + lm %r6,%r15,48+24(%r11) + br %r4 + +.LretDouble: + l %r4,48+56(%r11) + std %f0,0(%r8) # Return double + lm %r6,%r15,48+24(%r11) + br %r4 + +.LretInt32: + l %r4,48+56(%r11) + st %r2,0(%r8) # Return int + lm %r6,%r15,48+24(%r11) + br %r4 + +.LretInt64: + l %r4,48+56(%r11) + stm %r2,%r3,0(%r8) # Return long long + lm %r6,%r15,48+24(%r11) + br %r4 + +.Ltable: + .byte .LretNone-.Lbase # FFI390_RET_VOID + .byte .LretNone-.Lbase # FFI390_RET_STRUCT + .byte .LretFloat-.Lbase # FFI390_RET_FLOAT + .byte .LretDouble-.Lbase # FFI390_RET_DOUBLE + .byte .LretInt32-.Lbase # FFI390_RET_INT32 + .byte .LretInt64-.Lbase # FFI390_RET_INT64 + +.LFE1: +.ffi_call_SYSV_end: + .size ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV + + + .globl ffi_closure_SYSV + .type ffi_closure_SYSV,%function +ffi_closure_SYSV: +.LFB2: + stm %r12,%r15,48(%r15) # Save registers +.LCFI10: + basr %r13,0 # Set up base register +.Lcbase: + stm %r2,%r6,8(%r15) # Save arguments + std %f0,64(%r15) + std %f2,72(%r15) + lr %r1,%r15 # Set up stack frame + ahi %r15,-96 +.LCFI11: + l %r12,.Lchelper-.Lcbase(%r13) # Get helper function + lr %r2,%r0 # Closure + la %r3,8(%r1) # GPRs + la %r4,64(%r1) # FPRs + la %r5,96(%r1) # Overflow + st %r1,0(%r15) # Set up back chain + + bas %r14,0(%r12,%r13) # Call helper + + l %r4,96+56(%r15) + ld %f0,96+64(%r15) # Load return registers + lm %r2,%r3,96+8(%r15) + lm %r12,%r15,96+48(%r15) + br %r4 + + .align 4 +.Lchelper: + .long ffi_closure_helper_SYSV-.Lcbase + +.LFE2: + +.ffi_closure_SYSV_end: + .size ffi_closure_SYSV,.ffi_closure_SYSV_end-ffi_closure_SYSV + + + .section .eh_frame,EH_FRAME_FLAGS,@progbits +.Lframe1: + .4byte .LECIE1-.LSCIE1 # Length of Common Information Entry +.LSCIE1: + .4byte 0x0 # CIE Identifier Tag + .byte 0x1 # CIE Version + .ascii "zR\0" # CIE Augmentation + .uleb128 0x1 # CIE Code Alignment Factor + .sleb128 -4 # CIE Data Alignment Factor + .byte 0xe # CIE RA Column + .uleb128 0x1 # Augmentation size + .byte 0x1b # FDE Encoding (pcrel sdata4) + .byte 0xc # DW_CFA_def_cfa + .uleb128 0xf + .uleb128 0x60 + .align 4 +.LECIE1: +.LSFDE1: + .4byte .LEFDE1-.LASFDE1 # FDE Length +.LASFDE1: + .4byte .LASFDE1-.Lframe1 # FDE CIE offset + .4byte .LFB1-. # FDE initial location + .4byte .LFE1-.LFB1 # FDE address range + .uleb128 0x0 # Augmentation size + .byte 0x4 # DW_CFA_advance_loc4 + .4byte .LCFI0-.LFB1 + .byte 0x8f # DW_CFA_offset, column 0xf + .uleb128 0x9 + .byte 0x8e # DW_CFA_offset, column 0xe + .uleb128 0xa + .byte 0x8d # DW_CFA_offset, column 0xd + .uleb128 0xb + .byte 0x8c # DW_CFA_offset, column 0xc + .uleb128 0xc + .byte 0x8b # DW_CFA_offset, column 0xb + .uleb128 0xd + .byte 0x8a # DW_CFA_offset, column 0xa + .uleb128 0xe + .byte 0x89 # DW_CFA_offset, column 0x9 + .uleb128 0xf + .byte 0x88 # DW_CFA_offset, column 0x8 + .uleb128 0x10 + .byte 0x87 # DW_CFA_offset, column 0x7 + .uleb128 0x11 + .byte 0x86 # DW_CFA_offset, column 0x6 + .uleb128 0x12 + .byte 0x4 # DW_CFA_advance_loc4 + .4byte .LCFI1-.LCFI0 + .byte 0xd # DW_CFA_def_cfa_register + .uleb128 0xb + .byte 0x4 # DW_CFA_advance_loc4 + .4byte .LCFI2-.LCFI1 + .byte 0xe # DW_CFA_def_cfa_offset + .uleb128 0x90 + .align 4 +.LEFDE1: +.LSFDE2: + .4byte .LEFDE2-.LASFDE2 # FDE Length +.LASFDE2: + .4byte .LASFDE2-.Lframe1 # FDE CIE offset + .4byte .LFB2-. # FDE initial location + .4byte .LFE2-.LFB2 # FDE address range + .uleb128 0x0 # Augmentation size + .byte 0x4 # DW_CFA_advance_loc4 + .4byte .LCFI10-.LFB2 + .byte 0x8f # DW_CFA_offset, column 0xf + .uleb128 0x9 + .byte 0x8e # DW_CFA_offset, column 0xe + .uleb128 0xa + .byte 0x8d # DW_CFA_offset, column 0xd + .uleb128 0xb + .byte 0x8c # DW_CFA_offset, column 0xc + .uleb128 0xc + .byte 0x4 # DW_CFA_advance_loc4 + .4byte .LCFI11-.LCFI10 + .byte 0xe # DW_CFA_def_cfa_offset + .uleb128 0xc0 + .align 4 +.LEFDE2: + +#else + +.text + + # r2: cif->bytes + # r3: &ecif + # r4: ffi_prep_args + # r5: ret_type + # r6: ecif.rvalue + # ov: fn + + # This assumes we are using gas. + .globl ffi_call_SYSV + .type ffi_call_SYSV,%function +ffi_call_SYSV: +.LFB1: + stmg %r6,%r15,48(%r15) # Save registers +.LCFI0: + larl %r13,.Lbase # Set up base register + lgr %r11,%r15 # Set up frame pointer +.LCFI1: + sgr %r15,%r2 + aghi %r15,-160-80 # Allocate stack + lgr %r8,%r6 # Save ecif.rvalue + llgc %r9,.Ltable-.Lbase(%r13,%r5) # Load epilog address + lg %r7,160(%r11) # Load function address + stg %r11,0(%r15) # Set up back chain + aghi %r11,-80 # Register save area +.LCFI2: + + la %r2,160(%r15) # Save area + # r3 already holds &ecif + basr %r14,%r4 # Call ffi_prep_args + + lmg %r2,%r6,0(%r11) # Load arguments + ld %f0,48(%r11) + ld %f2,56(%r11) + ld %f4,64(%r11) + ld %f6,72(%r11) + la %r14,0(%r13,%r9) # Set return address + br %r7 # ... and call function + +.Lbase: +.LretNone: # Return void + lg %r4,80+112(%r11) + lmg %r6,%r15,80+48(%r11) + br %r4 + +.LretFloat: + lg %r4,80+112(%r11) + ste %f0,0(%r8) # Return float + lmg %r6,%r15,80+48(%r11) + br %r4 + +.LretDouble: + lg %r4,80+112(%r11) + std %f0,0(%r8) # Return double + lmg %r6,%r15,80+48(%r11) + br %r4 + +.LretInt32: + lg %r4,80+112(%r11) + st %r2,0(%r8) # Return int + lmg %r6,%r15,80+48(%r11) + br %r4 + +.LretInt64: + lg %r4,80+112(%r11) + stg %r2,0(%r8) # Return long + lmg %r6,%r15,80+48(%r11) + br %r4 + +.Ltable: + .byte .LretNone-.Lbase # FFI390_RET_VOID + .byte .LretNone-.Lbase # FFI390_RET_STRUCT + .byte .LretFloat-.Lbase # FFI390_RET_FLOAT + .byte .LretDouble-.Lbase # FFI390_RET_DOUBLE + .byte .LretInt32-.Lbase # FFI390_RET_INT32 + .byte .LretInt64-.Lbase # FFI390_RET_INT64 + +.LFE1: +.ffi_call_SYSV_end: + .size ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV + + + .globl ffi_closure_SYSV + .type ffi_closure_SYSV,%function +ffi_closure_SYSV: +.LFB2: + stmg %r14,%r15,112(%r15) # Save registers +.LCFI10: + stmg %r2,%r6,16(%r15) # Save arguments + std %f0,128(%r15) + std %f2,136(%r15) + std %f4,144(%r15) + std %f6,152(%r15) + lgr %r1,%r15 # Set up stack frame + aghi %r15,-160 +.LCFI11: + lgr %r2,%r0 # Closure + la %r3,16(%r1) # GPRs + la %r4,128(%r1) # FPRs + la %r5,160(%r1) # Overflow + stg %r1,0(%r15) # Set up back chain + + brasl %r14,ffi_closure_helper_SYSV # Call helper + + lg %r14,160+112(%r15) + ld %f0,160+128(%r15) # Load return registers + lg %r2,160+16(%r15) + la %r15,160(%r15) + br %r14 +.LFE2: + +.ffi_closure_SYSV_end: + .size ffi_closure_SYSV,.ffi_closure_SYSV_end-ffi_closure_SYSV + + + + .section .eh_frame,EH_FRAME_FLAGS,@progbits +.Lframe1: + .4byte .LECIE1-.LSCIE1 # Length of Common Information Entry +.LSCIE1: + .4byte 0x0 # CIE Identifier Tag + .byte 0x1 # CIE Version + .ascii "zR\0" # CIE Augmentation + .uleb128 0x1 # CIE Code Alignment Factor + .sleb128 -8 # CIE Data Alignment Factor + .byte 0xe # CIE RA Column + .uleb128 0x1 # Augmentation size + .byte 0x1b # FDE Encoding (pcrel sdata4) + .byte 0xc # DW_CFA_def_cfa + .uleb128 0xf + .uleb128 0xa0 + .align 8 +.LECIE1: +.LSFDE1: + .4byte .LEFDE1-.LASFDE1 # FDE Length +.LASFDE1: + .4byte .LASFDE1-.Lframe1 # FDE CIE offset + .4byte .LFB1-. # FDE initial location + .4byte .LFE1-.LFB1 # FDE address range + .uleb128 0x0 # Augmentation size + .byte 0x4 # DW_CFA_advance_loc4 + .4byte .LCFI0-.LFB1 + .byte 0x8f # DW_CFA_offset, column 0xf + .uleb128 0x5 + .byte 0x8e # DW_CFA_offset, column 0xe + .uleb128 0x6 + .byte 0x8d # DW_CFA_offset, column 0xd + .uleb128 0x7 + .byte 0x8c # DW_CFA_offset, column 0xc + .uleb128 0x8 + .byte 0x8b # DW_CFA_offset, column 0xb + .uleb128 0x9 + .byte 0x8a # DW_CFA_offset, column 0xa + .uleb128 0xa + .byte 0x89 # DW_CFA_offset, column 0x9 + .uleb128 0xb + .byte 0x88 # DW_CFA_offset, column 0x8 + .uleb128 0xc + .byte 0x87 # DW_CFA_offset, column 0x7 + .uleb128 0xd + .byte 0x86 # DW_CFA_offset, column 0x6 + .uleb128 0xe + .byte 0x4 # DW_CFA_advance_loc4 + .4byte .LCFI1-.LCFI0 + .byte 0xd # DW_CFA_def_cfa_register + .uleb128 0xb + .byte 0x4 # DW_CFA_advance_loc4 + .4byte .LCFI2-.LCFI1 + .byte 0xe # DW_CFA_def_cfa_offset + .uleb128 0xf0 + .align 8 +.LEFDE1: +.LSFDE2: + .4byte .LEFDE2-.LASFDE2 # FDE Length +.LASFDE2: + .4byte .LASFDE2-.Lframe1 # FDE CIE offset + .4byte .LFB2-. # FDE initial location + .4byte .LFE2-.LFB2 # FDE address range + .uleb128 0x0 # Augmentation size + .byte 0x4 # DW_CFA_advance_loc4 + .4byte .LCFI10-.LFB2 + .byte 0x8f # DW_CFA_offset, column 0xf + .uleb128 0x5 + .byte 0x8e # DW_CFA_offset, column 0xe + .uleb128 0x6 + .byte 0x4 # DW_CFA_advance_loc4 + .4byte .LCFI11-.LCFI10 + .byte 0xe # DW_CFA_def_cfa_offset + .uleb128 0x140 + .align 8 +.LEFDE2: + +#endif + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libffi/src/sh/ffi.c b/libffi/src/sh/ffi.c new file mode 100644 index 000000000..69bd025fb --- /dev/null +++ b/libffi/src/sh/ffi.c @@ -0,0 +1,716 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Kaz Kojima + Copyright (c) 2008 Red Hat, Inc. + + SuperH Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> + +#define NGREGARG 4 +#if defined(__SH4__) +#define NFREGARG 8 +#endif + +#if defined(__HITACHI__) +#define STRUCT_VALUE_ADDRESS_WITH_ARG 1 +#else +#define STRUCT_VALUE_ADDRESS_WITH_ARG 0 +#endif + +/* If the structure has essentialy an unique element, return its type. */ +static int +simple_type (ffi_type *arg) +{ + if (arg->type != FFI_TYPE_STRUCT) + return arg->type; + else if (arg->elements[1]) + return FFI_TYPE_STRUCT; + + return simple_type (arg->elements[0]); +} + +static int +return_type (ffi_type *arg) +{ + unsigned short type; + + if (arg->type != FFI_TYPE_STRUCT) + return arg->type; + + type = simple_type (arg->elements[0]); + if (! arg->elements[1]) + { + switch (type) + { + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + return FFI_TYPE_INT; + + default: + return type; + } + } + + /* gcc uses r0/r1 pair for some kind of structures. */ + if (arg->size <= 2 * sizeof (int)) + { + int i = 0; + ffi_type *e; + + while ((e = arg->elements[i++])) + { + type = simple_type (e); + switch (type) + { + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_INT: + case FFI_TYPE_FLOAT: + return FFI_TYPE_UINT64; + + default: + break; + } + } + } + + return FFI_TYPE_STRUCT; +} + +/* ffi_prep_args is called by the assembly routine once stack space + has been allocated for the function's arguments */ + +void ffi_prep_args(char *stack, extended_cif *ecif) +{ + register unsigned int i; + register int tmp; + register unsigned int avn; + register void **p_argv; + register char *argp; + register ffi_type **p_arg; + int greg, ireg; +#if defined(__SH4__) + int freg = 0; +#endif + + tmp = 0; + argp = stack; + + if (return_type (ecif->cif->rtype) == FFI_TYPE_STRUCT) + { + *(void **) argp = ecif->rvalue; + argp += 4; + ireg = STRUCT_VALUE_ADDRESS_WITH_ARG ? 1 : 0; + } + else + ireg = 0; + + /* Set arguments for registers. */ + greg = ireg; + avn = ecif->cif->nargs; + p_argv = ecif->avalue; + + for (i = 0, p_arg = ecif->cif->arg_types; i < avn; i++, p_arg++, p_argv++) + { + size_t z; + + z = (*p_arg)->size; + if (z < sizeof(int)) + { + if (greg++ >= NGREGARG) + continue; + + z = sizeof(int); + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + *(signed int *) argp = (signed int)*(SINT8 *)(* p_argv); + break; + + case FFI_TYPE_UINT8: + *(unsigned int *) argp = (unsigned int)*(UINT8 *)(* p_argv); + break; + + case FFI_TYPE_SINT16: + *(signed int *) argp = (signed int)*(SINT16 *)(* p_argv); + break; + + case FFI_TYPE_UINT16: + *(unsigned int *) argp = (unsigned int)*(UINT16 *)(* p_argv); + break; + + case FFI_TYPE_STRUCT: + *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv); + break; + + default: + FFI_ASSERT(0); + } + argp += z; + } + else if (z == sizeof(int)) + { +#if defined(__SH4__) + if ((*p_arg)->type == FFI_TYPE_FLOAT) + { + if (freg++ >= NFREGARG) + continue; + } + else +#endif + { + if (greg++ >= NGREGARG) + continue; + } + *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv); + argp += z; + } +#if defined(__SH4__) + else if ((*p_arg)->type == FFI_TYPE_DOUBLE) + { + if (freg + 1 >= NFREGARG) + continue; + freg = (freg + 1) & ~1; + freg += 2; + memcpy (argp, *p_argv, z); + argp += z; + } +#endif + else + { + int n = (z + sizeof (int) - 1) / sizeof (int); +#if defined(__SH4__) + if (greg + n - 1 >= NGREGARG) + continue; +#else + if (greg >= NGREGARG) + continue; +#endif + greg += n; + memcpy (argp, *p_argv, z); + argp += n * sizeof (int); + } + } + + /* Set arguments on stack. */ + greg = ireg; +#if defined(__SH4__) + freg = 0; +#endif + p_argv = ecif->avalue; + + for (i = 0, p_arg = ecif->cif->arg_types; i < avn; i++, p_arg++, p_argv++) + { + size_t z; + + z = (*p_arg)->size; + if (z < sizeof(int)) + { + if (greg++ < NGREGARG) + continue; + + z = sizeof(int); + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + *(signed int *) argp = (signed int)*(SINT8 *)(* p_argv); + break; + + case FFI_TYPE_UINT8: + *(unsigned int *) argp = (unsigned int)*(UINT8 *)(* p_argv); + break; + + case FFI_TYPE_SINT16: + *(signed int *) argp = (signed int)*(SINT16 *)(* p_argv); + break; + + case FFI_TYPE_UINT16: + *(unsigned int *) argp = (unsigned int)*(UINT16 *)(* p_argv); + break; + + case FFI_TYPE_STRUCT: + *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv); + break; + + default: + FFI_ASSERT(0); + } + argp += z; + } + else if (z == sizeof(int)) + { +#if defined(__SH4__) + if ((*p_arg)->type == FFI_TYPE_FLOAT) + { + if (freg++ < NFREGARG) + continue; + } + else +#endif + { + if (greg++ < NGREGARG) + continue; + } + *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv); + argp += z; + } +#if defined(__SH4__) + else if ((*p_arg)->type == FFI_TYPE_DOUBLE) + { + if (freg + 1 < NFREGARG) + { + freg = (freg + 1) & ~1; + freg += 2; + continue; + } + memcpy (argp, *p_argv, z); + argp += z; + } +#endif + else + { + int n = (z + sizeof (int) - 1) / sizeof (int); + if (greg + n - 1 < NGREGARG) + { + greg += n; + continue; + } +#if (! defined(__SH4__)) + else if (greg < NGREGARG) + { + greg = NGREGARG; + continue; + } +#endif + memcpy (argp, *p_argv, z); + argp += n * sizeof (int); + } + } + + return; +} + +/* Perform machine dependent cif processing */ +ffi_status ffi_prep_cif_machdep(ffi_cif *cif) +{ + int i, j; + int size, type; + int n, m; + int greg; +#if defined(__SH4__) + int freg = 0; +#endif + + cif->flags = 0; + + greg = ((return_type (cif->rtype) == FFI_TYPE_STRUCT) && + STRUCT_VALUE_ADDRESS_WITH_ARG) ? 1 : 0; + +#if defined(__SH4__) + for (i = j = 0; i < cif->nargs && j < 12; i++) + { + type = (cif->arg_types)[i]->type; + switch (type) + { + case FFI_TYPE_FLOAT: + if (freg >= NFREGARG) + continue; + freg++; + cif->flags += ((cif->arg_types)[i]->type) << (2 * j); + j++; + break; + + case FFI_TYPE_DOUBLE: + if ((freg + 1) >= NFREGARG) + continue; + freg = (freg + 1) & ~1; + freg += 2; + cif->flags += ((cif->arg_types)[i]->type) << (2 * j); + j++; + break; + + default: + size = (cif->arg_types)[i]->size; + n = (size + sizeof (int) - 1) / sizeof (int); + if (greg + n - 1 >= NGREGARG) + continue; + greg += n; + for (m = 0; m < n; m++) + cif->flags += FFI_TYPE_INT << (2 * j++); + break; + } + } +#else + for (i = j = 0; i < cif->nargs && j < 4; i++) + { + size = (cif->arg_types)[i]->size; + n = (size + sizeof (int) - 1) / sizeof (int); + if (greg >= NGREGARG) + continue; + else if (greg + n - 1 >= NGREGARG) + n = NGREGARG - greg; + greg += n; + for (m = 0; m < n; m++) + cif->flags += FFI_TYPE_INT << (2 * j++); + } +#endif + + /* Set the return type flag */ + switch (cif->rtype->type) + { + case FFI_TYPE_STRUCT: + cif->flags += (unsigned) (return_type (cif->rtype)) << 24; + break; + + case FFI_TYPE_VOID: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + cif->flags += (unsigned) cif->rtype->type << 24; + break; + + default: + cif->flags += FFI_TYPE_INT << 24; + break; + } + + return FFI_OK; +} + +extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *, + unsigned, unsigned, unsigned *, void (*fn)(void)); + +void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + extended_cif ecif; + UINT64 trvalue; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return */ + /* value address then we need to make one */ + + if (cif->rtype->type == FFI_TYPE_STRUCT + && return_type (cif->rtype) != FFI_TYPE_STRUCT) + ecif.rvalue = &trvalue; + else if ((rvalue == NULL) && + (cif->rtype->type == FFI_TYPE_STRUCT)) + { + ecif.rvalue = alloca(cif->rtype->size); + } + else + ecif.rvalue = rvalue; + + switch (cif->abi) + { + case FFI_SYSV: + ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue, + fn); + break; + default: + FFI_ASSERT(0); + break; + } + + if (rvalue + && cif->rtype->type == FFI_TYPE_STRUCT + && return_type (cif->rtype) != FFI_TYPE_STRUCT) + memcpy (rvalue, &trvalue, cif->rtype->size); +} + +extern void ffi_closure_SYSV (void); +#if defined(__SH4__) +extern void __ic_invalidate (void *line); +#endif + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *codeloc) +{ + unsigned int *tramp; + unsigned int insn; + + FFI_ASSERT (cif->abi == FFI_GCC_SYSV); + + tramp = (unsigned int *) &closure->tramp[0]; + /* Set T bit if the function returns a struct pointed with R2. */ + insn = (return_type (cif->rtype) == FFI_TYPE_STRUCT + ? 0x0018 /* sett */ + : 0x0008 /* clrt */); + +#ifdef __LITTLE_ENDIAN__ + tramp[0] = 0xd301d102; + tramp[1] = 0x0000412b | (insn << 16); +#else + tramp[0] = 0xd102d301; + tramp[1] = 0x412b0000 | insn; +#endif + *(void **) &tramp[2] = (void *)codeloc; /* ctx */ + *(void **) &tramp[3] = (void *)ffi_closure_SYSV; /* funaddr */ + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + +#if defined(__SH4__) + /* Flush the icache. */ + __ic_invalidate(codeloc); +#endif + + return FFI_OK; +} + +/* Basically the trampoline invokes ffi_closure_SYSV, and on + * entry, r3 holds the address of the closure. + * After storing the registers that could possibly contain + * parameters to be passed into the stack frame and setting + * up space for a return value, ffi_closure_SYSV invokes the + * following helper function to do most of the work. + */ + +#ifdef __LITTLE_ENDIAN__ +#define OFS_INT8 0 +#define OFS_INT16 0 +#else +#define OFS_INT8 3 +#define OFS_INT16 2 +#endif + +int +ffi_closure_helper_SYSV (ffi_closure *closure, void *rvalue, + unsigned long *pgr, unsigned long *pfr, + unsigned long *pst) +{ + void **avalue; + ffi_type **p_arg; + int i, avn; + int ireg, greg = 0; +#if defined(__SH4__) + int freg = 0; +#endif + ffi_cif *cif; + + cif = closure->cif; + avalue = alloca(cif->nargs * sizeof(void *)); + + /* Copy the caller's structure return value address so that the closure + returns the data directly to the caller. */ + if (cif->rtype->type == FFI_TYPE_STRUCT && STRUCT_VALUE_ADDRESS_WITH_ARG) + { + rvalue = (void *) *pgr++; + ireg = 1; + } + else + ireg = 0; + + cif = closure->cif; + greg = ireg; + avn = cif->nargs; + + /* Grab the addresses of the arguments from the stack frame. */ + for (i = 0, p_arg = cif->arg_types; i < avn; i++, p_arg++) + { + size_t z; + + z = (*p_arg)->size; + if (z < sizeof(int)) + { + if (greg++ >= NGREGARG) + continue; + + z = sizeof(int); + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + avalue[i] = (((char *)pgr) + OFS_INT8); + break; + + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + avalue[i] = (((char *)pgr) + OFS_INT16); + break; + + case FFI_TYPE_STRUCT: + avalue[i] = pgr; + break; + + default: + FFI_ASSERT(0); + } + pgr++; + } + else if (z == sizeof(int)) + { +#if defined(__SH4__) + if ((*p_arg)->type == FFI_TYPE_FLOAT) + { + if (freg++ >= NFREGARG) + continue; + avalue[i] = pfr; + pfr++; + } + else +#endif + { + if (greg++ >= NGREGARG) + continue; + avalue[i] = pgr; + pgr++; + } + } +#if defined(__SH4__) + else if ((*p_arg)->type == FFI_TYPE_DOUBLE) + { + if (freg + 1 >= NFREGARG) + continue; + if (freg & 1) + pfr++; + freg = (freg + 1) & ~1; + freg += 2; + avalue[i] = pfr; + pfr += 2; + } +#endif + else + { + int n = (z + sizeof (int) - 1) / sizeof (int); +#if defined(__SH4__) + if (greg + n - 1 >= NGREGARG) + continue; +#else + if (greg >= NGREGARG) + continue; +#endif + greg += n; + avalue[i] = pgr; + pgr += n; + } + } + + greg = ireg; +#if defined(__SH4__) + freg = 0; +#endif + + for (i = 0, p_arg = cif->arg_types; i < avn; i++, p_arg++) + { + size_t z; + + z = (*p_arg)->size; + if (z < sizeof(int)) + { + if (greg++ < NGREGARG) + continue; + + z = sizeof(int); + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + avalue[i] = (((char *)pst) + OFS_INT8); + break; + + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + avalue[i] = (((char *)pst) + OFS_INT16); + break; + + case FFI_TYPE_STRUCT: + avalue[i] = pst; + break; + + default: + FFI_ASSERT(0); + } + pst++; + } + else if (z == sizeof(int)) + { +#if defined(__SH4__) + if ((*p_arg)->type == FFI_TYPE_FLOAT) + { + if (freg++ < NFREGARG) + continue; + } + else +#endif + { + if (greg++ < NGREGARG) + continue; + } + avalue[i] = pst; + pst++; + } +#if defined(__SH4__) + else if ((*p_arg)->type == FFI_TYPE_DOUBLE) + { + if (freg + 1 < NFREGARG) + { + freg = (freg + 1) & ~1; + freg += 2; + continue; + } + avalue[i] = pst; + pst += 2; + } +#endif + else + { + int n = (z + sizeof (int) - 1) / sizeof (int); + if (greg + n - 1 < NGREGARG) + { + greg += n; + continue; + } +#if (! defined(__SH4__)) + else if (greg < NGREGARG) + { + greg += n; + pst += greg - NGREGARG; + continue; + } +#endif + avalue[i] = pst; + pst += n; + } + } + + (closure->fun) (cif, rvalue, avalue, closure->user_data); + + /* Tell ffi_closure_SYSV how to perform return type promotions. */ + return return_type (cif->rtype); +} diff --git a/libffi/src/sh/ffitarget.h b/libffi/src/sh/ffitarget.h new file mode 100644 index 000000000..218ae3d0a --- /dev/null +++ b/libffi/src/sh/ffitarget.h @@ -0,0 +1,49 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 1996-2003 Red Hat, Inc. + Target configuration macros for SuperH. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +/* ---- Generic type definitions ----------------------------------------- */ + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + FFI_SYSV, + FFI_DEFAULT_ABI = FFI_SYSV, + FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 +} ffi_abi; +#endif + +#define FFI_CLOSURES 1 +#define FFI_TRAMPOLINE_SIZE 16 +#define FFI_NATIVE_RAW_API 0 + +#endif + diff --git a/libffi/src/sh/sysv.S b/libffi/src/sh/sysv.S new file mode 100644 index 000000000..5be7516d6 --- /dev/null +++ b/libffi/src/sh/sysv.S @@ -0,0 +1,850 @@ +/* ----------------------------------------------------------------------- + sysv.S - Copyright (c) 2002, 2003, 2004, 2006, 2008 Kaz Kojima + + SuperH Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> +#ifdef HAVE_MACHINE_ASM_H +#include <machine/asm.h> +#else +/* XXX these lose for some platforms, I'm sure. */ +#define CNAME(x) x +#define ENTRY(x) .globl CNAME(x); .type CNAME(x),%function; CNAME(x): +#endif + +#if defined(__HITACHI__) +#define STRUCT_VALUE_ADDRESS_WITH_ARG 1 +#else +#define STRUCT_VALUE_ADDRESS_WITH_ARG 0 +#endif + +.text + + # r4: ffi_prep_args + # r5: &ecif + # r6: bytes + # r7: flags + # sp+0: rvalue + # sp+4: fn + + # This assumes we are using gas. +ENTRY(ffi_call_SYSV) + # Save registers +.LFB1: + mov.l r8,@-r15 +.LCFI0: + mov.l r9,@-r15 +.LCFI1: + mov.l r10,@-r15 +.LCFI2: + mov.l r12,@-r15 +.LCFI3: + mov.l r14,@-r15 +.LCFI4: + sts.l pr,@-r15 +.LCFI5: + mov r15,r14 +.LCFI6: +#if defined(__SH4__) + mov r6,r8 + mov r7,r9 + + sub r6,r15 + add #-16,r15 + mov #~7,r0 + and r0,r15 + + mov r4,r0 + jsr @r0 + mov r15,r4 + + mov r9,r1 + shlr8 r9 + shlr8 r9 + shlr8 r9 + + mov #FFI_TYPE_STRUCT,r2 + cmp/eq r2,r9 + bf 1f +#if STRUCT_VALUE_ADDRESS_WITH_ARG + mov.l @r15+,r4 + bra 2f + mov #5,r2 +#else + mov.l @r15+,r10 +#endif +1: + mov #4,r2 +2: + mov #4,r3 + +L_pass: + cmp/pl r8 + bf L_call_it + + mov r1,r0 + and #3,r0 + +L_pass_d: + cmp/eq #FFI_TYPE_DOUBLE,r0 + bf L_pass_f + + mov r3,r0 + and #1,r0 + tst r0,r0 + bt 1f + add #1,r3 +1: + mov #12,r0 + cmp/hs r0,r3 + bt/s 3f + shlr2 r1 + bsr L_pop_d + nop +3: + add #2,r3 + bra L_pass + add #-8,r8 + +L_pop_d: + mov r3,r0 + add r0,r0 + add r3,r0 + add #-12,r0 + braf r0 + nop +#ifdef __LITTLE_ENDIAN__ + fmov.s @r15+,fr5 + rts + fmov.s @r15+,fr4 + fmov.s @r15+,fr7 + rts + fmov.s @r15+,fr6 + fmov.s @r15+,fr9 + rts + fmov.s @r15+,fr8 + fmov.s @r15+,fr11 + rts + fmov.s @r15+,fr10 +#else + fmov.s @r15+,fr4 + rts + fmov.s @r15+,fr5 + fmov.s @r15+,fr6 + rts + fmov.s @r15+,fr7 + fmov.s @r15+,fr8 + rts + fmov.s @r15+,fr9 + fmov.s @r15+,fr10 + rts + fmov.s @r15+,fr11 +#endif + +L_pass_f: + cmp/eq #FFI_TYPE_FLOAT,r0 + bf L_pass_i + + mov #12,r0 + cmp/hs r0,r3 + bt/s 2f + shlr2 r1 + bsr L_pop_f + nop +2: + add #1,r3 + bra L_pass + add #-4,r8 + +L_pop_f: + mov r3,r0 + shll2 r0 + add #-16,r0 + braf r0 + nop +#ifdef __LITTLE_ENDIAN__ + rts + fmov.s @r15+,fr5 + rts + fmov.s @r15+,fr4 + rts + fmov.s @r15+,fr7 + rts + fmov.s @r15+,fr6 + rts + fmov.s @r15+,fr9 + rts + fmov.s @r15+,fr8 + rts + fmov.s @r15+,fr11 + rts + fmov.s @r15+,fr10 +#else + rts + fmov.s @r15+,fr4 + rts + fmov.s @r15+,fr5 + rts + fmov.s @r15+,fr6 + rts + fmov.s @r15+,fr7 + rts + fmov.s @r15+,fr8 + rts + fmov.s @r15+,fr9 + rts + fmov.s @r15+,fr10 + rts + fmov.s @r15+,fr11 +#endif + +L_pass_i: + cmp/eq #FFI_TYPE_INT,r0 + bf L_call_it + + mov #8,r0 + cmp/hs r0,r2 + bt/s 2f + shlr2 r1 + bsr L_pop_i + nop +2: + add #1,r2 + bra L_pass + add #-4,r8 + +L_pop_i: + mov r2,r0 + shll2 r0 + add #-16,r0 + braf r0 + nop + rts + mov.l @r15+,r4 + rts + mov.l @r15+,r5 + rts + mov.l @r15+,r6 + rts + mov.l @r15+,r7 + +L_call_it: + # call function +#if (! STRUCT_VALUE_ADDRESS_WITH_ARG) + mov r10, r2 +#endif + mov.l @(28,r14),r1 + jsr @r1 + nop + +L_ret_d: + mov #FFI_TYPE_DOUBLE,r2 + cmp/eq r2,r9 + bf L_ret_ll + + mov.l @(24,r14),r1 +#ifdef __LITTLE_ENDIAN__ + fmov.s fr1,@r1 + add #4,r1 + bra L_epilogue + fmov.s fr0,@r1 +#else + fmov.s fr0,@r1 + add #4,r1 + bra L_epilogue + fmov.s fr1,@r1 +#endif + +L_ret_ll: + mov #FFI_TYPE_SINT64,r2 + cmp/eq r2,r9 + bt/s 1f + mov #FFI_TYPE_UINT64,r2 + cmp/eq r2,r9 + bf L_ret_f + +1: + mov.l @(24,r14),r2 + mov.l r0,@r2 + bra L_epilogue + mov.l r1,@(4,r2) + +L_ret_f: + mov #FFI_TYPE_FLOAT,r2 + cmp/eq r2,r9 + bf L_ret_i + + mov.l @(24,r14),r1 + bra L_epilogue + fmov.s fr0,@r1 + +L_ret_i: + mov #FFI_TYPE_INT,r2 + cmp/eq r2,r9 + bf L_epilogue + + mov.l @(24,r14),r1 + bra L_epilogue + mov.l r0,@r1 + +L_epilogue: + # Remove the space we pushed for the args + mov r14,r15 + + lds.l @r15+,pr + mov.l @r15+,r14 + mov.l @r15+,r12 + mov.l @r15+,r10 + mov.l @r15+,r9 + rts + mov.l @r15+,r8 +#else + mov r6,r8 + mov r7,r9 + + sub r6,r15 + add #-16,r15 + mov #~7,r0 + and r0,r15 + + mov r4,r0 + jsr @r0 + mov r15,r4 + + mov r9,r3 + shlr8 r9 + shlr8 r9 + shlr8 r9 + + mov #FFI_TYPE_STRUCT,r2 + cmp/eq r2,r9 + bf 1f +#if STRUCT_VALUE_ADDRESS_WITH_ARG + mov.l @r15+,r4 + bra 2f + mov #5,r2 +#else + mov.l @r15+,r10 +#endif +1: + mov #4,r2 +2: + +L_pass: + cmp/pl r8 + bf L_call_it + + mov r3,r0 + and #3,r0 + +L_pass_d: + cmp/eq #FFI_TYPE_DOUBLE,r0 + bf L_pass_i + + mov r15,r0 + and #7,r0 + tst r0,r0 + bt 1f + add #4,r15 +1: + mov #8,r0 + cmp/hs r0,r2 + bt/s 2f + shlr2 r3 + bsr L_pop_d + nop +2: + add #2,r2 + bra L_pass + add #-8,r8 + +L_pop_d: + mov r2,r0 + add r0,r0 + add r2,r0 + add #-12,r0 + add r0,r0 + braf r0 + nop + mov.l @r15+,r4 + rts + mov.l @r15+,r5 + mov.l @r15+,r5 + rts + mov.l @r15+,r6 + mov.l @r15+,r6 + rts + mov.l @r15+,r7 + rts + mov.l @r15+,r7 + +L_pass_i: + cmp/eq #FFI_TYPE_INT,r0 + bf L_call_it + + mov #8,r0 + cmp/hs r0,r2 + bt/s 2f + shlr2 r3 + bsr L_pop_i + nop +2: + add #1,r2 + bra L_pass + add #-4,r8 + +L_pop_i: + mov r2,r0 + shll2 r0 + add #-16,r0 + braf r0 + nop + rts + mov.l @r15+,r4 + rts + mov.l @r15+,r5 + rts + mov.l @r15+,r6 + rts + mov.l @r15+,r7 + +L_call_it: + # call function +#if (! STRUCT_VALUE_ADDRESS_WITH_ARG) + mov r10, r2 +#endif + mov.l @(28,r14),r1 + jsr @r1 + nop + +L_ret_d: + mov #FFI_TYPE_DOUBLE,r2 + cmp/eq r2,r9 + bf L_ret_ll + + mov.l @(24,r14),r2 + mov.l r0,@r2 + bra L_epilogue + mov.l r1,@(4,r2) + +L_ret_ll: + mov #FFI_TYPE_SINT64,r2 + cmp/eq r2,r9 + bt/s 1f + mov #FFI_TYPE_UINT64,r2 + cmp/eq r2,r9 + bf L_ret_i + +1: + mov.l @(24,r14),r2 + mov.l r0,@r2 + bra L_epilogue + mov.l r1,@(4,r2) + +L_ret_i: + mov #FFI_TYPE_FLOAT,r2 + cmp/eq r2,r9 + bt 1f + mov #FFI_TYPE_INT,r2 + cmp/eq r2,r9 + bf L_epilogue +1: + mov.l @(24,r14),r1 + bra L_epilogue + mov.l r0,@r1 + +L_epilogue: + # Remove the space we pushed for the args + mov r14,r15 + + lds.l @r15+,pr + mov.l @r15+,r14 + mov.l @r15+,r12 + mov.l @r15+,r10 + mov.l @r15+,r9 + rts + mov.l @r15+,r8 +#endif +.LFE1: +.ffi_call_SYSV_end: + .size CNAME(ffi_call_SYSV),.ffi_call_SYSV_end-CNAME(ffi_call_SYSV) + +.globl ffi_closure_helper_SYSV + +ENTRY(ffi_closure_SYSV) +.LFB2: + mov.l r7,@-r15 +.LCFI7: + mov.l r6,@-r15 +.LCFI8: + mov.l r5,@-r15 +.LCFI9: + mov.l r4,@-r15 +.LCFIA: + mov.l r14,@-r15 +.LCFIB: + sts.l pr,@-r15 + + /* Stack layout: + xx bytes (on stack parameters) + 16 bytes (register parameters) + 4 bytes (saved frame pointer) + 4 bytes (saved return address) + 32 bytes (floating register parameters, SH-4 only) + 8 bytes (result) + 4 bytes (pad) + 4 bytes (5th arg) + <- new stack pointer + */ +.LCFIC: +#if defined(__SH4__) + add #-48,r15 +#else + add #-16,r15 +#endif +.LCFID: + mov r15,r14 +.LCFIE: + +#if defined(__SH4__) + mov r14,r1 + add #48,r1 +#ifdef __LITTLE_ENDIAN__ + fmov.s fr10,@-r1 + fmov.s fr11,@-r1 + fmov.s fr8,@-r1 + fmov.s fr9,@-r1 + fmov.s fr6,@-r1 + fmov.s fr7,@-r1 + fmov.s fr4,@-r1 + fmov.s fr5,@-r1 +#else + fmov.s fr11,@-r1 + fmov.s fr10,@-r1 + fmov.s fr9,@-r1 + fmov.s fr8,@-r1 + fmov.s fr7,@-r1 + fmov.s fr6,@-r1 + fmov.s fr5,@-r1 + fmov.s fr4,@-r1 +#endif + mov r1,r7 + mov r14,r6 + add #56,r6 +#else + mov r14,r6 + add #24,r6 +#endif + + bt/s 10f + mov r2, r5 + mov r14,r1 + add #8,r1 + mov r1,r5 +10: + + mov r14,r1 +#if defined(__SH4__) + add #72,r1 +#else + add #40,r1 +#endif + mov.l r1,@r14 + +#ifdef PIC + mov.l L_got,r1 + mova L_got,r0 + add r0,r1 + mov.l L_helper,r0 + add r1,r0 +#else + mov.l L_helper,r0 +#endif + jsr @r0 + mov r3,r4 + + shll r0 + mov r0,r1 + mova L_table,r0 + add r1,r0 + mov.w @r0,r0 + mov r14,r2 + braf r0 + add #8,r2 +0: + .align 2 +#ifdef PIC +L_got: + .long _GLOBAL_OFFSET_TABLE_ +L_helper: + .long ffi_closure_helper_SYSV@GOTOFF +#else +L_helper: + .long ffi_closure_helper_SYSV +#endif +L_table: + .short L_case_v - 0b /* FFI_TYPE_VOID */ + .short L_case_i - 0b /* FFI_TYPE_INT */ +#if defined(__SH4__) + .short L_case_f - 0b /* FFI_TYPE_FLOAT */ + .short L_case_d - 0b /* FFI_TYPE_DOUBLE */ + .short L_case_d - 0b /* FFI_TYPE_LONGDOUBLE */ +#else + .short L_case_i - 0b /* FFI_TYPE_FLOAT */ + .short L_case_ll - 0b /* FFI_TYPE_DOUBLE */ + .short L_case_ll - 0b /* FFI_TYPE_LONGDOUBLE */ +#endif + .short L_case_uq - 0b /* FFI_TYPE_UINT8 */ + .short L_case_q - 0b /* FFI_TYPE_SINT8 */ + .short L_case_uh - 0b /* FFI_TYPE_UINT16 */ + .short L_case_h - 0b /* FFI_TYPE_SINT16 */ + .short L_case_i - 0b /* FFI_TYPE_UINT32 */ + .short L_case_i - 0b /* FFI_TYPE_SINT32 */ + .short L_case_ll - 0b /* FFI_TYPE_UINT64 */ + .short L_case_ll - 0b /* FFI_TYPE_SINT64 */ + .short L_case_v - 0b /* FFI_TYPE_STRUCT */ + .short L_case_i - 0b /* FFI_TYPE_POINTER */ + +#if defined(__SH4__) +L_case_d: +#ifdef __LITTLE_ENDIAN__ + fmov.s @r2+,fr1 + bra L_case_v + fmov.s @r2,fr0 +#else + fmov.s @r2+,fr0 + bra L_case_v + fmov.s @r2,fr1 +#endif + +L_case_f: + bra L_case_v + fmov.s @r2,fr0 +#endif + +L_case_ll: + mov.l @r2+,r0 + bra L_case_v + mov.l @r2,r1 + +L_case_i: + bra L_case_v + mov.l @r2,r0 + +L_case_q: +#ifdef __LITTLE_ENDIAN__ +#else + add #3,r2 +#endif + bra L_case_v + mov.b @r2,r0 + +L_case_uq: +#ifdef __LITTLE_ENDIAN__ +#else + add #3,r2 +#endif + mov.b @r2,r0 + bra L_case_v + extu.b r0,r0 + +L_case_h: +#ifdef __LITTLE_ENDIAN__ +#else + add #2,r2 +#endif + bra L_case_v + mov.w @r2,r0 + +L_case_uh: +#ifdef __LITTLE_ENDIAN__ +#else + add #2,r2 +#endif + mov.w @r2,r0 + extu.w r0,r0 + /* fall through */ + +L_case_v: +#if defined(__SH4__) + add #48,r15 +#else + add #16,r15 +#endif + lds.l @r15+,pr + mov.l @r15+,r14 + rts + add #16,r15 +.LFE2: +.ffi_closure_SYSV_end: + .size CNAME(ffi_closure_SYSV),.ffi_closure_SYSV_end-CNAME(ffi_closure_SYSV) + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif + + .section ".eh_frame","aw",@progbits +__FRAME_BEGIN__: + .4byte .LECIE1-.LSCIE1 /* Length of Common Information Entry */ +.LSCIE1: + .4byte 0x0 /* CIE Identifier Tag */ + .byte 0x1 /* CIE Version */ +#ifdef PIC + .ascii "zR\0" /* CIE Augmentation */ +#else + .byte 0x0 /* CIE Augmentation */ +#endif + .byte 0x1 /* uleb128 0x1; CIE Code Alignment Factor */ + .byte 0x7c /* sleb128 -4; CIE Data Alignment Factor */ + .byte 0x11 /* CIE RA Column */ +#ifdef PIC + .uleb128 0x1 /* Augmentation size */ + .byte 0x10 /* FDE Encoding (pcrel) */ +#endif + .byte 0xc /* DW_CFA_def_cfa */ + .byte 0xf /* uleb128 0xf */ + .byte 0x0 /* uleb128 0x0 */ + .align 2 +.LECIE1: +.LSFDE1: + .4byte .LEFDE1-.LASFDE1 /* FDE Length */ +.LASFDE1: + .4byte .LASFDE1-__FRAME_BEGIN__ /* FDE CIE offset */ +#ifdef PIC + .4byte .LFB1-. /* FDE initial location */ +#else + .4byte .LFB1 /* FDE initial location */ +#endif + .4byte .LFE1-.LFB1 /* FDE address range */ +#ifdef PIC + .uleb128 0x0 /* Augmentation size */ +#endif + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFI0-.LFB1 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x4 /* uleb128 0x4 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFI1-.LCFI0 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x8 /* uleb128 0x4 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFI2-.LCFI1 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0xc /* uleb128 0x4 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFI3-.LCFI2 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x10 /* uleb128 0x4 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFI4-.LCFI3 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x14 /* uleb128 0x4 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFI5-.LCFI4 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x18 /* uleb128 0x4 */ + .byte 0x91 /* DW_CFA_offset, column 0x11 */ + .byte 0x6 /* uleb128 0x6 */ + .byte 0x8e /* DW_CFA_offset, column 0xe */ + .byte 0x5 /* uleb128 0x5 */ + .byte 0x8c /* DW_CFA_offset, column 0xc */ + .byte 0x4 /* uleb128 0x4 */ + .byte 0x8a /* DW_CFA_offset, column 0xa */ + .byte 0x3 /* uleb128 0x3 */ + .byte 0x89 /* DW_CFA_offset, column 0x9 */ + .byte 0x2 /* uleb128 0x2 */ + .byte 0x88 /* DW_CFA_offset, column 0x8 */ + .byte 0x1 /* uleb128 0x1 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFI6-.LCFI5 + .byte 0xd /* DW_CFA_def_cfa_register */ + .byte 0xe /* uleb128 0xe */ + .align 2 +.LEFDE1: + +.LSFDE3: + .4byte .LEFDE3-.LASFDE3 /* FDE Length */ +.LASFDE3: + .4byte .LASFDE3-__FRAME_BEGIN__ /* FDE CIE offset */ +#ifdef PIC + .4byte .LFB2-. /* FDE initial location */ +#else + .4byte .LFB2 /* FDE initial location */ +#endif + .4byte .LFE2-.LFB2 /* FDE address range */ +#ifdef PIC + .uleb128 0x0 /* Augmentation size */ +#endif + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFI7-.LFB2 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x4 /* uleb128 0x4 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFI8-.LCFI7 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x8 /* uleb128 0x4 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFI9-.LCFI8 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0xc /* uleb128 0x4 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFIA-.LCFI9 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x10 /* uleb128 0x4 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFIB-.LCFIA + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x14 /* uleb128 0x4 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFIC-.LCFIB + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x18 /* uleb128 0x4 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFID-.LCFIC + .byte 0xe /* DW_CFA_def_cfa_offset */ +#if defined(__SH4__) + .byte 24+48 /* uleb128 24+48 */ +#else + .byte 24+16 /* uleb128 24+16 */ +#endif + .byte 0x91 /* DW_CFA_offset, column 0x11 */ + .byte 0x6 /* uleb128 0x6 */ + .byte 0x8e /* DW_CFA_offset, column 0xe */ + .byte 0x5 /* uleb128 0x5 */ + .byte 0x84 /* DW_CFA_offset, column 0x4 */ + .byte 0x4 /* uleb128 0x4 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 */ + .byte 0x3 /* uleb128 0x3 */ + .byte 0x86 /* DW_CFA_offset, column 0x6 */ + .byte 0x2 /* uleb128 0x2 */ + .byte 0x87 /* DW_CFA_offset, column 0x7 */ + .byte 0x1 /* uleb128 0x1 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte .LCFIE-.LCFID + .byte 0xd /* DW_CFA_def_cfa_register */ + .byte 0xe /* uleb128 0xe */ + .align 2 +.LEFDE3: diff --git a/libffi/src/sh64/ffi.c b/libffi/src/sh64/ffi.c new file mode 100644 index 000000000..8fbc05ca6 --- /dev/null +++ b/libffi/src/sh64/ffi.c @@ -0,0 +1,468 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 2003, 2004, 2006, 2007 Kaz Kojima + Copyright (c) 2008 Anthony Green + + SuperH SHmedia Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> + +#define NGREGARG 8 +#define NFREGARG 12 + +static int +return_type (ffi_type *arg) +{ + + if (arg->type != FFI_TYPE_STRUCT) + return arg->type; + + /* gcc uses r2 if the result can be packed in on register. */ + if (arg->size <= sizeof (UINT8)) + return FFI_TYPE_UINT8; + else if (arg->size <= sizeof (UINT16)) + return FFI_TYPE_UINT16; + else if (arg->size <= sizeof (UINT32)) + return FFI_TYPE_UINT32; + else if (arg->size <= sizeof (UINT64)) + return FFI_TYPE_UINT64; + + return FFI_TYPE_STRUCT; +} + +/* ffi_prep_args is called by the assembly routine once stack space + has been allocated for the function's arguments */ + +void ffi_prep_args(char *stack, extended_cif *ecif) +{ + register unsigned int i; + register unsigned int avn; + register void **p_argv; + register char *argp; + register ffi_type **p_arg; + + argp = stack; + + if (return_type (ecif->cif->rtype) == FFI_TYPE_STRUCT) + { + *(void **) argp = ecif->rvalue; + argp += sizeof (UINT64); + } + + avn = ecif->cif->nargs; + p_argv = ecif->avalue; + + for (i = 0, p_arg = ecif->cif->arg_types; i < avn; i++, p_arg++, p_argv++) + { + size_t z; + int align; + + z = (*p_arg)->size; + align = (*p_arg)->alignment; + if (z < sizeof (UINT32)) + { + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + *(SINT64 *) argp = (SINT64) *(SINT8 *)(*p_argv); + break; + + case FFI_TYPE_UINT8: + *(UINT64 *) argp = (UINT64) *(UINT8 *)(*p_argv); + break; + + case FFI_TYPE_SINT16: + *(SINT64 *) argp = (SINT64) *(SINT16 *)(*p_argv); + break; + + case FFI_TYPE_UINT16: + *(UINT64 *) argp = (UINT64) *(UINT16 *)(*p_argv); + break; + + case FFI_TYPE_STRUCT: + memcpy (argp, *p_argv, z); + break; + + default: + FFI_ASSERT(0); + } + argp += sizeof (UINT64); + } + else if (z == sizeof (UINT32) && align == sizeof (UINT32)) + { + switch ((*p_arg)->type) + { + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + *(SINT64 *) argp = (SINT64) *(SINT32 *) (*p_argv); + break; + + case FFI_TYPE_FLOAT: + case FFI_TYPE_POINTER: + case FFI_TYPE_UINT32: + case FFI_TYPE_STRUCT: + *(UINT64 *) argp = (UINT64) *(UINT32 *) (*p_argv); + break; + + default: + FFI_ASSERT(0); + break; + } + argp += sizeof (UINT64); + } + else if (z == sizeof (UINT64) + && align == sizeof (UINT64) + && ((int) *p_argv & (sizeof (UINT64) - 1)) == 0) + { + *(UINT64 *) argp = *(UINT64 *) (*p_argv); + argp += sizeof (UINT64); + } + else + { + int n = (z + sizeof (UINT64) - 1) / sizeof (UINT64); + + memcpy (argp, *p_argv, z); + argp += n * sizeof (UINT64); + } + } + + return; +} + +/* Perform machine dependent cif processing */ +ffi_status ffi_prep_cif_machdep(ffi_cif *cif) +{ + int i, j; + int size, type; + int n, m; + int greg; + int freg; + int fpair = -1; + + greg = (return_type (cif->rtype) == FFI_TYPE_STRUCT ? 1 : 0); + freg = 0; + cif->flags2 = 0; + + for (i = j = 0; i < cif->nargs; i++) + { + type = (cif->arg_types)[i]->type; + switch (type) + { + case FFI_TYPE_FLOAT: + greg++; + cif->bytes += sizeof (UINT64) - sizeof (float); + if (freg >= NFREGARG - 1) + continue; + if (fpair < 0) + { + fpair = freg; + freg += 2; + } + else + fpair = -1; + cif->flags2 += ((cif->arg_types)[i]->type) << (2 * j++); + break; + + case FFI_TYPE_DOUBLE: + if (greg++ >= NGREGARG && (freg + 1) >= NFREGARG) + continue; + if ((freg + 1) < NFREGARG) + { + freg += 2; + cif->flags2 += ((cif->arg_types)[i]->type) << (2 * j++); + } + else + cif->flags2 += FFI_TYPE_INT << (2 * j++); + break; + + default: + size = (cif->arg_types)[i]->size; + if (size < sizeof (UINT64)) + cif->bytes += sizeof (UINT64) - size; + n = (size + sizeof (UINT64) - 1) / sizeof (UINT64); + if (greg >= NGREGARG) + continue; + else if (greg + n - 1 >= NGREGARG) + greg = NGREGARG; + else + greg += n; + for (m = 0; m < n; m++) + cif->flags2 += FFI_TYPE_INT << (2 * j++); + break; + } + } + + /* Set the return type flag */ + switch (cif->rtype->type) + { + case FFI_TYPE_STRUCT: + cif->flags = return_type (cif->rtype); + break; + + case FFI_TYPE_VOID: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + cif->flags = cif->rtype->type; + break; + + default: + cif->flags = FFI_TYPE_INT; + break; + } + + return FFI_OK; +} + +/*@-declundef@*/ +/*@-exportheader@*/ +extern void ffi_call_SYSV(void (*)(char *, extended_cif *), + /*@out@*/ extended_cif *, + unsigned, unsigned, long long, + /*@out@*/ unsigned *, + void (*fn)(void)); +/*@=declundef@*/ +/*@=exportheader@*/ + +void ffi_call(/*@dependent@*/ ffi_cif *cif, + void (*fn)(void), + /*@out@*/ void *rvalue, + /*@dependent@*/ void **avalue) +{ + extended_cif ecif; + UINT64 trvalue; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return */ + /* value address then we need to make one */ + + if (cif->rtype->type == FFI_TYPE_STRUCT + && return_type (cif->rtype) != FFI_TYPE_STRUCT) + ecif.rvalue = &trvalue; + else if ((rvalue == NULL) && + (cif->rtype->type == FFI_TYPE_STRUCT)) + { + ecif.rvalue = alloca(cif->rtype->size); + } + else + ecif.rvalue = rvalue; + + switch (cif->abi) + { + case FFI_SYSV: + ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, cif->flags2, + ecif.rvalue, fn); + break; + default: + FFI_ASSERT(0); + break; + } + + if (rvalue + && cif->rtype->type == FFI_TYPE_STRUCT + && return_type (cif->rtype) != FFI_TYPE_STRUCT) + memcpy (rvalue, &trvalue, cif->rtype->size); +} + +extern void ffi_closure_SYSV (void); +extern void __ic_invalidate (void *line); + +ffi_status +ffi_prep_closure_loc (ffi_closure *closure, + ffi_cif *cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *codeloc) +{ + unsigned int *tramp; + + FFI_ASSERT (cif->abi == FFI_GCC_SYSV); + + tramp = (unsigned int *) &closure->tramp[0]; + /* Since ffi_closure is an aligned object, the ffi trampoline is + called as an SHcompact code. Sigh. + SHcompact part: + mova @(1,pc),r0; add #1,r0; jmp @r0; nop; + SHmedia part: + movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0 + movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */ +#ifdef __LITTLE_ENDIAN__ + tramp[0] = 0x7001c701; + tramp[1] = 0x0009402b; +#else + tramp[0] = 0xc7017001; + tramp[1] = 0x402b0009; +#endif + tramp[2] = 0xcc000010 | (((UINT32) ffi_closure_SYSV) >> 16) << 10; + tramp[3] = 0xc8000010 | (((UINT32) ffi_closure_SYSV) & 0xffff) << 10; + tramp[4] = 0x6bf10600; + tramp[5] = 0xcc000010 | (((UINT32) codeloc) >> 16) << 10; + tramp[6] = 0xc8000010 | (((UINT32) codeloc) & 0xffff) << 10; + tramp[7] = 0x4401fff0; + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + /* Flush the icache. */ + asm volatile ("ocbwb %0,0; synco; icbi %1,0; synci" : : "r" (tramp), + "r"(codeloc)); + + return FFI_OK; +} + +/* Basically the trampoline invokes ffi_closure_SYSV, and on + * entry, r3 holds the address of the closure. + * After storing the registers that could possibly contain + * parameters to be passed into the stack frame and setting + * up space for a return value, ffi_closure_SYSV invokes the + * following helper function to do most of the work. + */ + +int +ffi_closure_helper_SYSV (ffi_closure *closure, UINT64 *rvalue, + UINT64 *pgr, UINT64 *pfr, UINT64 *pst) +{ + void **avalue; + ffi_type **p_arg; + int i, avn; + int greg, freg; + ffi_cif *cif; + int fpair = -1; + + cif = closure->cif; + avalue = alloca (cif->nargs * sizeof (void *)); + + /* Copy the caller's structure return value address so that the closure + returns the data directly to the caller. */ + if (return_type (cif->rtype) == FFI_TYPE_STRUCT) + { + rvalue = (UINT64 *) *pgr; + greg = 1; + } + else + greg = 0; + + freg = 0; + cif = closure->cif; + avn = cif->nargs; + + /* Grab the addresses of the arguments from the stack frame. */ + for (i = 0, p_arg = cif->arg_types; i < avn; i++, p_arg++) + { + size_t z; + void *p; + + z = (*p_arg)->size; + if (z < sizeof (UINT32)) + { + p = pgr + greg++; + + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + case FFI_TYPE_STRUCT: +#ifdef __LITTLE_ENDIAN__ + avalue[i] = p; +#else + avalue[i] = ((char *) p) + sizeof (UINT32) - z; +#endif + break; + + default: + FFI_ASSERT(0); + } + } + else if (z == sizeof (UINT32)) + { + if ((*p_arg)->type == FFI_TYPE_FLOAT) + { + if (freg < NFREGARG - 1) + { + if (fpair >= 0) + { + avalue[i] = (UINT32 *) pfr + fpair; + fpair = -1; + } + else + { +#ifdef __LITTLE_ENDIAN__ + fpair = freg; + avalue[i] = (UINT32 *) pfr + (1 ^ freg); +#else + fpair = 1 ^ freg; + avalue[i] = (UINT32 *) pfr + freg; +#endif + freg += 2; + } + } + else +#ifdef __LITTLE_ENDIAN__ + avalue[i] = pgr + greg; +#else + avalue[i] = (UINT32 *) (pgr + greg) + 1; +#endif + } + else +#ifdef __LITTLE_ENDIAN__ + avalue[i] = pgr + greg; +#else + avalue[i] = (UINT32 *) (pgr + greg) + 1; +#endif + greg++; + } + else if ((*p_arg)->type == FFI_TYPE_DOUBLE) + { + if (freg + 1 >= NFREGARG) + avalue[i] = pgr + greg; + else + { + avalue[i] = pfr + (freg >> 1); + freg += 2; + } + greg++; + } + else + { + int n = (z + sizeof (UINT64) - 1) / sizeof (UINT64); + + avalue[i] = pgr + greg; + greg += n; + } + } + + (closure->fun) (cif, rvalue, avalue, closure->user_data); + + /* Tell ffi_closure_SYSV how to perform return type promotions. */ + return return_type (cif->rtype); +} + diff --git a/libffi/src/sh64/ffitarget.h b/libffi/src/sh64/ffitarget.h new file mode 100644 index 000000000..4e922fc79 --- /dev/null +++ b/libffi/src/sh64/ffitarget.h @@ -0,0 +1,53 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 1996-2003 Red Hat, Inc. + Target configuration macros for SuperH - SHmedia. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +/* ---- Generic type definitions ----------------------------------------- */ + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + FFI_SYSV, + FFI_DEFAULT_ABI = FFI_SYSV, + FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 +} ffi_abi; + +#define FFI_EXTRA_CIF_FIELDS long long flags2 +#endif + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_TRAMPOLINE_SIZE 32 +#define FFI_NATIVE_RAW_API 0 + +#endif + diff --git a/libffi/src/sh64/sysv.S b/libffi/src/sh64/sysv.S new file mode 100644 index 000000000..c4587d5f3 --- /dev/null +++ b/libffi/src/sh64/sysv.S @@ -0,0 +1,539 @@ +/* ----------------------------------------------------------------------- + sysv.S - Copyright (c) 2003, 2004, 2006, 2008 Kaz Kojima + + SuperH SHmedia Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> +#ifdef HAVE_MACHINE_ASM_H +#include <machine/asm.h> +#else +/* XXX these lose for some platforms, I'm sure. */ +#define CNAME(x) x +#define ENTRY(x) .globl CNAME(x); .type CNAME(x),%function; CNAME(x): +#endif + +#ifdef __LITTLE_ENDIAN__ +#define OFS_FLT 0 +#else +#define OFS_FLT 4 +#endif + + .section .text..SHmedia32,"ax" + + # r2: ffi_prep_args + # r3: &ecif + # r4: bytes + # r5: flags + # r6: flags2 + # r7: rvalue + # r8: fn + + # This assumes we are using gas. + .align 5 +ENTRY(ffi_call_SYSV) + # Save registers +.LFB1: + addi.l r15, -48, r15 +.LCFI0: + st.q r15, 40, r32 + st.q r15, 32, r31 + st.q r15, 24, r30 + st.q r15, 16, r29 + st.q r15, 8, r28 + st.l r15, 4, r18 + st.l r15, 0, r14 +.LCFI1: + add.l r15, r63, r14 +.LCFI2: +# add r4, r63, r28 + add r5, r63, r29 + add r6, r63, r30 + add r7, r63, r31 + add r8, r63, r32 + + addi r4, (64 + 7), r4 + andi r4, ~7, r4 + sub.l r15, r4, r15 + + ptabs/l r2, tr0 + add r15, r63, r2 + blink tr0, r18 + + addi r15, 64, r22 + movi 0, r0 + movi 0, r1 + movi -1, r23 + + pt/l 1f, tr1 + bnei/l r29, FFI_TYPE_STRUCT, tr1 + ld.l r15, 0, r19 + addi r15, 8, r15 + addi r0, 1, r0 +1: + +.L_pass: + andi r30, 3, r20 + shlri r30, 2, r30 + + pt/l .L_call_it, tr0 + pt/l .L_pass_i, tr1 + pt/l .L_pass_f, tr2 + + beqi/l r20, FFI_TYPE_VOID, tr0 + beqi/l r20, FFI_TYPE_INT, tr1 + beqi/l r20, FFI_TYPE_FLOAT, tr2 + +.L_pass_d: + addi r0, 1, r0 + pt/l 3f, tr0 + movi 12, r20 + bge/l r1, r20, tr0 + + pt/l .L_pop_d, tr1 + pt/l 2f, tr0 + blink tr1, r63 +2: + addi.l r15, 8, r15 +3: + pt/l .L_pass, tr0 + addi r1, 2, r1 + blink tr0, r63 + +.L_pop_d: + pt/l .L_pop_d_tbl, tr1 + gettr tr1, r20 + shlli r1, 2, r21 + add r20, r21, r20 + ptabs/l r20, tr1 + blink tr1, r63 + +.L_pop_d_tbl: + fld.d r15, 0, dr0 + blink tr0, r63 + fld.d r15, 0, dr2 + blink tr0, r63 + fld.d r15, 0, dr4 + blink tr0, r63 + fld.d r15, 0, dr6 + blink tr0, r63 + fld.d r15, 0, dr8 + blink tr0, r63 + fld.d r15, 0, dr10 + blink tr0, r63 + +.L_pass_f: + addi r0, 1, r0 + pt/l 3f, tr0 + movi 12, r20 + bge/l r1, r20, tr0 + + pt/l .L_pop_f, tr1 + pt/l 2f, tr0 + blink tr1, r63 +2: + addi.l r15, 8, r15 +3: + pt/l .L_pass, tr0 + blink tr0, r63 + +.L_pop_f: + pt/l .L_pop_f_tbl, tr1 + pt/l 5f, tr2 + gettr tr1, r20 + bge/l r23, r63, tr2 + add r1, r63, r23 + shlli r1, 3, r21 + addi r1, 2, r1 + add r20, r21, r20 + ptabs/l r20, tr1 + blink tr1, r63 +5: + addi r23, 1, r21 + movi -1, r23 + shlli r21, 3, r21 + add r20, r21, r20 + ptabs/l r20, tr1 + blink tr1, r63 + +.L_pop_f_tbl: + fld.s r15, OFS_FLT, fr0 + blink tr0, r63 + fld.s r15, OFS_FLT, fr1 + blink tr0, r63 + fld.s r15, OFS_FLT, fr2 + blink tr0, r63 + fld.s r15, OFS_FLT, fr3 + blink tr0, r63 + fld.s r15, OFS_FLT, fr4 + blink tr0, r63 + fld.s r15, OFS_FLT, fr5 + blink tr0, r63 + fld.s r15, OFS_FLT, fr6 + blink tr0, r63 + fld.s r15, OFS_FLT, fr7 + blink tr0, r63 + fld.s r15, OFS_FLT, fr8 + blink tr0, r63 + fld.s r15, OFS_FLT, fr9 + blink tr0, r63 + fld.s r15, OFS_FLT, fr10 + blink tr0, r63 + fld.s r15, OFS_FLT, fr11 + blink tr0, r63 + +.L_pass_i: + pt/l 3f, tr0 + movi 8, r20 + bge/l r0, r20, tr0 + + pt/l .L_pop_i, tr1 + pt/l 2f, tr0 + blink tr1, r63 +2: + addi.l r15, 8, r15 +3: + pt/l .L_pass, tr0 + addi r0, 1, r0 + blink tr0, r63 + +.L_pop_i: + pt/l .L_pop_i_tbl, tr1 + gettr tr1, r20 + shlli r0, 3, r21 + add r20, r21, r20 + ptabs/l r20, tr1 + blink tr1, r63 + +.L_pop_i_tbl: + ld.q r15, 0, r2 + blink tr0, r63 + ld.q r15, 0, r3 + blink tr0, r63 + ld.q r15, 0, r4 + blink tr0, r63 + ld.q r15, 0, r5 + blink tr0, r63 + ld.q r15, 0, r6 + blink tr0, r63 + ld.q r15, 0, r7 + blink tr0, r63 + ld.q r15, 0, r8 + blink tr0, r63 + ld.q r15, 0, r9 + blink tr0, r63 + +.L_call_it: + # call function + pt/l 1f, tr1 + bnei/l r29, FFI_TYPE_STRUCT, tr1 + add r19, r63, r2 +1: + add r22, r63, r15 + ptabs/l r32, tr0 + blink tr0, r18 + + pt/l .L_ret_i, tr0 + pt/l .L_ret_ll, tr1 + pt/l .L_ret_d, tr2 + pt/l .L_ret_f, tr3 + pt/l .L_epilogue, tr4 + + beqi/l r29, FFI_TYPE_INT, tr0 + beqi/l r29, FFI_TYPE_UINT32, tr0 + beqi/l r29, FFI_TYPE_SINT64, tr1 + beqi/l r29, FFI_TYPE_UINT64, tr1 + beqi/l r29, FFI_TYPE_DOUBLE, tr2 + beqi/l r29, FFI_TYPE_FLOAT, tr3 + + pt/l .L_ret_q, tr0 + pt/l .L_ret_h, tr1 + + beqi/l r29, FFI_TYPE_UINT8, tr0 + beqi/l r29, FFI_TYPE_UINT16, tr1 + blink tr4, r63 + +.L_ret_d: + fst.d r31, 0, dr0 + blink tr4, r63 + +.L_ret_ll: + st.q r31, 0, r2 + blink tr4, r63 + +.L_ret_f: + fst.s r31, OFS_FLT, fr0 + blink tr4, r63 + +.L_ret_q: + st.b r31, 0, r2 + blink tr4, r63 + +.L_ret_h: + st.w r31, 0, r2 + blink tr4, r63 + +.L_ret_i: + st.l r31, 0, r2 + # Fall + +.L_epilogue: + # Remove the space we pushed for the args + add r14, r63, r15 + + ld.l r15, 0, r14 + ld.l r15, 4, r18 + ld.q r15, 8, r28 + ld.q r15, 16, r29 + ld.q r15, 24, r30 + ld.q r15, 32, r31 + ld.q r15, 40, r32 + addi.l r15, 48, r15 + ptabs r18, tr0 + blink tr0, r63 + +.LFE1: +.ffi_call_SYSV_end: + .size CNAME(ffi_call_SYSV),.ffi_call_SYSV_end-CNAME(ffi_call_SYSV) + + .align 5 +ENTRY(ffi_closure_SYSV) +.LFB2: + addi.l r15, -136, r15 +.LCFI3: + st.l r15, 12, r18 + st.l r15, 8, r14 + st.l r15, 4, r12 +.LCFI4: + add r15, r63, r14 +.LCFI5: + /* Stack layout: + ... + 64 bytes (register parameters) + 48 bytes (floating register parameters) + 8 bytes (result) + 4 bytes (r18) + 4 bytes (r14) + 4 bytes (r12) + 4 bytes (for align) + <- new stack pointer + */ + fst.d r14, 24, dr0 + fst.d r14, 32, dr2 + fst.d r14, 40, dr4 + fst.d r14, 48, dr6 + fst.d r14, 56, dr8 + fst.d r14, 64, dr10 + st.q r14, 72, r2 + st.q r14, 80, r3 + st.q r14, 88, r4 + st.q r14, 96, r5 + st.q r14, 104, r6 + st.q r14, 112, r7 + st.q r14, 120, r8 + st.q r14, 128, r9 + + add r1, r63, r2 + addi r14, 16, r3 + addi r14, 72, r4 + addi r14, 24, r5 + addi r14, 136, r6 +#ifdef PIC + movi (((datalabel _GLOBAL_OFFSET_TABLE_-(.LPCS0-.)) >> 16) & 65535), r12 + shori ((datalabel _GLOBAL_OFFSET_TABLE_-(.LPCS0-.)) & 65535), r12 +.LPCS0: ptrel/u r12, tr0 + movi ((ffi_closure_helper_SYSV@GOTPLT) & 65535), r1 + gettr tr0, r12 + ldx.l r1, r12, r1 + ptabs r1, tr0 +#else + pt/l ffi_closure_helper_SYSV, tr0 +#endif + blink tr0, r18 + + shlli r2, 1, r1 + movi (((datalabel .L_table) >> 16) & 65535), r2 + shori ((datalabel .L_table) & 65535), r2 + ldx.w r2, r1, r1 + add r1, r2, r1 + pt/l .L_case_v, tr1 + ptabs r1, tr0 + blink tr0, r63 + + .align 2 +.L_table: + .word .L_case_v - datalabel .L_table /* FFI_TYPE_VOID */ + .word .L_case_i - datalabel .L_table /* FFI_TYPE_INT */ + .word .L_case_f - datalabel .L_table /* FFI_TYPE_FLOAT */ + .word .L_case_d - datalabel .L_table /* FFI_TYPE_DOUBLE */ + .word .L_case_d - datalabel .L_table /* FFI_TYPE_LONGDOUBLE */ + .word .L_case_uq - datalabel .L_table /* FFI_TYPE_UINT8 */ + .word .L_case_q - datalabel .L_table /* FFI_TYPE_SINT8 */ + .word .L_case_uh - datalabel .L_table /* FFI_TYPE_UINT16 */ + .word .L_case_h - datalabel .L_table /* FFI_TYPE_SINT16 */ + .word .L_case_i - datalabel .L_table /* FFI_TYPE_UINT32 */ + .word .L_case_i - datalabel .L_table /* FFI_TYPE_SINT32 */ + .word .L_case_ll - datalabel .L_table /* FFI_TYPE_UINT64 */ + .word .L_case_ll - datalabel .L_table /* FFI_TYPE_SINT64 */ + .word .L_case_v - datalabel .L_table /* FFI_TYPE_STRUCT */ + .word .L_case_i - datalabel .L_table /* FFI_TYPE_POINTER */ + + .align 2 +.L_case_d: + fld.d r14, 16, dr0 + blink tr1, r63 +.L_case_f: + fld.s r14, 16, fr0 + blink tr1, r63 +.L_case_ll: + ld.q r14, 16, r2 + blink tr1, r63 +.L_case_i: + ld.l r14, 16, r2 + blink tr1, r63 +.L_case_q: + ld.b r14, 16, r2 + blink tr1, r63 +.L_case_uq: + ld.ub r14, 16, r2 + blink tr1, r63 +.L_case_h: + ld.w r14, 16, r2 + blink tr1, r63 +.L_case_uh: + ld.uw r14, 16, r2 + blink tr1, r63 +.L_case_v: + add.l r14, r63, r15 + ld.l r15, 4, r12 + ld.l r15, 8, r14 + ld.l r15, 12, r18 + addi.l r15, 136, r15 + ptabs r18, tr0 + blink tr0, r63 + +.LFE2: +.ffi_closure_SYSV_end: + .size CNAME(ffi_closure_SYSV),.ffi_closure_SYSV_end-CNAME(ffi_closure_SYSV) + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif + + .section ".eh_frame","aw",@progbits +__FRAME_BEGIN__: + .4byte .LECIE1-.LSCIE1 /* Length of Common Information Entry */ +.LSCIE1: + .4byte 0x0 /* CIE Identifier Tag */ + .byte 0x1 /* CIE Version */ +#ifdef PIC + .ascii "zR\0" /* CIE Augmentation */ +#else + .byte 0x0 /* CIE Augmentation */ +#endif + .uleb128 0x1 /* CIE Code Alignment Factor */ + .sleb128 -4 /* CIE Data Alignment Factor */ + .byte 0x12 /* CIE RA Column */ +#ifdef PIC + .uleb128 0x1 /* Augmentation size */ + .byte 0x10 /* FDE Encoding (pcrel) */ +#endif + .byte 0xc /* DW_CFA_def_cfa */ + .uleb128 0xf + .uleb128 0x0 + .align 2 +.LECIE1: +.LSFDE1: + .4byte datalabel .LEFDE1-datalabel .LASFDE1 /* FDE Length */ +.LASFDE1: + .4byte datalabel .LASFDE1-datalabel __FRAME_BEGIN__ +#ifdef PIC + .4byte .LFB1-. /* FDE initial location */ +#else + .4byte .LFB1 /* FDE initial location */ +#endif + .4byte datalabel .LFE1-datalabel .LFB1 /* FDE address range */ +#ifdef PIC + .uleb128 0x0 /* Augmentation size */ +#endif + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte datalabel .LCFI0-datalabel .LFB1 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .uleb128 0x30 + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte datalabel .LCFI1-datalabel .LCFI0 + .byte 0x8e /* DW_CFA_offset, column 0xe */ + .uleb128 0xc + .byte 0x92 /* DW_CFA_offset, column 0x12 */ + .uleb128 0xb + .byte 0x9c /* DW_CFA_offset, column 0x1c */ + .uleb128 0xa + .byte 0x9d /* DW_CFA_offset, column 0x1d */ + .uleb128 0x8 + .byte 0x9e /* DW_CFA_offset, column 0x1e */ + .uleb128 0x6 + .byte 0x9f /* DW_CFA_offset, column 0x1f */ + .uleb128 0x4 + .byte 0xa0 /* DW_CFA_offset, column 0x20 */ + .uleb128 0x2 + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte datalabel .LCFI2-datalabel .LCFI1 + .byte 0xd /* DW_CFA_def_cfa_register */ + .uleb128 0xe + .align 2 +.LEFDE1: + +.LSFDE3: + .4byte datalabel .LEFDE3-datalabel .LASFDE3 /* FDE Length */ +.LASFDE3: + .4byte datalabel .LASFDE3-datalabel __FRAME_BEGIN__ +#ifdef PIC + .4byte .LFB2-. /* FDE initial location */ +#else + .4byte .LFB2 /* FDE initial location */ +#endif + .4byte datalabel .LFE2-datalabel .LFB2 /* FDE address range */ +#ifdef PIC + .uleb128 0x0 /* Augmentation size */ +#endif + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte datalabel .LCFI3-datalabel .LFB2 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .uleb128 0x88 + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte datalabel .LCFI4-datalabel .LCFI3 + .byte 0x8c /* DW_CFA_offset, column 0xc */ + .uleb128 0x21 + .byte 0x8e /* DW_CFA_offset, column 0xe */ + .uleb128 0x20 + .byte 0x92 /* DW_CFA_offset, column 0x12 */ + .uleb128 0x1f + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte datalabel .LCFI5-datalabel .LCFI4 + .byte 0xd /* DW_CFA_def_cfa_register */ + .uleb128 0xe + .align 2 +.LEFDE3: diff --git a/libffi/src/sparc/ffi.c b/libffi/src/sparc/ffi.c new file mode 100644 index 000000000..1d01f59ec --- /dev/null +++ b/libffi/src/sparc/ffi.c @@ -0,0 +1,625 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 1996, 2003, 2004, 2007, 2008 Red Hat, Inc. + + SPARC Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> + + +/* ffi_prep_args is called by the assembly routine once stack space + has been allocated for the function's arguments */ + +void ffi_prep_args_v8(char *stack, extended_cif *ecif) +{ + int i; + void **p_argv; + char *argp; + ffi_type **p_arg; + + /* Skip 16 words for the window save area */ + argp = stack + 16*sizeof(int); + + /* This should only really be done when we are returning a structure, + however, it's faster just to do it all the time... + + if ( ecif->cif->rtype->type == FFI_TYPE_STRUCT ) */ + *(int *) argp = (long)ecif->rvalue; + + /* And 1 word for the structure return value. */ + argp += sizeof(int); + +#ifdef USING_PURIFY + /* Purify will probably complain in our assembly routine, unless we + zero out this memory. */ + + ((int*)argp)[0] = 0; + ((int*)argp)[1] = 0; + ((int*)argp)[2] = 0; + ((int*)argp)[3] = 0; + ((int*)argp)[4] = 0; + ((int*)argp)[5] = 0; +#endif + + p_argv = ecif->avalue; + + for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; i; i--, p_arg++) + { + size_t z; + + if ((*p_arg)->type == FFI_TYPE_STRUCT +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + || (*p_arg)->type == FFI_TYPE_LONGDOUBLE +#endif + ) + { + *(unsigned int *) argp = (unsigned long)(* p_argv); + z = sizeof(int); + } + else + { + z = (*p_arg)->size; + if (z < sizeof(int)) + { + z = sizeof(int); + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + *(signed int *) argp = *(SINT8 *)(* p_argv); + break; + + case FFI_TYPE_UINT8: + *(unsigned int *) argp = *(UINT8 *)(* p_argv); + break; + + case FFI_TYPE_SINT16: + *(signed int *) argp = *(SINT16 *)(* p_argv); + break; + + case FFI_TYPE_UINT16: + *(unsigned int *) argp = *(UINT16 *)(* p_argv); + break; + + default: + FFI_ASSERT(0); + } + } + else + { + memcpy(argp, *p_argv, z); + } + } + p_argv++; + argp += z; + } + + return; +} + +int ffi_prep_args_v9(char *stack, extended_cif *ecif) +{ + int i, ret = 0; + int tmp; + void **p_argv; + char *argp; + ffi_type **p_arg; + + tmp = 0; + + /* Skip 16 words for the window save area */ + argp = stack + 16*sizeof(long long); + +#ifdef USING_PURIFY + /* Purify will probably complain in our assembly routine, unless we + zero out this memory. */ + + ((long long*)argp)[0] = 0; + ((long long*)argp)[1] = 0; + ((long long*)argp)[2] = 0; + ((long long*)argp)[3] = 0; + ((long long*)argp)[4] = 0; + ((long long*)argp)[5] = 0; +#endif + + p_argv = ecif->avalue; + + if (ecif->cif->rtype->type == FFI_TYPE_STRUCT && + ecif->cif->rtype->size > 32) + { + *(unsigned long long *) argp = (unsigned long)ecif->rvalue; + argp += sizeof(long long); + tmp = 1; + } + + for (i = 0, p_arg = ecif->cif->arg_types; i < ecif->cif->nargs; + i++, p_arg++) + { + size_t z; + + z = (*p_arg)->size; + switch ((*p_arg)->type) + { + case FFI_TYPE_STRUCT: + if (z > 16) + { + /* For structures larger than 16 bytes we pass reference. */ + *(unsigned long long *) argp = (unsigned long)* p_argv; + argp += sizeof(long long); + tmp++; + p_argv++; + continue; + } + /* FALLTHROUGH */ + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: +#endif + ret = 1; /* We should promote into FP regs as well as integer. */ + break; + } + if (z < sizeof(long long)) + { + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + *(signed long long *) argp = *(SINT8 *)(* p_argv); + break; + + case FFI_TYPE_UINT8: + *(unsigned long long *) argp = *(UINT8 *)(* p_argv); + break; + + case FFI_TYPE_SINT16: + *(signed long long *) argp = *(SINT16 *)(* p_argv); + break; + + case FFI_TYPE_UINT16: + *(unsigned long long *) argp = *(UINT16 *)(* p_argv); + break; + + case FFI_TYPE_SINT32: + *(signed long long *) argp = *(SINT32 *)(* p_argv); + break; + + case FFI_TYPE_UINT32: + *(unsigned long long *) argp = *(UINT32 *)(* p_argv); + break; + + case FFI_TYPE_FLOAT: + *(float *) (argp + 4) = *(FLOAT32 *)(* p_argv); /* Right justify */ + break; + + case FFI_TYPE_STRUCT: + memcpy(argp, *p_argv, z); + break; + + default: + FFI_ASSERT(0); + } + z = sizeof(long long); + tmp++; + } + else if (z == sizeof(long long)) + { + memcpy(argp, *p_argv, z); + z = sizeof(long long); + tmp++; + } + else + { + if ((tmp & 1) && (*p_arg)->alignment > 8) + { + tmp++; + argp += sizeof(long long); + } + memcpy(argp, *p_argv, z); + z = 2 * sizeof(long long); + tmp += 2; + } + p_argv++; + argp += z; + } + + return ret; +} + +/* Perform machine dependent cif processing */ +ffi_status ffi_prep_cif_machdep(ffi_cif *cif) +{ + int wordsize; + + if (cif->abi != FFI_V9) + { + wordsize = 4; + + /* If we are returning a struct, this will already have been added. + Otherwise we need to add it because it's always got to be there! */ + + if (cif->rtype->type != FFI_TYPE_STRUCT) + cif->bytes += wordsize; + + /* sparc call frames require that space is allocated for 6 args, + even if they aren't used. Make that space if necessary. */ + + if (cif->bytes < 4*6+4) + cif->bytes = 4*6+4; + } + else + { + wordsize = 8; + + /* sparc call frames require that space is allocated for 6 args, + even if they aren't used. Make that space if necessary. */ + + if (cif->bytes < 8*6) + cif->bytes = 8*6; + } + + /* Adjust cif->bytes. to include 16 words for the window save area, + and maybe the struct/union return pointer area, */ + + cif->bytes += 16 * wordsize; + + /* The stack must be 2 word aligned, so round bytes up + appropriately. */ + + cif->bytes = ALIGN(cif->bytes, 2 * wordsize); + + /* Set the return type flag */ + switch (cif->rtype->type) + { + case FFI_TYPE_VOID: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: +#endif + cif->flags = cif->rtype->type; + break; + + case FFI_TYPE_STRUCT: + if (cif->abi == FFI_V9 && cif->rtype->size > 32) + cif->flags = FFI_TYPE_VOID; + else + cif->flags = FFI_TYPE_STRUCT; + break; + + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + if (cif->abi == FFI_V9) + cif->flags = FFI_TYPE_INT; + else + cif->flags = cif->rtype->type; + break; + + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + if (cif->abi == FFI_V9) + cif->flags = FFI_TYPE_INT; + else + cif->flags = FFI_TYPE_SINT64; + break; + + default: + cif->flags = FFI_TYPE_INT; + break; + } + return FFI_OK; +} + +int ffi_v9_layout_struct(ffi_type *arg, int off, char *ret, char *intg, char *flt) +{ + ffi_type **ptr = &arg->elements[0]; + + while (*ptr != NULL) + { + if (off & ((*ptr)->alignment - 1)) + off = ALIGN(off, (*ptr)->alignment); + + switch ((*ptr)->type) + { + case FFI_TYPE_STRUCT: + off = ffi_v9_layout_struct(*ptr, off, ret, intg, flt); + off = ALIGN(off, FFI_SIZEOF_ARG); + break; + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: +#endif + memmove(ret + off, flt + off, (*ptr)->size); + off += (*ptr)->size; + break; + default: + memmove(ret + off, intg + off, (*ptr)->size); + off += (*ptr)->size; + break; + } + ptr++; + } + return off; +} + + +#ifdef SPARC64 +extern int ffi_call_v9(void *, extended_cif *, unsigned, + unsigned, unsigned *, void (*fn)(void)); +#else +extern int ffi_call_v8(void *, extended_cif *, unsigned, + unsigned, unsigned *, void (*fn)(void)); +#endif + +void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + extended_cif ecif; + void *rval = rvalue; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return */ + /* value address then we need to make one */ + + ecif.rvalue = rvalue; + if (cif->rtype->type == FFI_TYPE_STRUCT) + { + if (cif->rtype->size <= 32) + rval = alloca(64); + else + { + rval = NULL; + if (rvalue == NULL) + ecif.rvalue = alloca(cif->rtype->size); + } + } + + switch (cif->abi) + { + case FFI_V8: +#ifdef SPARC64 + /* We don't yet support calling 32bit code from 64bit */ + FFI_ASSERT(0); +#else + ffi_call_v8(ffi_prep_args_v8, &ecif, cif->bytes, + cif->flags, rvalue, fn); +#endif + break; + case FFI_V9: +#ifdef SPARC64 + ffi_call_v9(ffi_prep_args_v9, &ecif, cif->bytes, + cif->flags, rval, fn); + if (rvalue && rval && cif->rtype->type == FFI_TYPE_STRUCT) + ffi_v9_layout_struct(cif->rtype, 0, (char *)rvalue, (char *)rval, ((char *)rval)+32); +#else + /* And vice versa */ + FFI_ASSERT(0); +#endif + break; + default: + FFI_ASSERT(0); + break; + } + +} + + +#ifdef SPARC64 +extern void ffi_closure_v9(void); +#else +extern void ffi_closure_v8(void); +#endif + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *codeloc) +{ + unsigned int *tramp = (unsigned int *) &closure->tramp[0]; + unsigned long fn; +#ifdef SPARC64 + /* Trampoline address is equal to the closure address. We take advantage + of that to reduce the trampoline size by 8 bytes. */ + FFI_ASSERT (cif->abi == FFI_V9); + fn = (unsigned long) ffi_closure_v9; + tramp[0] = 0x83414000; /* rd %pc, %g1 */ + tramp[1] = 0xca586010; /* ldx [%g1+16], %g5 */ + tramp[2] = 0x81c14000; /* jmp %g5 */ + tramp[3] = 0x01000000; /* nop */ + *((unsigned long *) &tramp[4]) = fn; +#else + unsigned long ctx = (unsigned long) codeloc; + FFI_ASSERT (cif->abi == FFI_V8); + fn = (unsigned long) ffi_closure_v8; + tramp[0] = 0x03000000 | fn >> 10; /* sethi %hi(fn), %g1 */ + tramp[1] = 0x05000000 | ctx >> 10; /* sethi %hi(ctx), %g2 */ + tramp[2] = 0x81c06000 | (fn & 0x3ff); /* jmp %g1+%lo(fn) */ + tramp[3] = 0x8410a000 | (ctx & 0x3ff);/* or %g2, %lo(ctx) */ +#endif + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + /* Flush the Icache. FIXME: alignment isn't certain, assume 8 bytes */ +#ifdef SPARC64 + asm volatile ("flush %0" : : "r" (closure) : "memory"); + asm volatile ("flush %0" : : "r" (((char *) closure) + 8) : "memory"); +#else + asm volatile ("iflush %0" : : "r" (closure) : "memory"); + asm volatile ("iflush %0" : : "r" (((char *) closure) + 8) : "memory"); +#endif + + return FFI_OK; +} + +int +ffi_closure_sparc_inner_v8(ffi_closure *closure, + void *rvalue, unsigned long *gpr, unsigned long *scratch) +{ + ffi_cif *cif; + ffi_type **arg_types; + void **avalue; + int i, argn; + + cif = closure->cif; + arg_types = cif->arg_types; + avalue = alloca(cif->nargs * sizeof(void *)); + + /* Copy the caller's structure return address so that the closure + returns the data directly to the caller. */ + if (cif->flags == FFI_TYPE_STRUCT +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + || cif->flags == FFI_TYPE_LONGDOUBLE +#endif + ) + rvalue = (void *) gpr[0]; + + /* Always skip the structure return address. */ + argn = 1; + + /* Grab the addresses of the arguments from the stack frame. */ + for (i = 0; i < cif->nargs; i++) + { + if (arg_types[i]->type == FFI_TYPE_STRUCT +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + || arg_types[i]->type == FFI_TYPE_LONGDOUBLE +#endif + ) + { + /* Straight copy of invisible reference. */ + avalue[i] = (void *)gpr[argn++]; + } + else if ((arg_types[i]->type == FFI_TYPE_DOUBLE + || arg_types[i]->type == FFI_TYPE_SINT64 + || arg_types[i]->type == FFI_TYPE_UINT64) + /* gpr is 8-byte aligned. */ + && (argn % 2) != 0) + { + /* Align on a 8-byte boundary. */ + scratch[0] = gpr[argn]; + scratch[1] = gpr[argn+1]; + avalue[i] = scratch; + scratch -= 2; + argn += 2; + } + else + { + /* Always right-justify. */ + argn += ALIGN(arg_types[i]->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG; + avalue[i] = ((char *) &gpr[argn]) - arg_types[i]->size; + } + } + + /* Invoke the closure. */ + (closure->fun) (cif, rvalue, avalue, closure->user_data); + + /* Tell ffi_closure_sparc how to perform return type promotions. */ + return cif->rtype->type; +} + +int +ffi_closure_sparc_inner_v9(ffi_closure *closure, + void *rvalue, unsigned long *gpr, double *fpr) +{ + ffi_cif *cif; + ffi_type **arg_types; + void **avalue; + int i, argn, fp_slot_max; + + cif = closure->cif; + arg_types = cif->arg_types; + avalue = alloca(cif->nargs * sizeof(void *)); + + /* Copy the caller's structure return address so that the closure + returns the data directly to the caller. */ + if (cif->flags == FFI_TYPE_VOID + && cif->rtype->type == FFI_TYPE_STRUCT) + { + rvalue = (void *) gpr[0]; + /* Skip the structure return address. */ + argn = 1; + } + else + argn = 0; + + fp_slot_max = 16 - argn; + + /* Grab the addresses of the arguments from the stack frame. */ + for (i = 0; i < cif->nargs; i++) + { + if (arg_types[i]->type == FFI_TYPE_STRUCT) + { + if (arg_types[i]->size > 16) + { + /* Straight copy of invisible reference. */ + avalue[i] = (void *)gpr[argn++]; + } + else + { + /* Left-justify. */ + ffi_v9_layout_struct(arg_types[i], + 0, + (char *) &gpr[argn], + (char *) &gpr[argn], + (char *) &fpr[argn]); + avalue[i] = &gpr[argn]; + argn += ALIGN(arg_types[i]->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG; + } + } + else + { + /* Right-justify. */ + argn += ALIGN(arg_types[i]->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG; + + /* Align on a 16-byte boundary. */ +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + if (arg_types[i]->type == FFI_TYPE_LONGDOUBLE && (argn % 2) != 0) + argn++; +#endif + if (i < fp_slot_max + && (arg_types[i]->type == FFI_TYPE_FLOAT + || arg_types[i]->type == FFI_TYPE_DOUBLE +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + || arg_types[i]->type == FFI_TYPE_LONGDOUBLE +#endif + )) + avalue[i] = ((char *) &fpr[argn]) - arg_types[i]->size; + else + avalue[i] = ((char *) &gpr[argn]) - arg_types[i]->size; + } + } + + /* Invoke the closure. */ + (closure->fun) (cif, rvalue, avalue, closure->user_data); + + /* Tell ffi_closure_sparc how to perform return type promotions. */ + return cif->rtype->type; +} diff --git a/libffi/src/sparc/ffitarget.h b/libffi/src/sparc/ffitarget.h new file mode 100644 index 000000000..50554b880 --- /dev/null +++ b/libffi/src/sparc/ffitarget.h @@ -0,0 +1,68 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 1996-2003 Red Hat, Inc. + Target configuration macros for SPARC. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +/* ---- System specific configurations ----------------------------------- */ + +#if defined(__arch64__) || defined(__sparcv9) +#ifndef SPARC64 +#define SPARC64 +#endif +#endif + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + FFI_V8, + FFI_V8PLUS, + FFI_V9, +#ifdef SPARC64 + FFI_DEFAULT_ABI = FFI_V9, +#else + FFI_DEFAULT_ABI = FFI_V8, +#endif + FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 +} ffi_abi; +#endif + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_NATIVE_RAW_API 0 + +#ifdef SPARC64 +#define FFI_TRAMPOLINE_SIZE 24 +#else +#define FFI_TRAMPOLINE_SIZE 16 +#endif + +#endif + diff --git a/libffi/src/sparc/v8.S b/libffi/src/sparc/v8.S new file mode 100644 index 000000000..2c4eb60a0 --- /dev/null +++ b/libffi/src/sparc/v8.S @@ -0,0 +1,313 @@ +/* ----------------------------------------------------------------------- + v8.S - Copyright (c) 1996, 1997, 2003, 2004, 2008 Red Hat, Inc. + + SPARC Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +#define STACKFRAME 96 /* Minimum stack framesize for SPARC */ +#define ARGS (64+4) /* Offset of register area in frame */ + +.text + .align 8 +.globl ffi_call_v8 +.globl _ffi_call_v8 + +ffi_call_v8: +_ffi_call_v8: +.LLFB1: + save %sp, -STACKFRAME, %sp +.LLCFI0: + + sub %sp, %i2, %sp ! alloca() space in stack for frame to set up + add %sp, STACKFRAME, %l0 ! %l0 has start of + ! frame to set up + + mov %l0, %o0 ! call routine to set up frame + call %i0 + mov %i1, %o1 ! (delay) + + ld [%l0+ARGS], %o0 ! call foreign function + ld [%l0+ARGS+4], %o1 + ld [%l0+ARGS+8], %o2 + ld [%l0+ARGS+12], %o3 + ld [%l0+ARGS+16], %o4 + ld [%l0+ARGS+20], %o5 + call %i5 + mov %l0, %sp ! (delay) switch to frame + nop ! STRUCT returning functions skip 12 instead of 8 bytes + + ! If the return value pointer is NULL, assume no return value. + tst %i4 + bz done + nop + + cmp %i3, FFI_TYPE_INT + be,a done + st %o0, [%i4] ! (delay) + + cmp %i3, FFI_TYPE_FLOAT + be,a done + st %f0, [%i4+0] ! (delay) + + cmp %i3, FFI_TYPE_DOUBLE + be,a double + st %f0, [%i4+0] ! (delay) + + cmp %i3, FFI_TYPE_SINT8 + be,a sint8 + sll %o0, 24, %o0 ! (delay) + + cmp %i3, FFI_TYPE_UINT8 + be,a uint8 + sll %o0, 24, %o0 ! (delay) + + cmp %i3, FFI_TYPE_SINT16 + be,a sint16 + sll %o0, 16, %o0 ! (delay) + + cmp %i3, FFI_TYPE_UINT16 + be,a uint16 + sll %o0, 16, %o0 ! (delay) + + cmp %i3, FFI_TYPE_SINT64 + be,a longlong + st %o0, [%i4+0] ! (delay) +done: + ret + restore + +double: + st %f1, [%i4+4] + ret + restore + +sint8: + sra %o0, 24, %o0 + st %o0, [%i4+0] + ret + restore + +uint8: + srl %o0, 24, %o0 + st %o0, [%i4+0] + ret + restore + +sint16: + sra %o0, 16, %o0 + st %o0, [%i4+0] + ret + restore + +uint16: + srl %o0, 16, %o0 + st %o0, [%i4+0] + ret + restore + +longlong: + st %o1, [%i4+4] + ret + restore +.LLFE1: + +.ffi_call_v8_end: + .size ffi_call_v8,.ffi_call_v8_end-ffi_call_v8 + + +#undef STACKFRAME +#define STACKFRAME 104 /* 16*4 register window + + 1*4 struct return + + 6*4 args backing store + + 3*4 locals */ + +/* ffi_closure_v8(...) + + Receives the closure argument in %g2. */ + + .text + .align 8 + .globl ffi_closure_v8 + +ffi_closure_v8: +#ifdef HAVE_AS_REGISTER_PSEUDO_OP + .register %g2, #scratch +#endif +.LLFB2: + ! Reserve frame space for all arguments in case + ! we need to align them on a 8-byte boundary. + ld [%g2+FFI_TRAMPOLINE_SIZE], %g1 + ld [%g1+4], %g1 + sll %g1, 3, %g1 + add %g1, STACKFRAME, %g1 + ! %g1 == STACKFRAME + 8*nargs + neg %g1 + save %sp, %g1, %sp +.LLCFI1: + + ! Store all of the potential argument registers in va_list format. + st %i0, [%fp+68+0] + st %i1, [%fp+68+4] + st %i2, [%fp+68+8] + st %i3, [%fp+68+12] + st %i4, [%fp+68+16] + st %i5, [%fp+68+20] + + ! Call ffi_closure_sparc_inner to do the bulk of the work. + mov %g2, %o0 + add %fp, -8, %o1 + add %fp, 64, %o2 + call ffi_closure_sparc_inner_v8 + add %fp, -16, %o3 + + ! Load up the return value in the proper type. + ! See ffi_prep_cif_machdep for the list of cases. + cmp %o0, FFI_TYPE_VOID + be done1 + + cmp %o0, FFI_TYPE_INT + be done1 + ld [%fp-8], %i0 + + cmp %o0, FFI_TYPE_FLOAT + be,a done1 + ld [%fp-8], %f0 + + cmp %o0, FFI_TYPE_DOUBLE + be,a done1 + ldd [%fp-8], %f0 + +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + cmp %o0, FFI_TYPE_LONGDOUBLE + be done2 +#endif + + cmp %o0, FFI_TYPE_STRUCT + be done2 + + cmp %o0, FFI_TYPE_SINT64 + be,a done1 + ldd [%fp-8], %i0 + + ld [%fp-8], %i0 +done1: + jmp %i7+8 + restore +done2: + ! Skip 'unimp'. + jmp %i7+12 + restore +.LLFE2: + +.ffi_closure_v8_end: + .size ffi_closure_v8,.ffi_closure_v8_end-ffi_closure_v8 + +#ifdef SPARC64 +#define WS 8 +#define nword xword +#define uanword uaxword +#else +#define WS 4 +#define nword long +#define uanword uaword +#endif + +#ifdef HAVE_RO_EH_FRAME + .section ".eh_frame",#alloc +#else + .section ".eh_frame",#alloc,#write +#endif +.LLframe1: + .uaword .LLECIE1-.LLSCIE1 ! Length of Common Information Entry +.LLSCIE1: + .uaword 0x0 ! CIE Identifier Tag + .byte 0x1 ! CIE Version + .ascii "zR\0" ! CIE Augmentation + .byte 0x1 ! uleb128 0x1; CIE Code Alignment Factor + .byte 0x80-WS ! sleb128 -WS; CIE Data Alignment Factor + .byte 0xf ! CIE RA Column + .byte 0x1 ! uleb128 0x1; Augmentation size +#ifdef HAVE_AS_SPARC_UA_PCREL + .byte 0x1b ! FDE Encoding (pcrel sdata4) +#else + .byte 0x50 ! FDE Encoding (aligned absolute) +#endif + .byte 0xc ! DW_CFA_def_cfa + .byte 0xe ! uleb128 0xe + .byte 0x0 ! uleb128 0x0 + .align WS +.LLECIE1: +.LLSFDE1: + .uaword .LLEFDE1-.LLASFDE1 ! FDE Length +.LLASFDE1: + .uaword .LLASFDE1-.LLframe1 ! FDE CIE offset +#ifdef HAVE_AS_SPARC_UA_PCREL + .uaword %r_disp32(.LLFB1) + .uaword .LLFE1-.LLFB1 ! FDE address range +#else + .align WS + .nword .LLFB1 + .uanword .LLFE1-.LLFB1 ! FDE address range +#endif + .byte 0x0 ! uleb128 0x0; Augmentation size + .byte 0x4 ! DW_CFA_advance_loc4 + .uaword .LLCFI0-.LLFB1 + .byte 0xd ! DW_CFA_def_cfa_register + .byte 0x1e ! uleb128 0x1e + .byte 0x2d ! DW_CFA_GNU_window_save + .byte 0x9 ! DW_CFA_register + .byte 0xf ! uleb128 0xf + .byte 0x1f ! uleb128 0x1f + .align WS +.LLEFDE1: +.LLSFDE2: + .uaword .LLEFDE2-.LLASFDE2 ! FDE Length +.LLASFDE2: + .uaword .LLASFDE2-.LLframe1 ! FDE CIE offset +#ifdef HAVE_AS_SPARC_UA_PCREL + .uaword %r_disp32(.LLFB2) + .uaword .LLFE2-.LLFB2 ! FDE address range +#else + .align WS + .nword .LLFB2 + .uanword .LLFE2-.LLFB2 ! FDE address range +#endif + .byte 0x0 ! uleb128 0x0; Augmentation size + .byte 0x4 ! DW_CFA_advance_loc4 + .uaword .LLCFI1-.LLFB2 + .byte 0xd ! DW_CFA_def_cfa_register + .byte 0x1e ! uleb128 0x1e + .byte 0x2d ! DW_CFA_GNU_window_save + .byte 0x9 ! DW_CFA_register + .byte 0xf ! uleb128 0xf + .byte 0x1f ! uleb128 0x1f + .align WS +.LLEFDE2: + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libffi/src/sparc/v9.S b/libffi/src/sparc/v9.S new file mode 100644 index 000000000..489ff0293 --- /dev/null +++ b/libffi/src/sparc/v9.S @@ -0,0 +1,307 @@ +/* ----------------------------------------------------------------------- + v9.S - Copyright (c) 2000, 2003, 2004, 2008 Red Hat, Inc. + + SPARC 64-bit Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +#ifdef SPARC64 +/* Only compile this in for 64bit builds, because otherwise the object file + will have inproper architecture due to used instructions. */ + +#define STACKFRAME 128 /* Minimum stack framesize for SPARC */ +#define STACK_BIAS 2047 +#define ARGS (128) /* Offset of register area in frame */ + +.text + .align 8 +.globl ffi_call_v9 +.globl _ffi_call_v9 + +ffi_call_v9: +_ffi_call_v9: +.LLFB1: + save %sp, -STACKFRAME, %sp +.LLCFI0: + + sub %sp, %i2, %sp ! alloca() space in stack for frame to set up + add %sp, STACKFRAME+STACK_BIAS, %l0 ! %l0 has start of + ! frame to set up + + mov %l0, %o0 ! call routine to set up frame + call %i0 + mov %i1, %o1 ! (delay) + brz,pt %o0, 1f + ldx [%l0+ARGS], %o0 ! call foreign function + + ldd [%l0+ARGS], %f0 + ldd [%l0+ARGS+8], %f2 + ldd [%l0+ARGS+16], %f4 + ldd [%l0+ARGS+24], %f6 + ldd [%l0+ARGS+32], %f8 + ldd [%l0+ARGS+40], %f10 + ldd [%l0+ARGS+48], %f12 + ldd [%l0+ARGS+56], %f14 + ldd [%l0+ARGS+64], %f16 + ldd [%l0+ARGS+72], %f18 + ldd [%l0+ARGS+80], %f20 + ldd [%l0+ARGS+88], %f22 + ldd [%l0+ARGS+96], %f24 + ldd [%l0+ARGS+104], %f26 + ldd [%l0+ARGS+112], %f28 + ldd [%l0+ARGS+120], %f30 + +1: ldx [%l0+ARGS+8], %o1 + ldx [%l0+ARGS+16], %o2 + ldx [%l0+ARGS+24], %o3 + ldx [%l0+ARGS+32], %o4 + ldx [%l0+ARGS+40], %o5 + call %i5 + sub %l0, STACK_BIAS, %sp ! (delay) switch to frame + + ! If the return value pointer is NULL, assume no return value. + brz,pn %i4, done + nop + + cmp %i3, FFI_TYPE_INT + be,a,pt %icc, done + stx %o0, [%i4+0] ! (delay) + + cmp %i3, FFI_TYPE_FLOAT + be,a,pn %icc, done + st %f0, [%i4+0] ! (delay) + + cmp %i3, FFI_TYPE_DOUBLE + be,a,pn %icc, done + std %f0, [%i4+0] ! (delay) + + cmp %i3, FFI_TYPE_STRUCT + be,pn %icc, dostruct + + cmp %i3, FFI_TYPE_LONGDOUBLE + bne,pt %icc, done + nop + std %f0, [%i4+0] + std %f2, [%i4+8] + +done: ret + restore + +dostruct: + /* This will not work correctly for unions. */ + stx %o0, [%i4+0] + stx %o1, [%i4+8] + stx %o2, [%i4+16] + stx %o3, [%i4+24] + std %f0, [%i4+32] + std %f2, [%i4+40] + std %f4, [%i4+48] + std %f6, [%i4+56] + ret + restore +.LLFE1: + +.ffi_call_v9_end: + .size ffi_call_v9,.ffi_call_v9_end-ffi_call_v9 + + +#undef STACKFRAME +#define STACKFRAME 336 /* 16*8 register window + + 6*8 args backing store + + 20*8 locals */ +#define FP %fp+STACK_BIAS + +/* ffi_closure_v9(...) + + Receives the closure argument in %g1. */ + + .text + .align 8 + .globl ffi_closure_v9 + +ffi_closure_v9: +.LLFB2: + save %sp, -STACKFRAME, %sp +.LLCFI1: + + ! Store all of the potential argument registers in va_list format. + stx %i0, [FP+128+0] + stx %i1, [FP+128+8] + stx %i2, [FP+128+16] + stx %i3, [FP+128+24] + stx %i4, [FP+128+32] + stx %i5, [FP+128+40] + + ! Store possible floating point argument registers too. + std %f0, [FP-128] + std %f2, [FP-120] + std %f4, [FP-112] + std %f6, [FP-104] + std %f8, [FP-96] + std %f10, [FP-88] + std %f12, [FP-80] + std %f14, [FP-72] + std %f16, [FP-64] + std %f18, [FP-56] + std %f20, [FP-48] + std %f22, [FP-40] + std %f24, [FP-32] + std %f26, [FP-24] + std %f28, [FP-16] + std %f30, [FP-8] + + ! Call ffi_closure_sparc_inner to do the bulk of the work. + mov %g1, %o0 + add %fp, STACK_BIAS-160, %o1 + add %fp, STACK_BIAS+128, %o2 + call ffi_closure_sparc_inner_v9 + add %fp, STACK_BIAS-128, %o3 + + ! Load up the return value in the proper type. + ! See ffi_prep_cif_machdep for the list of cases. + cmp %o0, FFI_TYPE_VOID + be,pn %icc, done1 + + cmp %o0, FFI_TYPE_INT + be,pn %icc, integer + + cmp %o0, FFI_TYPE_FLOAT + be,a,pn %icc, done1 + ld [FP-160], %f0 + + cmp %o0, FFI_TYPE_DOUBLE + be,a,pn %icc, done1 + ldd [FP-160], %f0 + +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + cmp %o0, FFI_TYPE_LONGDOUBLE + be,a,pn %icc, longdouble1 + ldd [FP-160], %f0 +#endif + + ! FFI_TYPE_STRUCT + ldx [FP-152], %i1 + ldx [FP-144], %i2 + ldx [FP-136], %i3 + ldd [FP-160], %f0 + ldd [FP-152], %f2 + ldd [FP-144], %f4 + ldd [FP-136], %f6 + +integer: + ldx [FP-160], %i0 + +done1: + ret + restore + +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE +longdouble1: + ldd [FP-152], %f2 + ret + restore +#endif +.LLFE2: + +.ffi_closure_v9_end: + .size ffi_closure_v9,.ffi_closure_v9_end-ffi_closure_v9 + +#ifdef HAVE_RO_EH_FRAME + .section ".eh_frame",#alloc +#else + .section ".eh_frame",#alloc,#write +#endif +.LLframe1: + .uaword .LLECIE1-.LLSCIE1 ! Length of Common Information Entry +.LLSCIE1: + .uaword 0x0 ! CIE Identifier Tag + .byte 0x1 ! CIE Version + .ascii "zR\0" ! CIE Augmentation + .byte 0x1 ! uleb128 0x1; CIE Code Alignment Factor + .byte 0x78 ! sleb128 -8; CIE Data Alignment Factor + .byte 0xf ! CIE RA Column + .byte 0x1 ! uleb128 0x1; Augmentation size +#ifdef HAVE_AS_SPARC_UA_PCREL + .byte 0x1b ! FDE Encoding (pcrel sdata4) +#else + .byte 0x50 ! FDE Encoding (aligned absolute) +#endif + .byte 0xc ! DW_CFA_def_cfa + .byte 0xe ! uleb128 0xe + .byte 0xff,0xf ! uleb128 0x7ff + .align 8 +.LLECIE1: +.LLSFDE1: + .uaword .LLEFDE1-.LLASFDE1 ! FDE Length +.LLASFDE1: + .uaword .LLASFDE1-.LLframe1 ! FDE CIE offset +#ifdef HAVE_AS_SPARC_UA_PCREL + .uaword %r_disp32(.LLFB1) + .uaword .LLFE1-.LLFB1 ! FDE address range +#else + .align 8 + .xword .LLFB1 + .uaxword .LLFE1-.LLFB1 ! FDE address range +#endif + .byte 0x0 ! uleb128 0x0; Augmentation size + .byte 0x4 ! DW_CFA_advance_loc4 + .uaword .LLCFI0-.LLFB1 + .byte 0xd ! DW_CFA_def_cfa_register + .byte 0x1e ! uleb128 0x1e + .byte 0x2d ! DW_CFA_GNU_window_save + .byte 0x9 ! DW_CFA_register + .byte 0xf ! uleb128 0xf + .byte 0x1f ! uleb128 0x1f + .align 8 +.LLEFDE1: +.LLSFDE2: + .uaword .LLEFDE2-.LLASFDE2 ! FDE Length +.LLASFDE2: + .uaword .LLASFDE2-.LLframe1 ! FDE CIE offset +#ifdef HAVE_AS_SPARC_UA_PCREL + .uaword %r_disp32(.LLFB2) + .uaword .LLFE2-.LLFB2 ! FDE address range +#else + .align 8 + .xword .LLFB2 + .uaxword .LLFE2-.LLFB2 ! FDE address range +#endif + .byte 0x0 ! uleb128 0x0; Augmentation size + .byte 0x4 ! DW_CFA_advance_loc4 + .uaword .LLCFI1-.LLFB2 + .byte 0xd ! DW_CFA_def_cfa_register + .byte 0x1e ! uleb128 0x1e + .byte 0x2d ! DW_CFA_GNU_window_save + .byte 0x9 ! DW_CFA_register + .byte 0xf ! uleb128 0xf + .byte 0x1f ! uleb128 0x1f + .align 8 +.LLEFDE2: +#endif + +#ifdef __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libffi/src/types.c b/libffi/src/types.c new file mode 100644 index 000000000..0a11eb0fb --- /dev/null +++ b/libffi/src/types.c @@ -0,0 +1,77 @@ +/* ----------------------------------------------------------------------- + types.c - Copyright (c) 1996, 1998 Red Hat, Inc. + + Predefined ffi_types needed by libffi. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +/* Hide the basic type definitions from the header file, so that we + can redefine them here as "const". */ +#define LIBFFI_HIDE_BASIC_TYPES + +#include <ffi.h> +#include <ffi_common.h> + +/* Type definitions */ + +#define FFI_TYPEDEF(name, type, id) \ +struct struct_align_##name { \ + char c; \ + type x; \ +}; \ +const ffi_type ffi_type_##name = { \ + sizeof(type), \ + offsetof(struct struct_align_##name, x), \ + id, NULL \ +} + +/* Size and alignment are fake here. They must not be 0. */ +const ffi_type ffi_type_void = { + 1, 1, FFI_TYPE_VOID, NULL +}; + +FFI_TYPEDEF(uint8, UINT8, FFI_TYPE_UINT8); +FFI_TYPEDEF(sint8, SINT8, FFI_TYPE_SINT8); +FFI_TYPEDEF(uint16, UINT16, FFI_TYPE_UINT16); +FFI_TYPEDEF(sint16, SINT16, FFI_TYPE_SINT16); +FFI_TYPEDEF(uint32, UINT32, FFI_TYPE_UINT32); +FFI_TYPEDEF(sint32, SINT32, FFI_TYPE_SINT32); +FFI_TYPEDEF(uint64, UINT64, FFI_TYPE_UINT64); +FFI_TYPEDEF(sint64, SINT64, FFI_TYPE_SINT64); + +FFI_TYPEDEF(pointer, void*, FFI_TYPE_POINTER); + +FFI_TYPEDEF(float, float, FFI_TYPE_FLOAT); +FFI_TYPEDEF(double, double, FFI_TYPE_DOUBLE); + +#ifdef __alpha__ +/* Even if we're not configured to default to 128-bit long double, + maintain binary compatibility, as -mlong-double-128 can be used + at any time. */ +/* Validate the hard-coded number below. */ +# if defined(__LONG_DOUBLE_128__) && FFI_TYPE_LONGDOUBLE != 4 +# error FFI_TYPE_LONGDOUBLE out of date +# endif +const ffi_type ffi_type_longdouble = { 16, 16, 4, NULL }; +#elif FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE +FFI_TYPEDEF(longdouble, long double, FFI_TYPE_LONGDOUBLE); +#endif diff --git a/libffi/src/x86/darwin.S b/libffi/src/x86/darwin.S new file mode 100644 index 000000000..8f0f0707a --- /dev/null +++ b/libffi/src/x86/darwin.S @@ -0,0 +1,444 @@ +/* ----------------------------------------------------------------------- + darwin.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005 Red Hat, Inc. + Copyright (C) 2008 Free Software Foundation, Inc. + + X86 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- + */ + +#ifndef __x86_64__ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +.text + +.globl _ffi_prep_args + + .align 4 +.globl _ffi_call_SYSV + +_ffi_call_SYSV: +.LFB1: + pushl %ebp +.LCFI0: + movl %esp,%ebp +.LCFI1: + subl $8,%esp + /* Make room for all of the new args. */ + movl 16(%ebp),%ecx + subl %ecx,%esp + + movl %esp,%eax + + /* Place all of the ffi_prep_args in position */ + subl $8,%esp + pushl 12(%ebp) + pushl %eax + call *8(%ebp) + + /* Return stack to previous state and call the function */ + addl $16,%esp + + call *28(%ebp) + + /* Load %ecx with the return type code */ + movl 20(%ebp),%ecx + + /* Protect %esi. We're going to pop it in the epilogue. */ + pushl %esi + + /* If the return value pointer is NULL, assume no return value. */ + cmpl $0,24(%ebp) + jne 0f + + /* Even if there is no space for the return value, we are + obliged to handle floating-point values. */ + cmpl $FFI_TYPE_FLOAT,%ecx + jne noretval + fstp %st(0) + + jmp epilogue +0: + .align 4 + call 1f +.Lstore_table: + .long noretval-.Lstore_table /* FFI_TYPE_VOID */ + .long retint-.Lstore_table /* FFI_TYPE_INT */ + .long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */ + .long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */ + .long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */ + .long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */ + .long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */ + .long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */ + .long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */ + .long retint-.Lstore_table /* FFI_TYPE_UINT32 */ + .long retint-.Lstore_table /* FFI_TYPE_SINT32 */ + .long retint64-.Lstore_table /* FFI_TYPE_UINT64 */ + .long retint64-.Lstore_table /* FFI_TYPE_SINT64 */ + .long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */ + .long retint-.Lstore_table /* FFI_TYPE_POINTER */ + .long retstruct1b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_1B */ + .long retstruct2b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_2B */ +1: + pop %esi + add (%esi, %ecx, 4), %esi + jmp *%esi + + /* Sign/zero extend as appropriate. */ +retsint8: + movsbl %al, %eax + jmp retint + +retsint16: + movswl %ax, %eax + jmp retint + +retuint8: + movzbl %al, %eax + jmp retint + +retuint16: + movzwl %ax, %eax + jmp retint + +retfloat: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstps (%ecx) + jmp epilogue + +retdouble: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstpl (%ecx) + jmp epilogue + +retlongdouble: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstpt (%ecx) + jmp epilogue + +retint64: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + movl %edx,4(%ecx) + jmp epilogue + +retstruct1b: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movb %al,0(%ecx) + jmp epilogue + +retstruct2b: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movw %ax,0(%ecx) + jmp epilogue + +retint: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + +retstruct: + /* Nothing to do! */ + +noretval: +epilogue: + popl %esi + movl %ebp,%esp + popl %ebp + ret + +.LFE1: +.ffi_call_SYSV_end: + + .align 4 +FFI_HIDDEN (ffi_closure_SYSV) +.globl _ffi_closure_SYSV + +_ffi_closure_SYSV: +.LFB2: + pushl %ebp +.LCFI2: + movl %esp, %ebp +.LCFI3: + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) /* resp */ + leal 8(%ebp), %edx + movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ + leal -12(%ebp), %edx + movl %edx, (%esp) /* &resp */ + movl %ebx, 8(%esp) +.LCFI7: + call L_ffi_closure_SYSV_inner$stub + movl 8(%esp), %ebx + movl -12(%ebp), %ecx + cmpl $FFI_TYPE_INT, %eax + je .Lcls_retint + + /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, + FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */ + cmpl $FFI_TYPE_UINT64, %eax + jge 0f + cmpl $FFI_TYPE_UINT8, %eax + jge .Lcls_retint + +0: cmpl $FFI_TYPE_FLOAT, %eax + je .Lcls_retfloat + cmpl $FFI_TYPE_DOUBLE, %eax + je .Lcls_retdouble + cmpl $FFI_TYPE_LONGDOUBLE, %eax + je .Lcls_retldouble + cmpl $FFI_TYPE_SINT64, %eax + je .Lcls_retllong + cmpl $FFI_TYPE_SMALL_STRUCT_1B, %eax + je .Lcls_retstruct1b + cmpl $FFI_TYPE_SMALL_STRUCT_2B, %eax + je .Lcls_retstruct2b + cmpl $FFI_TYPE_STRUCT, %eax + je .Lcls_retstruct +.Lcls_epilogue: + movl %ebp, %esp + popl %ebp + ret +.Lcls_retint: + movl (%ecx), %eax + jmp .Lcls_epilogue +.Lcls_retfloat: + flds (%ecx) + jmp .Lcls_epilogue +.Lcls_retdouble: + fldl (%ecx) + jmp .Lcls_epilogue +.Lcls_retldouble: + fldt (%ecx) + jmp .Lcls_epilogue +.Lcls_retllong: + movl (%ecx), %eax + movl 4(%ecx), %edx + jmp .Lcls_epilogue +.Lcls_retstruct1b: + movsbl (%ecx), %eax + jmp .Lcls_epilogue +.Lcls_retstruct2b: + movswl (%ecx), %eax + jmp .Lcls_epilogue +.Lcls_retstruct: + lea -8(%ebp),%esp + movl %ebp, %esp + popl %ebp + ret $4 +.LFE2: + +#if !FFI_NO_RAW_API + +#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3) +#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) +#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) +#define CIF_FLAGS_OFFSET 20 + + .align 4 +FFI_HIDDEN (ffi_closure_raw_SYSV) +.globl _ffi_closure_raw_SYSV + +_ffi_closure_raw_SYSV: +.LFB3: + pushl %ebp +.LCFI4: + movl %esp, %ebp +.LCFI5: + pushl %esi +.LCFI6: + subl $36, %esp + movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ + movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ + movl %edx, 12(%esp) /* user_data */ + leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */ + movl %edx, 8(%esp) /* raw_args */ + leal -24(%ebp), %edx + movl %edx, 4(%esp) /* &res */ + movl %esi, (%esp) /* cif */ + call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */ + movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */ + cmpl $FFI_TYPE_INT, %eax + je .Lrcls_retint + + /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, + FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */ + cmpl $FFI_TYPE_UINT64, %eax + jge 0f + cmpl $FFI_TYPE_UINT8, %eax + jge .Lrcls_retint +0: + cmpl $FFI_TYPE_FLOAT, %eax + je .Lrcls_retfloat + cmpl $FFI_TYPE_DOUBLE, %eax + je .Lrcls_retdouble + cmpl $FFI_TYPE_LONGDOUBLE, %eax + je .Lrcls_retldouble + cmpl $FFI_TYPE_SINT64, %eax + je .Lrcls_retllong +.Lrcls_epilogue: + addl $36, %esp + popl %esi + popl %ebp + ret +.Lrcls_retint: + movl -24(%ebp), %eax + jmp .Lrcls_epilogue +.Lrcls_retfloat: + flds -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retdouble: + fldl -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retldouble: + fldt -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retllong: + movl -24(%ebp), %eax + movl -20(%ebp), %edx + jmp .Lrcls_epilogue +.LFE3: +#endif + +.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5 +L_ffi_closure_SYSV_inner$stub: + .indirect_symbol _ffi_closure_SYSV_inner + hlt ; hlt ; hlt ; hlt ; hlt + + +.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EH_frame1: + .set L$set$0,LECIE1-LSCIE1 + .long L$set$0 +LSCIE1: + .long 0x0 + .byte 0x1 + .ascii "zR\0" + .byte 0x1 + .byte 0x7c + .byte 0x8 + .byte 0x1 + .byte 0x10 + .byte 0xc + .byte 0x5 + .byte 0x4 + .byte 0x88 + .byte 0x1 + .align 2 +LECIE1: +.globl _ffi_call_SYSV.eh +_ffi_call_SYSV.eh: +LSFDE1: + .set L$set$1,LEFDE1-LASFDE1 + .long L$set$1 +LASFDE1: + .long LASFDE1-EH_frame1 + .long .LFB1-. + .set L$set$2,.LFE1-.LFB1 + .long L$set$2 + .byte 0x0 + .byte 0x4 + .set L$set$3,.LCFI0-.LFB1 + .long L$set$3 + .byte 0xe + .byte 0x8 + .byte 0x84 + .byte 0x2 + .byte 0x4 + .set L$set$4,.LCFI1-.LCFI0 + .long L$set$4 + .byte 0xd + .byte 0x4 + .align 2 +LEFDE1: +.globl _ffi_closure_SYSV.eh +_ffi_closure_SYSV.eh: +LSFDE2: + .set L$set$5,LEFDE2-LASFDE2 + .long L$set$5 +LASFDE2: + .long LASFDE2-EH_frame1 + .long .LFB2-. + .set L$set$6,.LFE2-.LFB2 + .long L$set$6 + .byte 0x0 + .byte 0x4 + .set L$set$7,.LCFI2-.LFB2 + .long L$set$7 + .byte 0xe + .byte 0x8 + .byte 0x84 + .byte 0x2 + .byte 0x4 + .set L$set$8,.LCFI3-.LCFI2 + .long L$set$8 + .byte 0xd + .byte 0x4 + .align 2 +LEFDE2: + +#if !FFI_NO_RAW_API + +.globl _ffi_closure_raw_SYSV.eh +_ffi_closure_raw_SYSV.eh: +LSFDE3: + .set L$set$10,LEFDE3-LASFDE3 + .long L$set$10 +LASFDE3: + .long LASFDE3-EH_frame1 + .long .LFB3-. + .set L$set$11,.LFE3-.LFB3 + .long L$set$11 + .byte 0x0 + .byte 0x4 + .set L$set$12,.LCFI4-.LFB3 + .long L$set$12 + .byte 0xe + .byte 0x8 + .byte 0x84 + .byte 0x2 + .byte 0x4 + .set L$set$13,.LCFI5-.LCFI4 + .long L$set$13 + .byte 0xd + .byte 0x4 + .byte 0x4 + .set L$set$14,.LCFI6-.LCFI5 + .long L$set$14 + .byte 0x85 + .byte 0x3 + .align 2 +LEFDE3: + +#endif + +#endif /* ifndef __x86_64__ */ diff --git a/libffi/src/x86/darwin64.S b/libffi/src/x86/darwin64.S new file mode 100644 index 000000000..2f7394ef4 --- /dev/null +++ b/libffi/src/x86/darwin64.S @@ -0,0 +1,416 @@ +/* ----------------------------------------------------------------------- + darwin64.S - Copyright (c) 2006 Free Software Foundation, Inc. + Copyright (c) 2008 Red Hat, Inc. + derived from unix64.S + + x86-64 Foreign Function Interface for Darwin. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#ifdef __x86_64__ +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + + .file "darwin64.S" +.text + +/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, + void *raddr, void (*fnaddr)(void)); + + Bit o trickiness here -- ARGS+BYTES is the base of the stack frame + for this function. This has been allocated by ffi_call. We also + deallocate some of the stack that has been alloca'd. */ + + .align 3 + .globl _ffi_call_unix64 + +_ffi_call_unix64: +LUW0: + movq (%rsp), %r10 /* Load return address. */ + leaq (%rdi, %rsi), %rax /* Find local stack base. */ + movq %rdx, (%rax) /* Save flags. */ + movq %rcx, 8(%rax) /* Save raddr. */ + movq %rbp, 16(%rax) /* Save old frame pointer. */ + movq %r10, 24(%rax) /* Relocate return address. */ + movq %rax, %rbp /* Finalize local stack frame. */ +LUW1: + movq %rdi, %r10 /* Save a copy of the register area. */ + movq %r8, %r11 /* Save a copy of the target fn. */ + movl %r9d, %eax /* Set number of SSE registers. */ + + /* Load up all argument registers. */ + movq (%r10), %rdi + movq 8(%r10), %rsi + movq 16(%r10), %rdx + movq 24(%r10), %rcx + movq 32(%r10), %r8 + movq 40(%r10), %r9 + testl %eax, %eax + jnz Lload_sse +Lret_from_load_sse: + + /* Deallocate the reg arg area. */ + leaq 176(%r10), %rsp + + /* Call the user function. */ + call *%r11 + + /* Deallocate stack arg area; local stack frame in redzone. */ + leaq 24(%rbp), %rsp + + movq 0(%rbp), %rcx /* Reload flags. */ + movq 8(%rbp), %rdi /* Reload raddr. */ + movq 16(%rbp), %rbp /* Reload old frame pointer. */ +LUW2: + + /* The first byte of the flags contains the FFI_TYPE. */ + movzbl %cl, %r10d + leaq Lstore_table(%rip), %r11 + movslq (%r11, %r10, 4), %r10 + addq %r11, %r10 + jmp *%r10 + +Lstore_table: + .long Lst_void-Lstore_table /* FFI_TYPE_VOID */ + .long Lst_sint32-Lstore_table /* FFI_TYPE_INT */ + .long Lst_float-Lstore_table /* FFI_TYPE_FLOAT */ + .long Lst_double-Lstore_table /* FFI_TYPE_DOUBLE */ + .long Lst_ldouble-Lstore_table /* FFI_TYPE_LONGDOUBLE */ + .long Lst_uint8-Lstore_table /* FFI_TYPE_UINT8 */ + .long Lst_sint8-Lstore_table /* FFI_TYPE_SINT8 */ + .long Lst_uint16-Lstore_table /* FFI_TYPE_UINT16 */ + .long Lst_sint16-Lstore_table /* FFI_TYPE_SINT16 */ + .long Lst_uint32-Lstore_table /* FFI_TYPE_UINT32 */ + .long Lst_sint32-Lstore_table /* FFI_TYPE_SINT32 */ + .long Lst_int64-Lstore_table /* FFI_TYPE_UINT64 */ + .long Lst_int64-Lstore_table /* FFI_TYPE_SINT64 */ + .long Lst_struct-Lstore_table /* FFI_TYPE_STRUCT */ + .long Lst_int64-Lstore_table /* FFI_TYPE_POINTER */ + + .text + .align 3 +Lst_void: + ret + .align 3 +Lst_uint8: + movzbq %al, %rax + movq %rax, (%rdi) + ret + .align 3 +Lst_sint8: + movsbq %al, %rax + movq %rax, (%rdi) + ret + .align 3 +Lst_uint16: + movzwq %ax, %rax + movq %rax, (%rdi) + .align 3 +Lst_sint16: + movswq %ax, %rax + movq %rax, (%rdi) + ret + .align 3 +Lst_uint32: + movl %eax, %eax + movq %rax, (%rdi) + .align 3 +Lst_sint32: + cltq + movq %rax, (%rdi) + ret + .align 3 +Lst_int64: + movq %rax, (%rdi) + ret + .align 3 +Lst_float: + movss %xmm0, (%rdi) + ret + .align 3 +Lst_double: + movsd %xmm0, (%rdi) + ret +Lst_ldouble: + fstpt (%rdi) + ret + .align 3 +Lst_struct: + leaq -20(%rsp), %rsi /* Scratch area in redzone. */ + + /* We have to locate the values now, and since we don't want to + write too much data into the user's return value, we spill the + value to a 16 byte scratch area first. Bits 8, 9, and 10 + control where the values are located. Only one of the three + bits will be set; see ffi_prep_cif_machdep for the pattern. */ + movd %xmm0, %r10 + movd %xmm1, %r11 + testl $0x100, %ecx + cmovnz %rax, %rdx + cmovnz %r10, %rax + testl $0x200, %ecx + cmovnz %r10, %rdx + testl $0x400, %ecx + cmovnz %r10, %rax + cmovnz %r11, %rdx + movq %rax, (%rsi) + movq %rdx, 8(%rsi) + + /* Bits 12-31 contain the true size of the structure. Copy from + the scratch area to the true destination. */ + shrl $12, %ecx + rep movsb + ret + + /* Many times we can avoid loading any SSE registers at all. + It's not worth an indirect jump to load the exact set of + SSE registers needed; zero or all is a good compromise. */ + .align 3 +LUW3: +Lload_sse: + movdqa 48(%r10), %xmm0 + movdqa 64(%r10), %xmm1 + movdqa 80(%r10), %xmm2 + movdqa 96(%r10), %xmm3 + movdqa 112(%r10), %xmm4 + movdqa 128(%r10), %xmm5 + movdqa 144(%r10), %xmm6 + movdqa 160(%r10), %xmm7 + jmp Lret_from_load_sse + +LUW4: + .align 3 + .globl _ffi_closure_unix64 + +_ffi_closure_unix64: +LUW5: + /* The carry flag is set by the trampoline iff SSE registers + are used. Don't clobber it before the branch instruction. */ + leaq -200(%rsp), %rsp +LUW6: + movq %rdi, (%rsp) + movq %rsi, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rcx, 24(%rsp) + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + jc Lsave_sse +Lret_from_save_sse: + + movq %r10, %rdi + leaq 176(%rsp), %rsi + movq %rsp, %rdx + leaq 208(%rsp), %rcx + call _ffi_closure_unix64_inner + + /* Deallocate stack frame early; return value is now in redzone. */ + addq $200, %rsp +LUW7: + + /* The first byte of the return value contains the FFI_TYPE. */ + movzbl %al, %r10d + leaq Lload_table(%rip), %r11 + movslq (%r11, %r10, 4), %r10 + addq %r11, %r10 + jmp *%r10 + +Lload_table: + .long Lld_void-Lload_table /* FFI_TYPE_VOID */ + .long Lld_int32-Lload_table /* FFI_TYPE_INT */ + .long Lld_float-Lload_table /* FFI_TYPE_FLOAT */ + .long Lld_double-Lload_table /* FFI_TYPE_DOUBLE */ + .long Lld_ldouble-Lload_table /* FFI_TYPE_LONGDOUBLE */ + .long Lld_int8-Lload_table /* FFI_TYPE_UINT8 */ + .long Lld_int8-Lload_table /* FFI_TYPE_SINT8 */ + .long Lld_int16-Lload_table /* FFI_TYPE_UINT16 */ + .long Lld_int16-Lload_table /* FFI_TYPE_SINT16 */ + .long Lld_int32-Lload_table /* FFI_TYPE_UINT32 */ + .long Lld_int32-Lload_table /* FFI_TYPE_SINT32 */ + .long Lld_int64-Lload_table /* FFI_TYPE_UINT64 */ + .long Lld_int64-Lload_table /* FFI_TYPE_SINT64 */ + .long Lld_struct-Lload_table /* FFI_TYPE_STRUCT */ + .long Lld_int64-Lload_table /* FFI_TYPE_POINTER */ + + .text + .align 3 +Lld_void: + ret + .align 3 +Lld_int8: + movzbl -24(%rsp), %eax + ret + .align 3 +Lld_int16: + movzwl -24(%rsp), %eax + ret + .align 3 +Lld_int32: + movl -24(%rsp), %eax + ret + .align 3 +Lld_int64: + movq -24(%rsp), %rax + ret + .align 3 +Lld_float: + movss -24(%rsp), %xmm0 + ret + .align 3 +Lld_double: + movsd -24(%rsp), %xmm0 + ret + .align 3 +Lld_ldouble: + fldt -24(%rsp) + ret + .align 3 +Lld_struct: + /* There are four possibilities here, %rax/%rdx, %xmm0/%rax, + %rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading + both rdx and xmm1 with the second word. For the remaining, + bit 8 set means xmm0 gets the second word, and bit 9 means + that rax gets the second word. */ + movq -24(%rsp), %rcx + movq -16(%rsp), %rdx + movq -16(%rsp), %xmm1 + testl $0x100, %eax + cmovnz %rdx, %rcx + movd %rcx, %xmm0 + testl $0x200, %eax + movq -24(%rsp), %rax + cmovnz %rdx, %rax + ret + + /* See the comment above Lload_sse; the same logic applies here. */ + .align 3 +LUW8: +Lsave_sse: + movdqa %xmm0, 48(%rsp) + movdqa %xmm1, 64(%rsp) + movdqa %xmm2, 80(%rsp) + movdqa %xmm3, 96(%rsp) + movdqa %xmm4, 112(%rsp) + movdqa %xmm5, 128(%rsp) + movdqa %xmm6, 144(%rsp) + movdqa %xmm7, 160(%rsp) + jmp Lret_from_save_sse + +LUW9: +.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EH_frame1: + .set L$set$0,LECIE1-LSCIE1 /* CIE Length */ + .long L$set$0 +LSCIE1: + .long 0x0 /* CIE Identifier Tag */ + .byte 0x1 /* CIE Version */ + .ascii "zR\0" /* CIE Augmentation */ + .byte 0x1 /* uleb128 0x1; CIE Code Alignment Factor */ + .byte 0x78 /* sleb128 -8; CIE Data Alignment Factor */ + .byte 0x10 /* CIE RA Column */ + .byte 0x1 /* uleb128 0x1; Augmentation size */ + .byte 0x10 /* FDE Encoding (pcrel sdata4) */ + .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */ + .byte 0x7 /* uleb128 0x7 */ + .byte 0x8 /* uleb128 0x8 */ + .byte 0x90 /* DW_CFA_offset, column 0x10 */ + .byte 0x1 + .align 3 +LECIE1: + .globl _ffi_call_unix64.eh +_ffi_call_unix64.eh: +LSFDE1: + .set L$set$1,LEFDE1-LASFDE1 /* FDE Length */ + .long L$set$1 +LASFDE1: + .long LASFDE1-EH_frame1 /* FDE CIE offset */ + .quad LUW0-. /* FDE initial location */ + .set L$set$2,LUW4-LUW0 /* FDE address range */ + .quad L$set$2 + .byte 0x0 /* Augmentation size */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .set L$set$3,LUW1-LUW0 + .long L$set$3 + + /* New stack frame based off rbp. This is a itty bit of unwind + trickery in that the CFA *has* changed. There is no easy way + to describe it correctly on entry to the function. Fortunately, + it doesn't matter too much since at all points we can correctly + unwind back to ffi_call. Note that the location to which we + moved the return address is (the new) CFA-8, so from the + perspective of the unwind info, it hasn't moved. */ + .byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */ + .byte 0x6 + .byte 0x20 + .byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */ + .byte 0x2 + .byte 0xa /* DW_CFA_remember_state */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .set L$set$4,LUW2-LUW1 + .long L$set$4 + .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */ + .byte 0x7 + .byte 0x8 + .byte 0xc0+6 /* DW_CFA_restore, %rbp */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .set L$set$5,LUW3-LUW2 + .long L$set$5 + .byte 0xb /* DW_CFA_restore_state */ + + .align 3 +LEFDE1: + .globl _ffi_closure_unix64.eh +_ffi_closure_unix64.eh: +LSFDE3: + .set L$set$6,LEFDE3-LASFDE3 /* FDE Length */ + .long L$set$6 +LASFDE3: + .long LASFDE3-EH_frame1 /* FDE CIE offset */ + .quad LUW5-. /* FDE initial location */ + .set L$set$7,LUW9-LUW5 /* FDE address range */ + .quad L$set$7 + .byte 0x0 /* Augmentation size */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .set L$set$8,LUW6-LUW5 + .long L$set$8 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 208,1 /* uleb128 208 */ + .byte 0xa /* DW_CFA_remember_state */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .set L$set$9,LUW7-LUW6 + .long L$set$9 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x8 + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .set L$set$10,LUW8-LUW7 + .long L$set$10 + .byte 0xb /* DW_CFA_restore_state */ + + .align 3 +LEFDE3: + .subsections_via_symbols + +#endif /* __x86_64__ */ diff --git a/libffi/src/x86/ffi.c b/libffi/src/x86/ffi.c new file mode 100644 index 000000000..fea9d6dea --- /dev/null +++ b/libffi/src/x86/ffi.c @@ -0,0 +1,665 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008 Red Hat, Inc. + Copyright (c) 2002 Ranjit Mathew + Copyright (c) 2002 Bo Thorsen + Copyright (c) 2002 Roger Sayle + Copyright (C) 2008, 2010 Free Software Foundation, Inc. + + x86 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#if !defined(__x86_64__) || defined(_WIN64) + +#ifdef _WIN64 +#include <windows.h> +#endif + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> + +/* ffi_prep_args is called by the assembly routine once stack space + has been allocated for the function's arguments */ + +void ffi_prep_args(char *stack, extended_cif *ecif) +{ + register unsigned int i; + register void **p_argv; + register char *argp; + register ffi_type **p_arg; + + argp = stack; + + if (ecif->cif->flags == FFI_TYPE_STRUCT +#ifdef X86_WIN64 + && (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2 + && ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8) +#endif + ) + { + *(void **) argp = ecif->rvalue; + argp += sizeof(void*); + } + + p_argv = ecif->avalue; + + for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; + i != 0; + i--, p_arg++) + { + size_t z; + + /* Align if necessary */ + if ((sizeof(void*) - 1) & (size_t) argp) + argp = (char *) ALIGN(argp, sizeof(void*)); + + z = (*p_arg)->size; +#ifdef X86_WIN64 + if (z > sizeof(ffi_arg) + || ((*p_arg)->type == FFI_TYPE_STRUCT + && (z != 1 && z != 2 && z != 4 && z != 8)) +#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE) +#endif + ) + { + z = sizeof(ffi_arg); + *(void **)argp = *p_argv; + } + else if ((*p_arg)->type == FFI_TYPE_FLOAT) + { + memcpy(argp, *p_argv, z); + } + else +#endif + if (z < sizeof(ffi_arg)) + { + z = sizeof(ffi_arg); + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + *(ffi_sarg *) argp = (ffi_sarg)*(SINT8 *)(* p_argv); + break; + + case FFI_TYPE_UINT8: + *(ffi_arg *) argp = (ffi_arg)*(UINT8 *)(* p_argv); + break; + + case FFI_TYPE_SINT16: + *(ffi_sarg *) argp = (ffi_sarg)*(SINT16 *)(* p_argv); + break; + + case FFI_TYPE_UINT16: + *(ffi_arg *) argp = (ffi_arg)*(UINT16 *)(* p_argv); + break; + + case FFI_TYPE_SINT32: + *(ffi_sarg *) argp = (ffi_sarg)*(SINT32 *)(* p_argv); + break; + + case FFI_TYPE_UINT32: + *(ffi_arg *) argp = (ffi_arg)*(UINT32 *)(* p_argv); + break; + + case FFI_TYPE_STRUCT: + *(ffi_arg *) argp = *(ffi_arg *)(* p_argv); + break; + + default: + FFI_ASSERT(0); + } + } + else + { + memcpy(argp, *p_argv, z); + } + p_argv++; +#ifdef X86_WIN64 + argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1); +#else + argp += z; +#endif + } + + return; +} + +/* Perform machine dependent cif processing */ +ffi_status ffi_prep_cif_machdep(ffi_cif *cif) +{ + unsigned int i; + ffi_type **ptr; + + /* Set the return type flag */ + switch (cif->rtype->type) + { + case FFI_TYPE_VOID: +#if defined(X86) || defined (X86_WIN32) || defined(X86_FREEBSD) || defined(X86_DARWIN) || defined(X86_WIN64) + case FFI_TYPE_UINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT8: + case FFI_TYPE_SINT16: +#endif +#ifdef X86_WIN64 + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: +#endif + case FFI_TYPE_SINT64: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: +#ifndef X86_WIN64 +#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + case FFI_TYPE_LONGDOUBLE: +#endif +#endif + cif->flags = (unsigned) cif->rtype->type; + break; + + case FFI_TYPE_UINT64: +#ifdef X86_WIN64 + case FFI_TYPE_POINTER: +#endif + cif->flags = FFI_TYPE_SINT64; + break; + + case FFI_TYPE_STRUCT: +#ifndef X86 + if (cif->rtype->size == 1) + { + cif->flags = FFI_TYPE_SMALL_STRUCT_1B; /* same as char size */ + } + else if (cif->rtype->size == 2) + { + cif->flags = FFI_TYPE_SMALL_STRUCT_2B; /* same as short size */ + } + else if (cif->rtype->size == 4) + { +#ifdef X86_WIN64 + cif->flags = FFI_TYPE_SMALL_STRUCT_4B; +#else + cif->flags = FFI_TYPE_INT; /* same as int type */ +#endif + } + else if (cif->rtype->size == 8) + { + cif->flags = FFI_TYPE_SINT64; /* same as int64 type */ + } + else +#endif + { + cif->flags = FFI_TYPE_STRUCT; + /* allocate space for return value pointer */ + cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG); + } + break; + + default: +#ifdef X86_WIN64 + cif->flags = FFI_TYPE_SINT64; + break; + case FFI_TYPE_INT: + cif->flags = FFI_TYPE_SINT32; +#else + cif->flags = FFI_TYPE_INT; +#endif + break; + } + + for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) + { + if (((*ptr)->alignment - 1) & cif->bytes) + cif->bytes = ALIGN(cif->bytes, (*ptr)->alignment); + cif->bytes += ALIGN((*ptr)->size, FFI_SIZEOF_ARG); + } + +#ifdef X86_WIN64 + /* ensure space for storing four registers */ + cif->bytes += 4 * sizeof(ffi_arg); +#endif + +#ifdef X86_DARWIN + cif->bytes = (cif->bytes + 15) & ~0xF; +#endif + + return FFI_OK; +} + +#ifdef X86_WIN64 +extern int +ffi_call_win64(void (*)(char *, extended_cif *), extended_cif *, + unsigned, unsigned, unsigned *, void (*fn)(void)); +#elif defined(X86_WIN32) +extern void +ffi_call_win32(void (*)(char *, extended_cif *), extended_cif *, + unsigned, unsigned, unsigned *, void (*fn)(void)); +#else +extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *, + unsigned, unsigned, unsigned *, void (*fn)(void)); +#endif + +void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + extended_cif ecif; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return */ + /* value address then we need to make one */ + +#ifdef X86_WIN64 + if (rvalue == NULL + && cif->flags == FFI_TYPE_STRUCT + && cif->rtype->size != 1 && cif->rtype->size != 2 + && cif->rtype->size != 4 && cif->rtype->size != 8) + { + ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF); + } +#else + if (rvalue == NULL + && cif->flags == FFI_TYPE_STRUCT) + { + ecif.rvalue = alloca(cif->rtype->size); + } +#endif + else + ecif.rvalue = rvalue; + + + switch (cif->abi) + { +#ifdef X86_WIN64 + case FFI_WIN64: + { + /* Make copies of all struct arguments + NOTE: not sure if responsibility should be here or in caller */ + unsigned int i; + for (i=0; i < cif->nargs;i++) { + size_t size = cif->arg_types[i]->size; + if ((cif->arg_types[i]->type == FFI_TYPE_STRUCT + && (size != 1 && size != 2 && size != 4 && size != 8)) +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + || cif->arg_types[i]->type == FFI_TYPE_LONGDOUBLE +#endif + ) + { + void *local = alloca(size); + memcpy(local, avalue[i], size); + avalue[i] = local; + } + } + ffi_call_win64(ffi_prep_args, &ecif, cif->bytes, + cif->flags, ecif.rvalue, fn); + } + break; +#elif defined(X86_WIN32) + case FFI_SYSV: + case FFI_STDCALL: + ffi_call_win32(ffi_prep_args, &ecif, cif->bytes, cif->flags, + ecif.rvalue, fn); + break; +#else + case FFI_SYSV: + ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue, + fn); + break; +#endif + default: + FFI_ASSERT(0); + break; + } +} + + +/** private members **/ + +/* The following __attribute__((regparm(1))) decorations will have no effect + on MSVC - standard cdecl convention applies. */ +static void ffi_prep_incoming_args_SYSV (char *stack, void **ret, + void** args, ffi_cif* cif); +void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *) + __attribute__ ((regparm(1))); +unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *) + __attribute__ ((regparm(1))); +void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *) + __attribute__ ((regparm(1))); +#ifdef X86_WIN32 +void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *) + __attribute__ ((regparm(1))); +#endif +#ifdef X86_WIN64 +void FFI_HIDDEN ffi_closure_win64 (ffi_closure *); +#endif + +/* This function is jumped to by the trampoline */ + +#ifdef X86_WIN64 +void * FFI_HIDDEN +ffi_closure_win64_inner (ffi_closure *closure, void *args) { + ffi_cif *cif; + void **arg_area; + void *result; + void *resp = &result; + + cif = closure->cif; + arg_area = (void**) alloca (cif->nargs * sizeof (void*)); + + /* this call will initialize ARG_AREA, such that each + * element in that array points to the corresponding + * value on the stack; and if the function returns + * a structure, it will change RESP to point to the + * structure return address. */ + + ffi_prep_incoming_args_SYSV(args, &resp, arg_area, cif); + + (closure->fun) (cif, resp, arg_area, closure->user_data); + + /* The result is returned in rax. This does the right thing for + result types except for floats; we have to 'mov xmm0, rax' in the + caller to correct this. + TODO: structure sizes of 3 5 6 7 are returned by reference, too!!! + */ + return cif->rtype->size > sizeof(void *) ? resp : *(void **)resp; +} + +#else +unsigned int FFI_HIDDEN __attribute__ ((regparm(1))) +ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args) +{ + /* our various things... */ + ffi_cif *cif; + void **arg_area; + + cif = closure->cif; + arg_area = (void**) alloca (cif->nargs * sizeof (void*)); + + /* this call will initialize ARG_AREA, such that each + * element in that array points to the corresponding + * value on the stack; and if the function returns + * a structure, it will change RESP to point to the + * structure return address. */ + + ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif); + + (closure->fun) (cif, *respp, arg_area, closure->user_data); + + return cif->flags; +} +#endif /* !X86_WIN64 */ + +static void +ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue, + ffi_cif *cif) +{ + register unsigned int i; + register void **p_argv; + register char *argp; + register ffi_type **p_arg; + + argp = stack; + +#ifdef X86_WIN64 + if (cif->rtype->size > sizeof(ffi_arg) + || (cif->flags == FFI_TYPE_STRUCT + && (cif->rtype->size != 1 && cif->rtype->size != 2 + && cif->rtype->size != 4 && cif->rtype->size != 8))) { + *rvalue = *(void **) argp; + argp += sizeof(void *); + } +#else + if ( cif->flags == FFI_TYPE_STRUCT ) { + *rvalue = *(void **) argp; + argp += sizeof(void *); + } +#endif + + p_argv = avalue; + + for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++) + { + size_t z; + + /* Align if necessary */ + if ((sizeof(void*) - 1) & (size_t) argp) { + argp = (char *) ALIGN(argp, sizeof(void*)); + } + +#ifdef X86_WIN64 + if ((*p_arg)->size > sizeof(ffi_arg) + || ((*p_arg)->type == FFI_TYPE_STRUCT + && ((*p_arg)->size != 1 && (*p_arg)->size != 2 + && (*p_arg)->size != 4 && (*p_arg)->size != 8))) + { + z = sizeof(void *); + *p_argv = *(void **)argp; + } + else +#endif + { + z = (*p_arg)->size; + + /* because we're little endian, this is what it turns into. */ + + *p_argv = (void*) argp; + } + + p_argv++; +#ifdef X86_WIN64 + argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1); +#else + argp += z; +#endif + } + + return; +} + +#define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \ +{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ + void* __fun = (void*)(FUN); \ + void* __ctx = (void*)(CTX); \ + *(unsigned char*) &__tramp[0] = 0x41; \ + *(unsigned char*) &__tramp[1] = 0xbb; \ + *(unsigned int*) &__tramp[2] = MASK; /* mov $mask, %r11 */ \ + *(unsigned char*) &__tramp[6] = 0x48; \ + *(unsigned char*) &__tramp[7] = 0xb8; \ + *(void**) &__tramp[8] = __ctx; /* mov __ctx, %rax */ \ + *(unsigned char *) &__tramp[16] = 0x49; \ + *(unsigned char *) &__tramp[17] = 0xba; \ + *(void**) &__tramp[18] = __fun; /* mov __fun, %r10 */ \ + *(unsigned char *) &__tramp[26] = 0x41; \ + *(unsigned char *) &__tramp[27] = 0xff; \ + *(unsigned char *) &__tramp[28] = 0xe2; /* jmp %r10 */ \ + } + +/* How to make a trampoline. Derived from gcc/config/i386/i386.c. */ + +#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \ +{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ + unsigned int __fun = (unsigned int)(FUN); \ + unsigned int __ctx = (unsigned int)(CTX); \ + unsigned int __dis = __fun - (__ctx + 10); \ + *(unsigned char*) &__tramp[0] = 0xb8; \ + *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \ + *(unsigned char *) &__tramp[5] = 0xe9; \ + *(unsigned int*) &__tramp[6] = __dis; /* jmp __fun */ \ + } + +#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE) \ +{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ + unsigned int __fun = (unsigned int)(FUN); \ + unsigned int __ctx = (unsigned int)(CTX); \ + unsigned int __dis = __fun - (__ctx + 10); \ + unsigned short __size = (unsigned short)(SIZE); \ + *(unsigned char*) &__tramp[0] = 0xb8; \ + *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \ + *(unsigned char *) &__tramp[5] = 0xe8; \ + *(unsigned int*) &__tramp[6] = __dis; /* call __fun */ \ + *(unsigned char *) &__tramp[10] = 0xc2; \ + *(unsigned short*) &__tramp[11] = __size; /* ret __size */ \ + } + +/* the cif must already be prep'ed */ + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + void *codeloc) +{ +#ifdef X86_WIN64 +#define ISFLOAT(IDX) (cif->arg_types[IDX]->type == FFI_TYPE_FLOAT || cif->arg_types[IDX]->type == FFI_TYPE_DOUBLE) +#define FLAG(IDX) (cif->nargs>(IDX)&&ISFLOAT(IDX)?(1<<(IDX)):0) + if (cif->abi == FFI_WIN64) + { + int mask = FLAG(0)|FLAG(1)|FLAG(2)|FLAG(3); + FFI_INIT_TRAMPOLINE_WIN64 (&closure->tramp[0], + &ffi_closure_win64, + codeloc, mask); + /* make sure we can execute here */ + } +#else + if (cif->abi == FFI_SYSV) + { + FFI_INIT_TRAMPOLINE (&closure->tramp[0], + &ffi_closure_SYSV, + (void*)codeloc); + } +#ifdef X86_WIN32 + else if (cif->abi == FFI_STDCALL) + { + FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0], + &ffi_closure_STDCALL, + (void*)codeloc, cif->bytes); + } +#endif /* X86_WIN32 */ +#endif /* !X86_WIN64 */ + else + { + return FFI_BAD_ABI; + } + + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + + return FFI_OK; +} + +/* ------- Native raw API support -------------------------------- */ + +#if !FFI_NO_RAW_API + +ffi_status +ffi_prep_raw_closure_loc (ffi_raw_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,ffi_raw*,void*), + void *user_data, + void *codeloc) +{ + int i; + + if (cif->abi != FFI_SYSV) { + return FFI_BAD_ABI; + } + + /* we currently don't support certain kinds of arguments for raw + closures. This should be implemented by a separate assembly + language routine, since it would require argument processing, + something we don't do now for performance. */ + + for (i = cif->nargs-1; i >= 0; i--) + { + FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_STRUCT); + FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE); + } + + + FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV, + codeloc); + + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + + return FFI_OK; +} + +static void +ffi_prep_args_raw(char *stack, extended_cif *ecif) +{ + memcpy (stack, ecif->avalue, ecif->cif->bytes); +} + +/* we borrow this routine from libffi (it must be changed, though, to + * actually call the function passed in the first argument. as of + * libffi-1.20, this is not the case.) + */ + +void +ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue) +{ + extended_cif ecif; + void **avalue = (void **)fake_avalue; + + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return */ + /* value address then we need to make one */ + + if ((rvalue == NULL) && + (cif->rtype->type == FFI_TYPE_STRUCT)) + { + ecif.rvalue = alloca(cif->rtype->size); + } + else + ecif.rvalue = rvalue; + + + switch (cif->abi) + { +#ifdef X86_WIN32 + case FFI_SYSV: + case FFI_STDCALL: + ffi_call_win32(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags, + ecif.rvalue, fn); + break; +#else + case FFI_SYSV: + ffi_call_SYSV(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags, + ecif.rvalue, fn); + break; +#endif + default: + FFI_ASSERT(0); + break; + } +} + +#endif + +#endif /* !__x86_64__ || X86_WIN64 */ + diff --git a/libffi/src/x86/ffi64.c b/libffi/src/x86/ffi64.c new file mode 100644 index 000000000..bd907d720 --- /dev/null +++ b/libffi/src/x86/ffi64.c @@ -0,0 +1,627 @@ +/* ----------------------------------------------------------------------- + ffi64.c - Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de> + Copyright (c) 2008, 2010 Red Hat, Inc. + + x86-64 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> +#include <stdarg.h> + +#ifdef __x86_64__ + +#define MAX_GPR_REGS 6 +#define MAX_SSE_REGS 8 + +struct register_args +{ + /* Registers for argument passing. */ + UINT64 gpr[MAX_GPR_REGS]; + __int128_t sse[MAX_SSE_REGS]; +}; + +extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, + void *raddr, void (*fnaddr)(void), unsigned ssecount); + +/* All reference to register classes here is identical to the code in + gcc/config/i386/i386.c. Do *not* change one without the other. */ + +/* Register class used for passing given 64bit part of the argument. + These represent classes as documented by the PS ABI, with the + exception of SSESF, SSEDF classes, that are basically SSE class, + just gcc will use SF or DFmode move instead of DImode to avoid + reformatting penalties. + + Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves + whenever possible (upper half does contain padding). */ +enum x86_64_reg_class + { + X86_64_NO_CLASS, + X86_64_INTEGER_CLASS, + X86_64_INTEGERSI_CLASS, + X86_64_SSE_CLASS, + X86_64_SSESF_CLASS, + X86_64_SSEDF_CLASS, + X86_64_SSEUP_CLASS, + X86_64_X87_CLASS, + X86_64_X87UP_CLASS, + X86_64_COMPLEX_X87_CLASS, + X86_64_MEMORY_CLASS + }; + +#define MAX_CLASSES 4 + +#define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS) + +/* x86-64 register passing implementation. See x86-64 ABI for details. Goal + of this code is to classify each 8bytes of incoming argument by the register + class and assign registers accordingly. */ + +/* Return the union class of CLASS1 and CLASS2. + See the x86-64 PS ABI for details. */ + +static enum x86_64_reg_class +merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) +{ + /* Rule #1: If both classes are equal, this is the resulting class. */ + if (class1 == class2) + return class1; + + /* Rule #2: If one of the classes is NO_CLASS, the resulting class is + the other class. */ + if (class1 == X86_64_NO_CLASS) + return class2; + if (class2 == X86_64_NO_CLASS) + return class1; + + /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ + if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) + return X86_64_MEMORY_CLASS; + + /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ + if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) + || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) + return X86_64_INTEGERSI_CLASS; + if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS + || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) + return X86_64_INTEGER_CLASS; + + /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, + MEMORY is used. */ + if (class1 == X86_64_X87_CLASS + || class1 == X86_64_X87UP_CLASS + || class1 == X86_64_COMPLEX_X87_CLASS + || class2 == X86_64_X87_CLASS + || class2 == X86_64_X87UP_CLASS + || class2 == X86_64_COMPLEX_X87_CLASS) + return X86_64_MEMORY_CLASS; + + /* Rule #6: Otherwise class SSE is used. */ + return X86_64_SSE_CLASS; +} + +/* Classify the argument of type TYPE and mode MODE. + CLASSES will be filled by the register class used to pass each word + of the operand. The number of words is returned. In case the parameter + should be passed in memory, 0 is returned. As a special case for zero + sized containers, classes[0] will be NO_CLASS and 1 is returned. + + See the x86-64 PS ABI for details. +*/ +static int +classify_argument (ffi_type *type, enum x86_64_reg_class classes[], + size_t byte_offset) +{ + switch (type->type) + { + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_POINTER: + { + int size = byte_offset + type->size; + + if (size <= 4) + { + classes[0] = X86_64_INTEGERSI_CLASS; + return 1; + } + else if (size <= 8) + { + classes[0] = X86_64_INTEGER_CLASS; + return 1; + } + else if (size <= 12) + { + classes[0] = X86_64_INTEGER_CLASS; + classes[1] = X86_64_INTEGERSI_CLASS; + return 2; + } + else if (size <= 16) + { + classes[0] = classes[1] = X86_64_INTEGERSI_CLASS; + return 2; + } + else + FFI_ASSERT (0); + } + case FFI_TYPE_FLOAT: + if (!(byte_offset % 8)) + classes[0] = X86_64_SSESF_CLASS; + else + classes[0] = X86_64_SSE_CLASS; + return 1; + case FFI_TYPE_DOUBLE: + classes[0] = X86_64_SSEDF_CLASS; + return 1; + case FFI_TYPE_LONGDOUBLE: + classes[0] = X86_64_X87_CLASS; + classes[1] = X86_64_X87UP_CLASS; + return 2; + case FFI_TYPE_STRUCT: + { + const int UNITS_PER_WORD = 8; + int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + ffi_type **ptr; + int i; + enum x86_64_reg_class subclasses[MAX_CLASSES]; + + /* If the struct is larger than 32 bytes, pass it on the stack. */ + if (type->size > 32) + return 0; + + for (i = 0; i < words; i++) + classes[i] = X86_64_NO_CLASS; + + /* Zero sized arrays or structures are NO_CLASS. We return 0 to + signalize memory class, so handle it as special case. */ + if (!words) + { + classes[0] = X86_64_NO_CLASS; + return 1; + } + + /* Merge the fields of structure. */ + for (ptr = type->elements; *ptr != NULL; ptr++) + { + int num; + + byte_offset = ALIGN (byte_offset, (*ptr)->alignment); + + num = classify_argument (*ptr, subclasses, byte_offset % 8); + if (num == 0) + return 0; + for (i = 0; i < num; i++) + { + int pos = byte_offset / 8; + classes[i + pos] = + merge_classes (subclasses[i], classes[i + pos]); + } + + byte_offset += (*ptr)->size; + } + + if (words > 2) + { + /* When size > 16 bytes, if the first one isn't + X86_64_SSE_CLASS or any other ones aren't + X86_64_SSEUP_CLASS, everything should be passed in + memory. */ + if (classes[0] != X86_64_SSE_CLASS) + return 0; + + for (i = 1; i < words; i++) + if (classes[i] != X86_64_SSEUP_CLASS) + return 0; + } + + /* Final merger cleanup. */ + for (i = 0; i < words; i++) + { + /* If one class is MEMORY, everything should be passed in + memory. */ + if (classes[i] == X86_64_MEMORY_CLASS) + return 0; + + /* The X86_64_SSEUP_CLASS should be always preceded by + X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ + if (classes[i] == X86_64_SSEUP_CLASS + && classes[i - 1] != X86_64_SSE_CLASS + && classes[i - 1] != X86_64_SSEUP_CLASS) + { + /* The first one should never be X86_64_SSEUP_CLASS. */ + FFI_ASSERT (i != 0); + classes[i] = X86_64_SSE_CLASS; + } + + /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, + everything should be passed in memory. */ + if (classes[i] == X86_64_X87UP_CLASS + && (classes[i - 1] != X86_64_X87_CLASS)) + { + /* The first one should never be X86_64_X87UP_CLASS. */ + FFI_ASSERT (i != 0); + return 0; + } + } + return words; + } + + default: + FFI_ASSERT(0); + } + return 0; /* Never reached. */ +} + +/* Examine the argument and return set number of register required in each + class. Return zero iff parameter should be passed in memory, otherwise + the number of registers. */ + +static int +examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES], + _Bool in_return, int *pngpr, int *pnsse) +{ + int i, n, ngpr, nsse; + + n = classify_argument (type, classes, 0); + if (n == 0) + return 0; + + ngpr = nsse = 0; + for (i = 0; i < n; ++i) + switch (classes[i]) + { + case X86_64_INTEGER_CLASS: + case X86_64_INTEGERSI_CLASS: + ngpr++; + break; + case X86_64_SSE_CLASS: + case X86_64_SSESF_CLASS: + case X86_64_SSEDF_CLASS: + nsse++; + break; + case X86_64_NO_CLASS: + case X86_64_SSEUP_CLASS: + break; + case X86_64_X87_CLASS: + case X86_64_X87UP_CLASS: + case X86_64_COMPLEX_X87_CLASS: + return in_return != 0; + default: + abort (); + } + + *pngpr = ngpr; + *pnsse = nsse; + + return n; +} + +/* Perform machine dependent cif processing. */ + +ffi_status +ffi_prep_cif_machdep (ffi_cif *cif) +{ + int gprcount, ssecount, i, avn, n, ngpr, nsse, flags; + enum x86_64_reg_class classes[MAX_CLASSES]; + size_t bytes; + + gprcount = ssecount = 0; + + flags = cif->rtype->type; + if (flags != FFI_TYPE_VOID) + { + n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); + if (n == 0) + { + /* The return value is passed in memory. A pointer to that + memory is the first argument. Allocate a register for it. */ + gprcount++; + /* We don't have to do anything in asm for the return. */ + flags = FFI_TYPE_VOID; + } + else if (flags == FFI_TYPE_STRUCT) + { + /* Mark which registers the result appears in. */ + _Bool sse0 = SSE_CLASS_P (classes[0]); + _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]); + if (sse0 && !sse1) + flags |= 1 << 8; + else if (!sse0 && sse1) + flags |= 1 << 9; + else if (sse0 && sse1) + flags |= 1 << 10; + /* Mark the true size of the structure. */ + flags |= cif->rtype->size << 12; + } + } + + /* Go over all arguments and determine the way they should be passed. + If it's in a register and there is space for it, let that be so. If + not, add it's size to the stack byte count. */ + for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++) + { + if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0 + || gprcount + ngpr > MAX_GPR_REGS + || ssecount + nsse > MAX_SSE_REGS) + { + long align = cif->arg_types[i]->alignment; + + if (align < 8) + align = 8; + + bytes = ALIGN (bytes, align); + bytes += cif->arg_types[i]->size; + } + else + { + gprcount += ngpr; + ssecount += nsse; + } + } + if (ssecount) + flags |= 1 << 11; + cif->flags = flags; + cif->bytes = ALIGN (bytes, 8); + + return FFI_OK; +} + +void +ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + enum x86_64_reg_class classes[MAX_CLASSES]; + char *stack, *argp; + ffi_type **arg_types; + int gprcount, ssecount, ngpr, nsse, i, avn; + _Bool ret_in_memory; + struct register_args *reg_args; + + /* Can't call 32-bit mode from 64-bit mode. */ + FFI_ASSERT (cif->abi == FFI_UNIX64); + + /* If the return value is a struct and we don't have a return value + address then we need to make one. Note the setting of flags to + VOID above in ffi_prep_cif_machdep. */ + ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT + && (cif->flags & 0xff) == FFI_TYPE_VOID); + if (rvalue == NULL && ret_in_memory) + rvalue = alloca (cif->rtype->size); + + /* Allocate the space for the arguments, plus 4 words of temp space. */ + stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8); + reg_args = (struct register_args *) stack; + argp = stack + sizeof (struct register_args); + + gprcount = ssecount = 0; + + /* If the return value is passed in memory, add the pointer as the + first integer argument. */ + if (ret_in_memory) + reg_args->gpr[gprcount++] = (long) rvalue; + + avn = cif->nargs; + arg_types = cif->arg_types; + + for (i = 0; i < avn; ++i) + { + size_t size = arg_types[i]->size; + int n; + + n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); + if (n == 0 + || gprcount + ngpr > MAX_GPR_REGS + || ssecount + nsse > MAX_SSE_REGS) + { + long align = arg_types[i]->alignment; + + /* Stack arguments are *always* at least 8 byte aligned. */ + if (align < 8) + align = 8; + + /* Pass this argument in memory. */ + argp = (void *) ALIGN (argp, align); + memcpy (argp, avalue[i], size); + argp += size; + } + else + { + /* The argument is passed entirely in registers. */ + char *a = (char *) avalue[i]; + int j; + + for (j = 0; j < n; j++, a += 8, size -= 8) + { + switch (classes[j]) + { + case X86_64_INTEGER_CLASS: + case X86_64_INTEGERSI_CLASS: + reg_args->gpr[gprcount] = 0; + memcpy (®_args->gpr[gprcount], a, size < 8 ? size : 8); + gprcount++; + break; + case X86_64_SSE_CLASS: + case X86_64_SSEDF_CLASS: + reg_args->sse[ssecount++] = *(UINT64 *) a; + break; + case X86_64_SSESF_CLASS: + reg_args->sse[ssecount++] = *(UINT32 *) a; + break; + default: + abort(); + } + } + } + } + + ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args), + cif->flags, rvalue, fn, ssecount); +} + + +extern void ffi_closure_unix64(void); + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *codeloc) +{ + volatile unsigned short *tramp; + + tramp = (volatile unsigned short *) &closure->tramp[0]; + + tramp[0] = 0xbb49; /* mov <code>, %r11 */ + *(void * volatile *) &tramp[1] = ffi_closure_unix64; + tramp[5] = 0xba49; /* mov <data>, %r10 */ + *(void * volatile *) &tramp[6] = codeloc; + + /* Set the carry bit iff the function uses any sse registers. + This is clc or stc, together with the first byte of the jmp. */ + tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8; + + tramp[11] = 0xe3ff; /* jmp *%r11 */ + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + return FFI_OK; +} + +int +ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue, + struct register_args *reg_args, char *argp) +{ + ffi_cif *cif; + void **avalue; + ffi_type **arg_types; + long i, avn; + int gprcount, ssecount, ngpr, nsse; + int ret; + + cif = closure->cif; + avalue = alloca(cif->nargs * sizeof(void *)); + gprcount = ssecount = 0; + + ret = cif->rtype->type; + if (ret != FFI_TYPE_VOID) + { + enum x86_64_reg_class classes[MAX_CLASSES]; + int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); + if (n == 0) + { + /* The return value goes in memory. Arrange for the closure + return value to go directly back to the original caller. */ + rvalue = (void *) reg_args->gpr[gprcount++]; + /* We don't have to do anything in asm for the return. */ + ret = FFI_TYPE_VOID; + } + else if (ret == FFI_TYPE_STRUCT && n == 2) + { + /* Mark which register the second word of the structure goes in. */ + _Bool sse0 = SSE_CLASS_P (classes[0]); + _Bool sse1 = SSE_CLASS_P (classes[1]); + if (!sse0 && sse1) + ret |= 1 << 8; + else if (sse0 && !sse1) + ret |= 1 << 9; + } + } + + avn = cif->nargs; + arg_types = cif->arg_types; + + for (i = 0; i < avn; ++i) + { + enum x86_64_reg_class classes[MAX_CLASSES]; + int n; + + n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); + if (n == 0 + || gprcount + ngpr > MAX_GPR_REGS + || ssecount + nsse > MAX_SSE_REGS) + { + long align = arg_types[i]->alignment; + + /* Stack arguments are *always* at least 8 byte aligned. */ + if (align < 8) + align = 8; + + /* Pass this argument in memory. */ + argp = (void *) ALIGN (argp, align); + avalue[i] = argp; + argp += arg_types[i]->size; + } + /* If the argument is in a single register, or two consecutive + integer registers, then we can use that address directly. */ + else if (n == 1 + || (n == 2 && !(SSE_CLASS_P (classes[0]) + || SSE_CLASS_P (classes[1])))) + { + /* The argument is in a single register. */ + if (SSE_CLASS_P (classes[0])) + { + avalue[i] = ®_args->sse[ssecount]; + ssecount += n; + } + else + { + avalue[i] = ®_args->gpr[gprcount]; + gprcount += n; + } + } + /* Otherwise, allocate space to make them consecutive. */ + else + { + char *a = alloca (16); + int j; + + avalue[i] = a; + for (j = 0; j < n; j++, a += 8) + { + if (SSE_CLASS_P (classes[j])) + memcpy (a, ®_args->sse[ssecount++], 8); + else + memcpy (a, ®_args->gpr[gprcount++], 8); + } + } + } + + /* Invoke the closure. */ + closure->fun (cif, rvalue, avalue, closure->user_data); + + /* Tell assembly how to perform return type promotions. */ + return ret; +} + +#endif /* __x86_64__ */ diff --git a/libffi/src/x86/ffitarget.h b/libffi/src/x86/ffitarget.h new file mode 100644 index 000000000..b85016cc0 --- /dev/null +++ b/libffi/src/x86/ffitarget.h @@ -0,0 +1,120 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 1996-2003, 2010 Red Hat, Inc. + Copyright (C) 2008 Free Software Foundation, Inc. + + Target configuration macros for x86 and x86-64. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +/* ---- System specific configurations ----------------------------------- */ + +#if defined (X86_64) && defined (__i386__) +#undef X86_64 +#define X86 +#endif + +#ifdef X86_WIN64 +#define FFI_SIZEOF_ARG 8 +#define USE_BUILTIN_FFS 0 /* not yet implemented in mingw-64 */ +#endif + +/* ---- Generic type definitions ----------------------------------------- */ + +#ifndef LIBFFI_ASM +#ifdef X86_WIN64 +#ifdef _MSC_VER +typedef unsigned __int64 ffi_arg; +typedef __int64 ffi_sarg; +#else +typedef unsigned long long ffi_arg; +typedef long long ffi_sarg; +#endif +#else +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; +#endif + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + + /* ---- Intel x86 Win32 ---------- */ +#ifdef X86_WIN32 + FFI_SYSV, + FFI_STDCALL, + /* TODO: Add fastcall support for the sake of completeness */ + FFI_DEFAULT_ABI = FFI_SYSV, +#endif + +#ifdef X86_WIN64 + FFI_WIN64, + FFI_DEFAULT_ABI = FFI_WIN64, +#else + + /* ---- Intel x86 and AMD x86-64 - */ +#if !defined(X86_WIN32) && (defined(__i386__) || defined(__x86_64__) || defined(__i386) || defined(__amd64)) + FFI_SYSV, + FFI_UNIX64, /* Unix variants all use the same ABI for x86-64 */ +#if defined(__i386__) || defined(__i386) + FFI_DEFAULT_ABI = FFI_SYSV, +#else + FFI_DEFAULT_ABI = FFI_UNIX64, +#endif +#endif +#endif /* X86_WIN64 */ + + FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 +} ffi_abi; +#endif + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1) +#define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2) +#define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3) + +#if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN)) +#define FFI_TRAMPOLINE_SIZE 24 +#define FFI_NATIVE_RAW_API 0 +#else +#ifdef X86_WIN32 +#define FFI_TRAMPOLINE_SIZE 13 +#else +#ifdef X86_WIN64 +#define FFI_TRAMPOLINE_SIZE 29 +#define FFI_NATIVE_RAW_API 0 +#define FFI_NO_RAW_API 1 +#else +#define FFI_TRAMPOLINE_SIZE 10 +#endif +#endif +#ifndef X86_WIN64 +#define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */ +#endif +#endif + +#endif + diff --git a/libffi/src/x86/freebsd.S b/libffi/src/x86/freebsd.S new file mode 100644 index 000000000..afde51316 --- /dev/null +++ b/libffi/src/x86/freebsd.S @@ -0,0 +1,458 @@ +/* ----------------------------------------------------------------------- + freebsd.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005 Red Hat, Inc. + Copyright (c) 2008 Björn König + + X86 Foreign Function Interface for FreeBSD + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. +----------------------------------------------------------------------- */ + +#ifndef __x86_64__ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +.text + +.globl ffi_prep_args + + .align 4 +.globl ffi_call_SYSV + .type ffi_call_SYSV,@function + +ffi_call_SYSV: +.LFB1: + pushl %ebp +.LCFI0: + movl %esp,%ebp +.LCFI1: + /* Make room for all of the new args. */ + movl 16(%ebp),%ecx + subl %ecx,%esp + + movl %esp,%eax + + /* Place all of the ffi_prep_args in position */ + pushl 12(%ebp) + pushl %eax + call *8(%ebp) + + /* Return stack to previous state and call the function */ + addl $8,%esp + + call *28(%ebp) + + /* Load %ecx with the return type code */ + movl 20(%ebp),%ecx + + /* Protect %esi. We're going to pop it in the epilogue. */ + pushl %esi + + /* If the return value pointer is NULL, assume no return value. */ + cmpl $0,24(%ebp) + jne 0f + + /* Even if there is no space for the return value, we are + obliged to handle floating-point values. */ + cmpl $FFI_TYPE_FLOAT,%ecx + jne noretval + fstp %st(0) + + jmp epilogue + +0: + call 1f + +.Lstore_table: + .long noretval-.Lstore_table /* FFI_TYPE_VOID */ + .long retint-.Lstore_table /* FFI_TYPE_INT */ + .long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */ + .long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */ + .long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */ + .long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */ + .long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */ + .long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */ + .long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */ + .long retint-.Lstore_table /* FFI_TYPE_UINT32 */ + .long retint-.Lstore_table /* FFI_TYPE_SINT32 */ + .long retint64-.Lstore_table /* FFI_TYPE_UINT64 */ + .long retint64-.Lstore_table /* FFI_TYPE_SINT64 */ + .long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */ + .long retint-.Lstore_table /* FFI_TYPE_POINTER */ + .long retstruct1b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_1B */ + .long retstruct2b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_2B */ + +1: + pop %esi + add (%esi, %ecx, 4), %esi + jmp *%esi + + /* Sign/zero extend as appropriate. */ +retsint8: + movsbl %al, %eax + jmp retint + +retsint16: + movswl %ax, %eax + jmp retint + +retuint8: + movzbl %al, %eax + jmp retint + +retuint16: + movzwl %ax, %eax + jmp retint + +retfloat: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstps (%ecx) + jmp epilogue + +retdouble: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstpl (%ecx) + jmp epilogue + +retlongdouble: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstpt (%ecx) + jmp epilogue + +retint64: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + movl %edx,4(%ecx) + jmp epilogue + +retstruct1b: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movb %al,0(%ecx) + jmp epilogue + +retstruct2b: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movw %ax,0(%ecx) + jmp epilogue + +retint: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + +retstruct: + /* Nothing to do! */ + +noretval: +epilogue: + popl %esi + movl %ebp,%esp + popl %ebp + ret +.LFE1: +.ffi_call_SYSV_end: + .size ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV + + .align 4 +FFI_HIDDEN (ffi_closure_SYSV) +.globl ffi_closure_SYSV + .type ffi_closure_SYSV, @function + +ffi_closure_SYSV: +.LFB2: + pushl %ebp +.LCFI2: + movl %esp, %ebp +.LCFI3: + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) /* resp */ + leal 8(%ebp), %edx + movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ + leal -12(%ebp), %edx + movl %edx, (%esp) /* &resp */ +#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__ + call ffi_closure_SYSV_inner +#else + movl %ebx, 8(%esp) +.LCFI7: + call 1f +1: popl %ebx + addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx + call ffi_closure_SYSV_inner@PLT + movl 8(%esp), %ebx +#endif + movl -12(%ebp), %ecx + cmpl $FFI_TYPE_INT, %eax + je .Lcls_retint + + /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, + FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */ + cmpl $FFI_TYPE_UINT64, %eax + jge 0f + cmpl $FFI_TYPE_UINT8, %eax + jge .Lcls_retint + +0: cmpl $FFI_TYPE_FLOAT, %eax + je .Lcls_retfloat + cmpl $FFI_TYPE_DOUBLE, %eax + je .Lcls_retdouble + cmpl $FFI_TYPE_LONGDOUBLE, %eax + je .Lcls_retldouble + cmpl $FFI_TYPE_SINT64, %eax + je .Lcls_retllong + cmpl $FFI_TYPE_SMALL_STRUCT_1B, %eax + je .Lcls_retstruct1b + cmpl $FFI_TYPE_SMALL_STRUCT_2B, %eax + je .Lcls_retstruct2b + cmpl $FFI_TYPE_STRUCT, %eax + je .Lcls_retstruct +.Lcls_epilogue: + movl %ebp, %esp + popl %ebp + ret +.Lcls_retint: + movl (%ecx), %eax + jmp .Lcls_epilogue +.Lcls_retfloat: + flds (%ecx) + jmp .Lcls_epilogue +.Lcls_retdouble: + fldl (%ecx) + jmp .Lcls_epilogue +.Lcls_retldouble: + fldt (%ecx) + jmp .Lcls_epilogue +.Lcls_retllong: + movl (%ecx), %eax + movl 4(%ecx), %edx + jmp .Lcls_epilogue +.Lcls_retstruct1b: + movsbl (%ecx), %eax + jmp .Lcls_epilogue +.Lcls_retstruct2b: + movswl (%ecx), %eax + jmp .Lcls_epilogue +.Lcls_retstruct: + movl %ebp, %esp + popl %ebp + ret $4 +.LFE2: + .size ffi_closure_SYSV, .-ffi_closure_SYSV + +#if !FFI_NO_RAW_API + +#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3) +#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) +#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) +#define CIF_FLAGS_OFFSET 20 + + .align 4 +FFI_HIDDEN (ffi_closure_raw_SYSV) +.globl ffi_closure_raw_SYSV + .type ffi_closure_raw_SYSV, @function + +ffi_closure_raw_SYSV: +.LFB3: + pushl %ebp +.LCFI4: + movl %esp, %ebp +.LCFI5: + pushl %esi +.LCFI6: + subl $36, %esp + movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ + movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ + movl %edx, 12(%esp) /* user_data */ + leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */ + movl %edx, 8(%esp) /* raw_args */ + leal -24(%ebp), %edx + movl %edx, 4(%esp) /* &res */ + movl %esi, (%esp) /* cif */ + call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */ + movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */ + cmpl $FFI_TYPE_INT, %eax + je .Lrcls_retint + + /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, + FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */ + cmpl $FFI_TYPE_UINT64, %eax + jge 0f + cmpl $FFI_TYPE_UINT8, %eax + jge .Lrcls_retint +0: + cmpl $FFI_TYPE_FLOAT, %eax + je .Lrcls_retfloat + cmpl $FFI_TYPE_DOUBLE, %eax + je .Lrcls_retdouble + cmpl $FFI_TYPE_LONGDOUBLE, %eax + je .Lrcls_retldouble + cmpl $FFI_TYPE_SINT64, %eax + je .Lrcls_retllong +.Lrcls_epilogue: + addl $36, %esp + popl %esi + popl %ebp + ret +.Lrcls_retint: + movl -24(%ebp), %eax + jmp .Lrcls_epilogue +.Lrcls_retfloat: + flds -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retdouble: + fldl -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retldouble: + fldt -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retllong: + movl -24(%ebp), %eax + movl -20(%ebp), %edx + jmp .Lrcls_epilogue +.LFE3: + .size ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV +#endif + + .section .eh_frame,EH_FRAME_FLAGS,@progbits +.Lframe1: + .long .LECIE1-.LSCIE1 /* Length of Common Information Entry */ +.LSCIE1: + .long 0x0 /* CIE Identifier Tag */ + .byte 0x1 /* CIE Version */ +#ifdef __PIC__ + .ascii "zR\0" /* CIE Augmentation */ +#else + .ascii "\0" /* CIE Augmentation */ +#endif + .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */ + .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */ + .byte 0x8 /* CIE RA Column */ +#ifdef __PIC__ + .byte 0x1 /* .uleb128 0x1; Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ +#endif + .byte 0xc /* DW_CFA_def_cfa */ + .byte 0x4 /* .uleb128 0x4 */ + .byte 0x4 /* .uleb128 0x4 */ + .byte 0x88 /* DW_CFA_offset, column 0x8 */ + .byte 0x1 /* .uleb128 0x1 */ + .align 4 +.LECIE1: +.LSFDE1: + .long .LEFDE1-.LASFDE1 /* FDE Length */ +.LASFDE1: + .long .LASFDE1-.Lframe1 /* FDE CIE offset */ +#ifdef __PIC__ + .long .LFB1-. /* FDE initial location */ +#else + .long .LFB1 /* FDE initial location */ +#endif + .long .LFE1-.LFB1 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI0-.LFB1 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 */ + .byte 0x2 /* .uleb128 0x2 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI1-.LCFI0 + .byte 0xd /* DW_CFA_def_cfa_register */ + .byte 0x5 /* .uleb128 0x5 */ + .align 4 +.LEFDE1: +.LSFDE2: + .long .LEFDE2-.LASFDE2 /* FDE Length */ +.LASFDE2: + .long .LASFDE2-.Lframe1 /* FDE CIE offset */ +#ifdef __PIC__ + .long .LFB2-. /* FDE initial location */ +#else + .long .LFB2 +#endif + .long .LFE2-.LFB2 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI2-.LFB2 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 */ + .byte 0x2 /* .uleb128 0x2 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI3-.LCFI2 + .byte 0xd /* DW_CFA_def_cfa_register */ + .byte 0x5 /* .uleb128 0x5 */ +#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI7-.LCFI3 + .byte 0x83 /* DW_CFA_offset, column 0x3 */ + .byte 0xa /* .uleb128 0xa */ +#endif + .align 4 +.LEFDE2: + +#if !FFI_NO_RAW_API + +.LSFDE3: + .long .LEFDE3-.LASFDE3 /* FDE Length */ +.LASFDE3: + .long .LASFDE3-.Lframe1 /* FDE CIE offset */ +#ifdef __PIC__ + .long .LFB3-. /* FDE initial location */ +#else + .long .LFB3 +#endif + .long .LFE3-.LFB3 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI4-.LFB3 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 */ + .byte 0x2 /* .uleb128 0x2 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI5-.LCFI4 + .byte 0xd /* DW_CFA_def_cfa_register */ + .byte 0x5 /* .uleb128 0x5 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI6-.LCFI5 + .byte 0x86 /* DW_CFA_offset, column 0x6 */ + .byte 0x3 /* .uleb128 0x3 */ + .align 4 +.LEFDE3: + +#endif + +#endif /* ifndef __x86_64__ */ diff --git a/libffi/src/x86/sysv.S b/libffi/src/x86/sysv.S new file mode 100644 index 000000000..f108dd80d --- /dev/null +++ b/libffi/src/x86/sysv.S @@ -0,0 +1,468 @@ +/* ----------------------------------------------------------------------- + sysv.S - Copyright (c) 1996, 1998, 2001-2003, 2005, 2008, 2010 Red Hat, Inc. + + X86 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#ifndef __x86_64__ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +.text + +.globl ffi_prep_args + + .align 4 +.globl ffi_call_SYSV + .type ffi_call_SYSV,@function + +ffi_call_SYSV: +.LFB1: + pushl %ebp +.LCFI0: + movl %esp,%ebp +.LCFI1: + /* Make room for all of the new args. */ + movl 16(%ebp),%ecx + subl %ecx,%esp + + /* Align the stack pointer to 16-bytes */ + andl $0xfffffff0, %esp + + movl %esp,%eax + + /* Place all of the ffi_prep_args in position */ + pushl 12(%ebp) + pushl %eax + call *8(%ebp) + + /* Return stack to previous state and call the function */ + addl $8,%esp + + call *28(%ebp) + + /* Load %ecx with the return type code */ + movl 20(%ebp),%ecx + + /* Protect %esi. We're going to pop it in the epilogue. */ + pushl %esi + + /* If the return value pointer is NULL, assume no return value. */ + cmpl $0,24(%ebp) + jne 0f + + /* Even if there is no space for the return value, we are + obliged to handle floating-point values. */ + cmpl $FFI_TYPE_FLOAT,%ecx + jne noretval + fstp %st(0) + + jmp epilogue + +0: + call 1f + +.Lstore_table: + .long noretval-.Lstore_table /* FFI_TYPE_VOID */ + .long retint-.Lstore_table /* FFI_TYPE_INT */ + .long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */ + .long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */ + .long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */ + .long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */ + .long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */ + .long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */ + .long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */ + .long retint-.Lstore_table /* FFI_TYPE_UINT32 */ + .long retint-.Lstore_table /* FFI_TYPE_SINT32 */ + .long retint64-.Lstore_table /* FFI_TYPE_UINT64 */ + .long retint64-.Lstore_table /* FFI_TYPE_SINT64 */ + .long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */ + .long retint-.Lstore_table /* FFI_TYPE_POINTER */ + +1: + pop %esi + add (%esi, %ecx, 4), %esi + jmp *%esi + + /* Sign/zero extend as appropriate. */ +retsint8: + movsbl %al, %eax + jmp retint + +retsint16: + movswl %ax, %eax + jmp retint + +retuint8: + movzbl %al, %eax + jmp retint + +retuint16: + movzwl %ax, %eax + jmp retint + +retfloat: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstps (%ecx) + jmp epilogue + +retdouble: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstpl (%ecx) + jmp epilogue + +retlongdouble: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + fstpt (%ecx) + jmp epilogue + +retint64: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + movl %edx,4(%ecx) + jmp epilogue + +retint: + /* Load %ecx with the pointer to storage for the return value */ + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + +retstruct: + /* Nothing to do! */ + +noretval: +epilogue: + popl %esi + movl %ebp,%esp + popl %ebp + ret +.LFE1: +.ffi_call_SYSV_end: + .size ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV + + .align 4 +FFI_HIDDEN (ffi_closure_SYSV) +.globl ffi_closure_SYSV + .type ffi_closure_SYSV, @function + +ffi_closure_SYSV: +.LFB2: + pushl %ebp +.LCFI2: + movl %esp, %ebp +.LCFI3: + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) /* resp */ + leal 8(%ebp), %edx + movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ + leal -12(%ebp), %edx + movl %edx, (%esp) /* &resp */ +#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__ + call ffi_closure_SYSV_inner +#else + movl %ebx, 8(%esp) +.LCFI7: + call 1f +1: popl %ebx + addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx + call ffi_closure_SYSV_inner@PLT + movl 8(%esp), %ebx +#endif + movl -12(%ebp), %ecx + cmpl $FFI_TYPE_INT, %eax + je .Lcls_retint + + /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, + FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */ + cmpl $FFI_TYPE_UINT64, %eax + jge 0f + cmpl $FFI_TYPE_UINT8, %eax + jge .Lcls_retint + +0: cmpl $FFI_TYPE_FLOAT, %eax + je .Lcls_retfloat + cmpl $FFI_TYPE_DOUBLE, %eax + je .Lcls_retdouble + cmpl $FFI_TYPE_LONGDOUBLE, %eax + je .Lcls_retldouble + cmpl $FFI_TYPE_SINT64, %eax + je .Lcls_retllong + cmpl $FFI_TYPE_STRUCT, %eax + je .Lcls_retstruct +.Lcls_epilogue: + movl %ebp, %esp + popl %ebp + ret +.Lcls_retint: + movl (%ecx), %eax + jmp .Lcls_epilogue +.Lcls_retfloat: + flds (%ecx) + jmp .Lcls_epilogue +.Lcls_retdouble: + fldl (%ecx) + jmp .Lcls_epilogue +.Lcls_retldouble: + fldt (%ecx) + jmp .Lcls_epilogue +.Lcls_retllong: + movl (%ecx), %eax + movl 4(%ecx), %edx + jmp .Lcls_epilogue +.Lcls_retstruct: + movl %ebp, %esp + popl %ebp + ret $4 +.LFE2: + .size ffi_closure_SYSV, .-ffi_closure_SYSV + +#if !FFI_NO_RAW_API + +/* Precalculate for e.g. the Solaris 10/x86 assembler. */ +#if FFI_TRAMPOLINE_SIZE == 10 +#define RAW_CLOSURE_CIF_OFFSET 12 +#define RAW_CLOSURE_FUN_OFFSET 16 +#define RAW_CLOSURE_USER_DATA_OFFSET 20 +#elif FFI_TRAMPOLINE_SIZE == 24 +#define RAW_CLOSURE_CIF_OFFSET 24 +#define RAW_CLOSURE_FUN_OFFSET 28 +#define RAW_CLOSURE_USER_DATA_OFFSET 32 +#else +#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3) +#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) +#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) +#endif +#define CIF_FLAGS_OFFSET 20 + + .align 4 +FFI_HIDDEN (ffi_closure_raw_SYSV) +.globl ffi_closure_raw_SYSV + .type ffi_closure_raw_SYSV, @function + +ffi_closure_raw_SYSV: +.LFB3: + pushl %ebp +.LCFI4: + movl %esp, %ebp +.LCFI5: + pushl %esi +.LCFI6: + subl $36, %esp + movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ + movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ + movl %edx, 12(%esp) /* user_data */ + leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */ + movl %edx, 8(%esp) /* raw_args */ + leal -24(%ebp), %edx + movl %edx, 4(%esp) /* &res */ + movl %esi, (%esp) /* cif */ + call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */ + movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */ + cmpl $FFI_TYPE_INT, %eax + je .Lrcls_retint + + /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, + FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */ + cmpl $FFI_TYPE_UINT64, %eax + jge 0f + cmpl $FFI_TYPE_UINT8, %eax + jge .Lrcls_retint +0: + cmpl $FFI_TYPE_FLOAT, %eax + je .Lrcls_retfloat + cmpl $FFI_TYPE_DOUBLE, %eax + je .Lrcls_retdouble + cmpl $FFI_TYPE_LONGDOUBLE, %eax + je .Lrcls_retldouble + cmpl $FFI_TYPE_SINT64, %eax + je .Lrcls_retllong +.Lrcls_epilogue: + addl $36, %esp + popl %esi + popl %ebp + ret +.Lrcls_retint: + movl -24(%ebp), %eax + jmp .Lrcls_epilogue +.Lrcls_retfloat: + flds -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retdouble: + fldl -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retldouble: + fldt -24(%ebp) + jmp .Lrcls_epilogue +.Lrcls_retllong: + movl -24(%ebp), %eax + movl -20(%ebp), %edx + jmp .Lrcls_epilogue +.LFE3: + .size ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV +#endif + +#if defined __PIC__ +# if defined __sun__ && defined __svr4__ +/* 32-bit Solaris 2/x86 uses datarel encoding for PIC. GNU ld before 2.22 + doesn't correctly sort .eh_frame_hdr with mixed encodings, so match this. */ +# define FDE_ENCODING 0x30 /* datarel */ +# define FDE_ENCODE(X) X@GOTOFF +# else +# define FDE_ENCODING 0x1b /* pcrel sdata4 */ +# if defined HAVE_AS_X86_PCREL +# define FDE_ENCODE(X) X-. +# else +# define FDE_ENCODE(X) X@rel +# endif +# endif +#else +# define FDE_ENCODING 0 /* absolute */ +# define FDE_ENCODE(X) X +#endif + + .section .eh_frame,EH_FRAME_FLAGS,@progbits +.Lframe1: + .long .LECIE1-.LSCIE1 /* Length of Common Information Entry */ +.LSCIE1: + .long 0x0 /* CIE Identifier Tag */ + .byte 0x1 /* CIE Version */ +#ifdef HAVE_AS_ASCII_PSEUDO_OP +#ifdef __PIC__ + .ascii "zR\0" /* CIE Augmentation */ +#else + .ascii "\0" /* CIE Augmentation */ +#endif +#elif defined HAVE_AS_STRING_PSEUDO_OP +#ifdef __PIC__ + .string "zR" /* CIE Augmentation */ +#else + .string "" /* CIE Augmentation */ +#endif +#else +#error missing .ascii/.string +#endif + .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */ + .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */ + .byte 0x8 /* CIE RA Column */ +#ifdef __PIC__ + .byte 0x1 /* .uleb128 0x1; Augmentation size */ + .byte FDE_ENCODING +#endif + .byte 0xc /* DW_CFA_def_cfa */ + .byte 0x4 /* .uleb128 0x4 */ + .byte 0x4 /* .uleb128 0x4 */ + .byte 0x88 /* DW_CFA_offset, column 0x8 */ + .byte 0x1 /* .uleb128 0x1 */ + .align 4 +.LECIE1: +.LSFDE1: + .long .LEFDE1-.LASFDE1 /* FDE Length */ +.LASFDE1: + .long .LASFDE1-.Lframe1 /* FDE CIE offset */ + .long FDE_ENCODE(.LFB1) /* FDE initial location */ + .long .LFE1-.LFB1 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI0-.LFB1 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 */ + .byte 0x2 /* .uleb128 0x2 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI1-.LCFI0 + .byte 0xd /* DW_CFA_def_cfa_register */ + .byte 0x5 /* .uleb128 0x5 */ + .align 4 +.LEFDE1: +.LSFDE2: + .long .LEFDE2-.LASFDE2 /* FDE Length */ +.LASFDE2: + .long .LASFDE2-.Lframe1 /* FDE CIE offset */ + .long FDE_ENCODE(.LFB2) /* FDE initial location */ + .long .LFE2-.LFB2 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI2-.LFB2 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 */ + .byte 0x2 /* .uleb128 0x2 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI3-.LCFI2 + .byte 0xd /* DW_CFA_def_cfa_register */ + .byte 0x5 /* .uleb128 0x5 */ +#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI7-.LCFI3 + .byte 0x83 /* DW_CFA_offset, column 0x3 */ + .byte 0xa /* .uleb128 0xa */ +#endif + .align 4 +.LEFDE2: + +#if !FFI_NO_RAW_API + +.LSFDE3: + .long .LEFDE3-.LASFDE3 /* FDE Length */ +.LASFDE3: + .long .LASFDE3-.Lframe1 /* FDE CIE offset */ + .long FDE_ENCODE(.LFB3) /* FDE initial location */ + .long .LFE3-.LFB3 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI4-.LFB3 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 */ + .byte 0x2 /* .uleb128 0x2 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI5-.LCFI4 + .byte 0xd /* DW_CFA_def_cfa_register */ + .byte 0x5 /* .uleb128 0x5 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI6-.LCFI5 + .byte 0x86 /* DW_CFA_offset, column 0x6 */ + .byte 0x3 /* .uleb128 0x3 */ + .align 4 +.LEFDE3: + +#endif + +#endif /* ifndef __x86_64__ */ + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libffi/src/x86/unix64.S b/libffi/src/x86/unix64.S new file mode 100644 index 000000000..7a6619a54 --- /dev/null +++ b/libffi/src/x86/unix64.S @@ -0,0 +1,426 @@ +/* ----------------------------------------------------------------------- + unix64.S - Copyright (c) 2002 Bo Thorsen <bo@suse.de> + Copyright (c) 2008 Red Hat, Inc + + x86-64 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#ifdef __x86_64__ +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +.text + +/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, + void *raddr, void (*fnaddr)(void)); + + Bit o trickiness here -- ARGS+BYTES is the base of the stack frame + for this function. This has been allocated by ffi_call. We also + deallocate some of the stack that has been alloca'd. */ + + .align 2 + .globl ffi_call_unix64 + .type ffi_call_unix64,@function + +ffi_call_unix64: +.LUW0: + movq (%rsp), %r10 /* Load return address. */ + leaq (%rdi, %rsi), %rax /* Find local stack base. */ + movq %rdx, (%rax) /* Save flags. */ + movq %rcx, 8(%rax) /* Save raddr. */ + movq %rbp, 16(%rax) /* Save old frame pointer. */ + movq %r10, 24(%rax) /* Relocate return address. */ + movq %rax, %rbp /* Finalize local stack frame. */ +.LUW1: + movq %rdi, %r10 /* Save a copy of the register area. */ + movq %r8, %r11 /* Save a copy of the target fn. */ + movl %r9d, %eax /* Set number of SSE registers. */ + + /* Load up all argument registers. */ + movq (%r10), %rdi + movq 8(%r10), %rsi + movq 16(%r10), %rdx + movq 24(%r10), %rcx + movq 32(%r10), %r8 + movq 40(%r10), %r9 + testl %eax, %eax + jnz .Lload_sse +.Lret_from_load_sse: + + /* Deallocate the reg arg area. */ + leaq 176(%r10), %rsp + + /* Call the user function. */ + call *%r11 + + /* Deallocate stack arg area; local stack frame in redzone. */ + leaq 24(%rbp), %rsp + + movq 0(%rbp), %rcx /* Reload flags. */ + movq 8(%rbp), %rdi /* Reload raddr. */ + movq 16(%rbp), %rbp /* Reload old frame pointer. */ +.LUW2: + + /* The first byte of the flags contains the FFI_TYPE. */ + movzbl %cl, %r10d + leaq .Lstore_table(%rip), %r11 + movslq (%r11, %r10, 4), %r10 + addq %r11, %r10 + jmp *%r10 + +.Lstore_table: + .long .Lst_void-.Lstore_table /* FFI_TYPE_VOID */ + .long .Lst_sint32-.Lstore_table /* FFI_TYPE_INT */ + .long .Lst_float-.Lstore_table /* FFI_TYPE_FLOAT */ + .long .Lst_double-.Lstore_table /* FFI_TYPE_DOUBLE */ + .long .Lst_ldouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */ + .long .Lst_uint8-.Lstore_table /* FFI_TYPE_UINT8 */ + .long .Lst_sint8-.Lstore_table /* FFI_TYPE_SINT8 */ + .long .Lst_uint16-.Lstore_table /* FFI_TYPE_UINT16 */ + .long .Lst_sint16-.Lstore_table /* FFI_TYPE_SINT16 */ + .long .Lst_uint32-.Lstore_table /* FFI_TYPE_UINT32 */ + .long .Lst_sint32-.Lstore_table /* FFI_TYPE_SINT32 */ + .long .Lst_int64-.Lstore_table /* FFI_TYPE_UINT64 */ + .long .Lst_int64-.Lstore_table /* FFI_TYPE_SINT64 */ + .long .Lst_struct-.Lstore_table /* FFI_TYPE_STRUCT */ + .long .Lst_int64-.Lstore_table /* FFI_TYPE_POINTER */ + + .align 2 +.Lst_void: + ret + .align 2 + +.Lst_uint8: + movzbq %al, %rax + movq %rax, (%rdi) + ret + .align 2 +.Lst_sint8: + movsbq %al, %rax + movq %rax, (%rdi) + ret + .align 2 +.Lst_uint16: + movzwq %ax, %rax + movq %rax, (%rdi) + .align 2 +.Lst_sint16: + movswq %ax, %rax + movq %rax, (%rdi) + ret + .align 2 +.Lst_uint32: + movl %eax, %eax + movq %rax, (%rdi) + .align 2 +.Lst_sint32: + cltq + movq %rax, (%rdi) + ret + .align 2 +.Lst_int64: + movq %rax, (%rdi) + ret + + .align 2 +.Lst_float: + movss %xmm0, (%rdi) + ret + .align 2 +.Lst_double: + movsd %xmm0, (%rdi) + ret +.Lst_ldouble: + fstpt (%rdi) + ret + + .align 2 +.Lst_struct: + leaq -20(%rsp), %rsi /* Scratch area in redzone. */ + + /* We have to locate the values now, and since we don't want to + write too much data into the user's return value, we spill the + value to a 16 byte scratch area first. Bits 8, 9, and 10 + control where the values are located. Only one of the three + bits will be set; see ffi_prep_cif_machdep for the pattern. */ + movd %xmm0, %r10 + movd %xmm1, %r11 + testl $0x100, %ecx + cmovnz %rax, %rdx + cmovnz %r10, %rax + testl $0x200, %ecx + cmovnz %r10, %rdx + testl $0x400, %ecx + cmovnz %r10, %rax + cmovnz %r11, %rdx + movq %rax, (%rsi) + movq %rdx, 8(%rsi) + + /* Bits 12-31 contain the true size of the structure. Copy from + the scratch area to the true destination. */ + shrl $12, %ecx + rep movsb + ret + + /* Many times we can avoid loading any SSE registers at all. + It's not worth an indirect jump to load the exact set of + SSE registers needed; zero or all is a good compromise. */ + .align 2 +.LUW3: +.Lload_sse: + movdqa 48(%r10), %xmm0 + movdqa 64(%r10), %xmm1 + movdqa 80(%r10), %xmm2 + movdqa 96(%r10), %xmm3 + movdqa 112(%r10), %xmm4 + movdqa 128(%r10), %xmm5 + movdqa 144(%r10), %xmm6 + movdqa 160(%r10), %xmm7 + jmp .Lret_from_load_sse + +.LUW4: + .size ffi_call_unix64,.-ffi_call_unix64 + + .align 2 + .globl ffi_closure_unix64 + .type ffi_closure_unix64,@function + +ffi_closure_unix64: +.LUW5: + /* The carry flag is set by the trampoline iff SSE registers + are used. Don't clobber it before the branch instruction. */ + leaq -200(%rsp), %rsp +.LUW6: + movq %rdi, (%rsp) + movq %rsi, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rcx, 24(%rsp) + movq %r8, 32(%rsp) + movq %r9, 40(%rsp) + jc .Lsave_sse +.Lret_from_save_sse: + + movq %r10, %rdi + leaq 176(%rsp), %rsi + movq %rsp, %rdx + leaq 208(%rsp), %rcx + call ffi_closure_unix64_inner@PLT + + /* Deallocate stack frame early; return value is now in redzone. */ + addq $200, %rsp +.LUW7: + + /* The first byte of the return value contains the FFI_TYPE. */ + movzbl %al, %r10d + leaq .Lload_table(%rip), %r11 + movslq (%r11, %r10, 4), %r10 + addq %r11, %r10 + jmp *%r10 + +.Lload_table: + .long .Lld_void-.Lload_table /* FFI_TYPE_VOID */ + .long .Lld_int32-.Lload_table /* FFI_TYPE_INT */ + .long .Lld_float-.Lload_table /* FFI_TYPE_FLOAT */ + .long .Lld_double-.Lload_table /* FFI_TYPE_DOUBLE */ + .long .Lld_ldouble-.Lload_table /* FFI_TYPE_LONGDOUBLE */ + .long .Lld_int8-.Lload_table /* FFI_TYPE_UINT8 */ + .long .Lld_int8-.Lload_table /* FFI_TYPE_SINT8 */ + .long .Lld_int16-.Lload_table /* FFI_TYPE_UINT16 */ + .long .Lld_int16-.Lload_table /* FFI_TYPE_SINT16 */ + .long .Lld_int32-.Lload_table /* FFI_TYPE_UINT32 */ + .long .Lld_int32-.Lload_table /* FFI_TYPE_SINT32 */ + .long .Lld_int64-.Lload_table /* FFI_TYPE_UINT64 */ + .long .Lld_int64-.Lload_table /* FFI_TYPE_SINT64 */ + .long .Lld_struct-.Lload_table /* FFI_TYPE_STRUCT */ + .long .Lld_int64-.Lload_table /* FFI_TYPE_POINTER */ + + .align 2 +.Lld_void: + ret + + .align 2 +.Lld_int8: + movzbl -24(%rsp), %eax + ret + .align 2 +.Lld_int16: + movzwl -24(%rsp), %eax + ret + .align 2 +.Lld_int32: + movl -24(%rsp), %eax + ret + .align 2 +.Lld_int64: + movq -24(%rsp), %rax + ret + + .align 2 +.Lld_float: + movss -24(%rsp), %xmm0 + ret + .align 2 +.Lld_double: + movsd -24(%rsp), %xmm0 + ret + .align 2 +.Lld_ldouble: + fldt -24(%rsp) + ret + + .align 2 +.Lld_struct: + /* There are four possibilities here, %rax/%rdx, %xmm0/%rax, + %rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading + both rdx and xmm1 with the second word. For the remaining, + bit 8 set means xmm0 gets the second word, and bit 9 means + that rax gets the second word. */ + movq -24(%rsp), %rcx + movq -16(%rsp), %rdx + movq -16(%rsp), %xmm1 + testl $0x100, %eax + cmovnz %rdx, %rcx + movd %rcx, %xmm0 + testl $0x200, %eax + movq -24(%rsp), %rax + cmovnz %rdx, %rax + ret + + /* See the comment above .Lload_sse; the same logic applies here. */ + .align 2 +.LUW8: +.Lsave_sse: + movdqa %xmm0, 48(%rsp) + movdqa %xmm1, 64(%rsp) + movdqa %xmm2, 80(%rsp) + movdqa %xmm3, 96(%rsp) + movdqa %xmm4, 112(%rsp) + movdqa %xmm5, 128(%rsp) + movdqa %xmm6, 144(%rsp) + movdqa %xmm7, 160(%rsp) + jmp .Lret_from_save_sse + +.LUW9: + .size ffi_closure_unix64,.-ffi_closure_unix64 + +#ifdef HAVE_AS_X86_64_UNWIND_SECTION_TYPE + .section .eh_frame,"a",@unwind +#else + .section .eh_frame,"a",@progbits +#endif +.Lframe1: + .long .LECIE1-.LSCIE1 /* CIE Length */ +.LSCIE1: + .long 0 /* CIE Identifier Tag */ + .byte 1 /* CIE Version */ + .ascii "zR\0" /* CIE Augmentation */ + .uleb128 1 /* CIE Code Alignment Factor */ + .sleb128 -8 /* CIE Data Alignment Factor */ + .byte 0x10 /* CIE RA Column */ + .uleb128 1 /* Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ + .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */ + .uleb128 7 + .uleb128 8 + .byte 0x80+16 /* DW_CFA_offset, %rip offset 1*-8 */ + .uleb128 1 + .align 8 +.LECIE1: +.LSFDE1: + .long .LEFDE1-.LASFDE1 /* FDE Length */ +.LASFDE1: + .long .LASFDE1-.Lframe1 /* FDE CIE offset */ +#if HAVE_AS_X86_PCREL + .long .LUW0-. /* FDE initial location */ +#else + .long .LUW0@rel +#endif + .long .LUW4-.LUW0 /* FDE address range */ + .uleb128 0x0 /* Augmentation size */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW1-.LUW0 + + /* New stack frame based off rbp. This is a itty bit of unwind + trickery in that the CFA *has* changed. There is no easy way + to describe it correctly on entry to the function. Fortunately, + it doesn't matter too much since at all points we can correctly + unwind back to ffi_call. Note that the location to which we + moved the return address is (the new) CFA-8, so from the + perspective of the unwind info, it hasn't moved. */ + .byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */ + .uleb128 6 + .uleb128 32 + .byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */ + .uleb128 2 + .byte 0xa /* DW_CFA_remember_state */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW2-.LUW1 + .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */ + .uleb128 7 + .uleb128 8 + .byte 0xc0+6 /* DW_CFA_restore, %rbp */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW3-.LUW2 + .byte 0xb /* DW_CFA_restore_state */ + + .align 8 +.LEFDE1: +.LSFDE3: + .long .LEFDE3-.LASFDE3 /* FDE Length */ +.LASFDE3: + .long .LASFDE3-.Lframe1 /* FDE CIE offset */ +#if HAVE_AS_X86_PCREL + .long .LUW5-. /* FDE initial location */ +#else + .long .LUW5@rel +#endif + .long .LUW9-.LUW5 /* FDE address range */ + .uleb128 0x0 /* Augmentation size */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW6-.LUW5 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .uleb128 208 + .byte 0xa /* DW_CFA_remember_state */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW7-.LUW6 + .byte 0xe /* DW_CFA_def_cfa_offset */ + .uleb128 8 + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LUW8-.LUW7 + .byte 0xb /* DW_CFA_restore_state */ + + .align 8 +.LEFDE3: + +#endif /* __x86_64__ */ + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libffi/src/x86/win32.S b/libffi/src/x86/win32.S new file mode 100644 index 000000000..34ec0fd82 --- /dev/null +++ b/libffi/src/x86/win32.S @@ -0,0 +1,1065 @@ +/* ----------------------------------------------------------------------- + win32.S - Copyright (c) 1996, 1998, 2001, 2002, 2009 Red Hat, Inc. + Copyright (c) 2001 John Beniton + Copyright (c) 2002 Ranjit Mathew + Copyright (c) 2009 Daniel Witte + + + X86 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- + */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +#ifdef _MSC_VER + +.386 +.MODEL FLAT, C + +EXTRN ffi_closure_SYSV_inner:NEAR + +_TEXT SEGMENT + +ffi_call_win32 PROC NEAR, + ffi_prep_args : NEAR PTR DWORD, + ecif : NEAR PTR DWORD, + cif_bytes : DWORD, + cif_flags : DWORD, + rvalue : NEAR PTR DWORD, + fn : NEAR PTR DWORD + + ;; Make room for all of the new args. + mov ecx, cif_bytes + sub esp, ecx + + mov eax, esp + + ;; Place all of the ffi_prep_args in position + push ecif + push eax + call ffi_prep_args + + ;; Return stack to previous state and call the function + add esp, 8 + + call fn + + ;; cdecl: we restore esp in the epilogue, so there's no need to + ;; remove the space we pushed for the args. + ;; stdcall: the callee has already cleaned the stack. + + ;; Load ecx with the return type code + mov ecx, cif_flags + + ;; If the return value pointer is NULL, assume no return value. + cmp rvalue, 0 + jne ca_jumptable + + ;; Even if there is no space for the return value, we are + ;; obliged to handle floating-point values. + cmp ecx, FFI_TYPE_FLOAT + jne ca_epilogue + fstp st(0) + + jmp ca_epilogue + +ca_jumptable: + jmp [ca_jumpdata + 4 * ecx] +ca_jumpdata: + ;; Do not insert anything here between label and jump table. + dd offset ca_epilogue ;; FFI_TYPE_VOID + dd offset ca_retint ;; FFI_TYPE_INT + dd offset ca_retfloat ;; FFI_TYPE_FLOAT + dd offset ca_retdouble ;; FFI_TYPE_DOUBLE + dd offset ca_retlongdouble ;; FFI_TYPE_LONGDOUBLE + dd offset ca_retint8 ;; FFI_TYPE_UINT8 + dd offset ca_retint8 ;; FFI_TYPE_SINT8 + dd offset ca_retint16 ;; FFI_TYPE_UINT16 + dd offset ca_retint16 ;; FFI_TYPE_SINT16 + dd offset ca_retint ;; FFI_TYPE_UINT32 + dd offset ca_retint ;; FFI_TYPE_SINT32 + dd offset ca_retint64 ;; FFI_TYPE_UINT64 + dd offset ca_retint64 ;; FFI_TYPE_SINT64 + dd offset ca_epilogue ;; FFI_TYPE_STRUCT + dd offset ca_retint ;; FFI_TYPE_POINTER + dd offset ca_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B + dd offset ca_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B + dd offset ca_retint ;; FFI_TYPE_SMALL_STRUCT_4B + +ca_retint8: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + mov [ecx + 0], al + jmp ca_epilogue + +ca_retint16: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + mov [ecx + 0], ax + jmp ca_epilogue + +ca_retint: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + mov [ecx + 0], eax + jmp ca_epilogue + +ca_retint64: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + mov [ecx + 0], eax + mov [ecx + 4], edx + jmp ca_epilogue + +ca_retfloat: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + fstp DWORD PTR [ecx] + jmp ca_epilogue + +ca_retdouble: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + fstp QWORD PTR [ecx] + jmp ca_epilogue + +ca_retlongdouble: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + fstp TBYTE PTR [ecx] + jmp ca_epilogue + +ca_epilogue: + ;; Epilogue code is autogenerated. + ret +ffi_call_win32 ENDP + +ffi_closure_SYSV PROC NEAR FORCEFRAME + ;; the ffi_closure ctx is passed in eax by the trampoline. + + sub esp, 40 + lea edx, [ebp - 24] + mov [ebp - 12], edx ;; resp + lea edx, [ebp + 8] + mov [esp + 8], edx ;; args + lea edx, [ebp - 12] + mov [esp + 4], edx ;; &resp + mov [esp], eax ;; closure + call ffi_closure_SYSV_inner + mov ecx, [ebp - 12] + +cs_jumptable: + jmp [cs_jumpdata + 4 * eax] +cs_jumpdata: + ;; Do not insert anything here between the label and jump table. + dd offset cs_epilogue ;; FFI_TYPE_VOID + dd offset cs_retint ;; FFI_TYPE_INT + dd offset cs_retfloat ;; FFI_TYPE_FLOAT + dd offset cs_retdouble ;; FFI_TYPE_DOUBLE + dd offset cs_retlongdouble ;; FFI_TYPE_LONGDOUBLE + dd offset cs_retint8 ;; FFI_TYPE_UINT8 + dd offset cs_retint8 ;; FFI_TYPE_SINT8 + dd offset cs_retint16 ;; FFI_TYPE_UINT16 + dd offset cs_retint16 ;; FFI_TYPE_SINT16 + dd offset cs_retint ;; FFI_TYPE_UINT32 + dd offset cs_retint ;; FFI_TYPE_SINT32 + dd offset cs_retint64 ;; FFI_TYPE_UINT64 + dd offset cs_retint64 ;; FFI_TYPE_SINT64 + dd offset cs_retstruct ;; FFI_TYPE_STRUCT + dd offset cs_retint ;; FFI_TYPE_POINTER + dd offset cs_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B + dd offset cs_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B + dd offset cs_retint ;; FFI_TYPE_SMALL_STRUCT_4B + +cs_retint8: + mov al, [ecx] + jmp cs_epilogue + +cs_retint16: + mov ax, [ecx] + jmp cs_epilogue + +cs_retint: + mov eax, [ecx] + jmp cs_epilogue + +cs_retint64: + mov eax, [ecx + 0] + mov edx, [ecx + 4] + jmp cs_epilogue + +cs_retfloat: + fld DWORD PTR [ecx] + jmp cs_epilogue + +cs_retdouble: + fld QWORD PTR [ecx] + jmp cs_epilogue + +cs_retlongdouble: + fld TBYTE PTR [ecx] + jmp cs_epilogue + +cs_retstruct: + ;; Caller expects us to pop struct return value pointer hidden arg. + ;; Epilogue code is autogenerated. + ret 4 + +cs_epilogue: + ;; Epilogue code is autogenerated. + ret +ffi_closure_SYSV ENDP + +#if !FFI_NO_RAW_API + +#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) AND NOT 3) +#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) +#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) +#define CIF_FLAGS_OFFSET 20 + +ffi_closure_raw_SYSV PROC NEAR USES esi + ;; the ffi_closure ctx is passed in eax by the trampoline. + + sub esp, 40 + mov esi, [eax + RAW_CLOSURE_CIF_OFFSET] ;; closure->cif + mov edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET] ;; closure->user_data + mov [esp + 12], edx ;; user_data + lea edx, [ebp + 8] + mov [esp + 8], edx ;; raw_args + lea edx, [ebp - 24] + mov [esp + 4], edx ;; &res + mov [esp], esi ;; cif + call DWORD PTR [eax + RAW_CLOSURE_FUN_OFFSET] ;; closure->fun + mov eax, [esi + CIF_FLAGS_OFFSET] ;; cif->flags + lea ecx, [ebp - 24] + +cr_jumptable: + jmp [cr_jumpdata + 4 * eax] +cr_jumpdata: + ;; Do not insert anything here between the label and jump table. + dd offset cr_epilogue ;; FFI_TYPE_VOID + dd offset cr_retint ;; FFI_TYPE_INT + dd offset cr_retfloat ;; FFI_TYPE_FLOAT + dd offset cr_retdouble ;; FFI_TYPE_DOUBLE + dd offset cr_retlongdouble ;; FFI_TYPE_LONGDOUBLE + dd offset cr_retint8 ;; FFI_TYPE_UINT8 + dd offset cr_retint8 ;; FFI_TYPE_SINT8 + dd offset cr_retint16 ;; FFI_TYPE_UINT16 + dd offset cr_retint16 ;; FFI_TYPE_SINT16 + dd offset cr_retint ;; FFI_TYPE_UINT32 + dd offset cr_retint ;; FFI_TYPE_SINT32 + dd offset cr_retint64 ;; FFI_TYPE_UINT64 + dd offset cr_retint64 ;; FFI_TYPE_SINT64 + dd offset cr_epilogue ;; FFI_TYPE_STRUCT + dd offset cr_retint ;; FFI_TYPE_POINTER + dd offset cr_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B + dd offset cr_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B + dd offset cr_retint ;; FFI_TYPE_SMALL_STRUCT_4B + +cr_retint8: + mov al, [ecx] + jmp cr_epilogue + +cr_retint16: + mov ax, [ecx] + jmp cr_epilogue + +cr_retint: + mov eax, [ecx] + jmp cr_epilogue + +cr_retint64: + mov eax, [ecx + 0] + mov edx, [ecx + 4] + jmp cr_epilogue + +cr_retfloat: + fld DWORD PTR [ecx] + jmp cr_epilogue + +cr_retdouble: + fld QWORD PTR [ecx] + jmp cr_epilogue + +cr_retlongdouble: + fld TBYTE PTR [ecx] + jmp cr_epilogue + +cr_epilogue: + ;; Epilogue code is autogenerated. + ret +ffi_closure_raw_SYSV ENDP + +#endif /* !FFI_NO_RAW_API */ + +ffi_closure_STDCALL PROC NEAR FORCEFRAME + ;; the ffi_closure ctx is passed in eax by the trampoline. + + sub esp, 40 + lea edx, [ebp - 24] + mov [ebp - 12], edx ;; resp + lea edx, [ebp + 12] ;; account for stub return address on stack + mov [esp + 8], edx ;; args + lea edx, [ebp - 12] + mov [esp + 4], edx ;; &resp + mov [esp], eax ;; closure + call ffi_closure_SYSV_inner + mov ecx, [ebp - 12] + +cd_jumptable: + jmp [cd_jumpdata + 4 * eax] +cd_jumpdata: + ;; Do not insert anything here between the label and jump table. + dd offset cd_epilogue ;; FFI_TYPE_VOID + dd offset cd_retint ;; FFI_TYPE_INT + dd offset cd_retfloat ;; FFI_TYPE_FLOAT + dd offset cd_retdouble ;; FFI_TYPE_DOUBLE + dd offset cd_retlongdouble ;; FFI_TYPE_LONGDOUBLE + dd offset cd_retint8 ;; FFI_TYPE_UINT8 + dd offset cd_retint8 ;; FFI_TYPE_SINT8 + dd offset cd_retint16 ;; FFI_TYPE_UINT16 + dd offset cd_retint16 ;; FFI_TYPE_SINT16 + dd offset cd_retint ;; FFI_TYPE_UINT32 + dd offset cd_retint ;; FFI_TYPE_SINT32 + dd offset cd_retint64 ;; FFI_TYPE_UINT64 + dd offset cd_retint64 ;; FFI_TYPE_SINT64 + dd offset cd_epilogue ;; FFI_TYPE_STRUCT + dd offset cd_retint ;; FFI_TYPE_POINTER + dd offset cd_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B + dd offset cd_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B + dd offset cd_retint ;; FFI_TYPE_SMALL_STRUCT_4B + +cd_retint8: + mov al, [ecx] + jmp cd_epilogue + +cd_retint16: + mov ax, [ecx] + jmp cd_epilogue + +cd_retint: + mov eax, [ecx] + jmp cd_epilogue + +cd_retint64: + mov eax, [ecx + 0] + mov edx, [ecx + 4] + jmp cd_epilogue + +cd_retfloat: + fld DWORD PTR [ecx] + jmp cd_epilogue + +cd_retdouble: + fld QWORD PTR [ecx] + jmp cd_epilogue + +cd_retlongdouble: + fld TBYTE PTR [ecx] + jmp cd_epilogue + +cd_epilogue: + ;; Epilogue code is autogenerated. + ret +ffi_closure_STDCALL ENDP + +_TEXT ENDS +END + +#else + + .text + + # This assumes we are using gas. + .balign 16 + .globl _ffi_call_win32 +#ifndef __OS2__ + .def _ffi_call_win32; .scl 2; .type 32; .endef +#endif +_ffi_call_win32: +.LFB1: + pushl %ebp +.LCFI0: + movl %esp,%ebp +.LCFI1: + # Make room for all of the new args. + movl 16(%ebp),%ecx + subl %ecx,%esp + + movl %esp,%eax + + # Place all of the ffi_prep_args in position + pushl 12(%ebp) + pushl %eax + call *8(%ebp) + + # Return stack to previous state and call the function + addl $8,%esp + + # FIXME: Align the stack to a 128-bit boundary to avoid + # potential performance hits. + + call *28(%ebp) + + # stdcall functions pop arguments off the stack themselves + + # Load %ecx with the return type code + movl 20(%ebp),%ecx + + # If the return value pointer is NULL, assume no return value. + cmpl $0,24(%ebp) + jne 0f + + # Even if there is no space for the return value, we are + # obliged to handle floating-point values. + cmpl $FFI_TYPE_FLOAT,%ecx + jne .Lnoretval + fstp %st(0) + + jmp .Lepilogue + +0: + call 1f + # Do not insert anything here between the call and the jump table. +.Lstore_table: + .long .Lnoretval /* FFI_TYPE_VOID */ + .long .Lretint /* FFI_TYPE_INT */ + .long .Lretfloat /* FFI_TYPE_FLOAT */ + .long .Lretdouble /* FFI_TYPE_DOUBLE */ + .long .Lretlongdouble /* FFI_TYPE_LONGDOUBLE */ + .long .Lretuint8 /* FFI_TYPE_UINT8 */ + .long .Lretsint8 /* FFI_TYPE_SINT8 */ + .long .Lretuint16 /* FFI_TYPE_UINT16 */ + .long .Lretsint16 /* FFI_TYPE_SINT16 */ + .long .Lretint /* FFI_TYPE_UINT32 */ + .long .Lretint /* FFI_TYPE_SINT32 */ + .long .Lretint64 /* FFI_TYPE_UINT64 */ + .long .Lretint64 /* FFI_TYPE_SINT64 */ + .long .Lretstruct /* FFI_TYPE_STRUCT */ + .long .Lretint /* FFI_TYPE_POINTER */ + .long .Lretstruct1b /* FFI_TYPE_SMALL_STRUCT_1B */ + .long .Lretstruct2b /* FFI_TYPE_SMALL_STRUCT_2B */ + .long .Lretstruct4b /* FFI_TYPE_SMALL_STRUCT_4B */ +1: + add %ecx, %ecx + add %ecx, %ecx + add (%esp),%ecx + add $4, %esp + jmp *(%ecx) + + /* Sign/zero extend as appropriate. */ +.Lretsint8: + movsbl %al, %eax + jmp .Lretint + +.Lretsint16: + movswl %ax, %eax + jmp .Lretint + +.Lretuint8: + movzbl %al, %eax + jmp .Lretint + +.Lretuint16: + movzwl %ax, %eax + jmp .Lretint + +.Lretint: + # Load %ecx with the pointer to storage for the return value + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + jmp .Lepilogue + +.Lretfloat: + # Load %ecx with the pointer to storage for the return value + movl 24(%ebp),%ecx + fstps (%ecx) + jmp .Lepilogue + +.Lretdouble: + # Load %ecx with the pointer to storage for the return value + movl 24(%ebp),%ecx + fstpl (%ecx) + jmp .Lepilogue + +.Lretlongdouble: + # Load %ecx with the pointer to storage for the return value + movl 24(%ebp),%ecx + fstpt (%ecx) + jmp .Lepilogue + +.Lretint64: + # Load %ecx with the pointer to storage for the return value + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + movl %edx,4(%ecx) + jmp .Lepilogue + +.Lretstruct1b: + # Load %ecx with the pointer to storage for the return value + movl 24(%ebp),%ecx + movb %al,0(%ecx) + jmp .Lepilogue + +.Lretstruct2b: + # Load %ecx with the pointer to storage for the return value + movl 24(%ebp),%ecx + movw %ax,0(%ecx) + jmp .Lepilogue + +.Lretstruct4b: + # Load %ecx with the pointer to storage for the return value + movl 24(%ebp),%ecx + movl %eax,0(%ecx) + jmp .Lepilogue + +.Lretstruct: + # Nothing to do! + +.Lnoretval: +.Lepilogue: + movl %ebp,%esp + popl %ebp + ret +.ffi_call_win32_end: +.LFE1: + + # This assumes we are using gas. + .balign 16 + .globl _ffi_closure_SYSV +#ifndef __OS2__ + .def _ffi_closure_SYSV; .scl 2; .type 32; .endef +#endif +_ffi_closure_SYSV: +.LFB3: + pushl %ebp +.LCFI4: + movl %esp, %ebp +.LCFI5: + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) /* resp */ + leal 8(%ebp), %edx + movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ + leal -12(%ebp), %edx + movl %edx, (%esp) /* &resp */ + call _ffi_closure_SYSV_inner + movl -12(%ebp), %ecx + +0: + call 1f + # Do not insert anything here between the call and the jump table. +.Lcls_store_table: + .long .Lcls_noretval /* FFI_TYPE_VOID */ + .long .Lcls_retint /* FFI_TYPE_INT */ + .long .Lcls_retfloat /* FFI_TYPE_FLOAT */ + .long .Lcls_retdouble /* FFI_TYPE_DOUBLE */ + .long .Lcls_retldouble /* FFI_TYPE_LONGDOUBLE */ + .long .Lcls_retuint8 /* FFI_TYPE_UINT8 */ + .long .Lcls_retsint8 /* FFI_TYPE_SINT8 */ + .long .Lcls_retuint16 /* FFI_TYPE_UINT16 */ + .long .Lcls_retsint16 /* FFI_TYPE_SINT16 */ + .long .Lcls_retint /* FFI_TYPE_UINT32 */ + .long .Lcls_retint /* FFI_TYPE_SINT32 */ + .long .Lcls_retllong /* FFI_TYPE_UINT64 */ + .long .Lcls_retllong /* FFI_TYPE_SINT64 */ + .long .Lcls_retstruct /* FFI_TYPE_STRUCT */ + .long .Lcls_retint /* FFI_TYPE_POINTER */ + .long .Lcls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */ + .long .Lcls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */ + .long .Lcls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */ + +1: + add %eax, %eax + add %eax, %eax + add (%esp),%eax + add $4, %esp + jmp *(%eax) + + /* Sign/zero extend as appropriate. */ +.Lcls_retsint8: + movsbl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retsint16: + movswl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retuint8: + movzbl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retuint16: + movzwl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retint: + movl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retfloat: + flds (%ecx) + jmp .Lcls_epilogue + +.Lcls_retdouble: + fldl (%ecx) + jmp .Lcls_epilogue + +.Lcls_retldouble: + fldt (%ecx) + jmp .Lcls_epilogue + +.Lcls_retllong: + movl (%ecx), %eax + movl 4(%ecx), %edx + jmp .Lcls_epilogue + +.Lcls_retstruct1: + movsbl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retstruct2: + movswl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retstruct4: + movl (%ecx), %eax + jmp .Lcls_epilogue + +.Lcls_retstruct: + # Caller expects us to pop struct return value pointer hidden arg. + movl %ebp, %esp + popl %ebp + ret $0x4 + +.Lcls_noretval: +.Lcls_epilogue: + movl %ebp, %esp + popl %ebp + ret +.ffi_closure_SYSV_end: +.LFE3: + +#if !FFI_NO_RAW_API + +#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3) +#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) +#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) +#define CIF_FLAGS_OFFSET 20 + + # This assumes we are using gas. + .balign 16 + .globl _ffi_closure_raw_SYSV +#ifndef __OS2__ + .def _ffi_closure_raw_SYSV; .scl 2; .type 32; .endef +#endif +_ffi_closure_raw_SYSV: +.LFB4: + pushl %ebp +.LCFI6: + movl %esp, %ebp +.LCFI7: + pushl %esi +.LCFI8: + subl $36, %esp + movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ + movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ + movl %edx, 12(%esp) /* user_data */ + leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */ + movl %edx, 8(%esp) /* raw_args */ + leal -24(%ebp), %edx + movl %edx, 4(%esp) /* &res */ + movl %esi, (%esp) /* cif */ + call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */ + movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */ +0: + call 1f + # Do not insert anything here between the call and the jump table. +.Lrcls_store_table: + .long .Lrcls_noretval /* FFI_TYPE_VOID */ + .long .Lrcls_retint /* FFI_TYPE_INT */ + .long .Lrcls_retfloat /* FFI_TYPE_FLOAT */ + .long .Lrcls_retdouble /* FFI_TYPE_DOUBLE */ + .long .Lrcls_retldouble /* FFI_TYPE_LONGDOUBLE */ + .long .Lrcls_retuint8 /* FFI_TYPE_UINT8 */ + .long .Lrcls_retsint8 /* FFI_TYPE_SINT8 */ + .long .Lrcls_retuint16 /* FFI_TYPE_UINT16 */ + .long .Lrcls_retsint16 /* FFI_TYPE_SINT16 */ + .long .Lrcls_retint /* FFI_TYPE_UINT32 */ + .long .Lrcls_retint /* FFI_TYPE_SINT32 */ + .long .Lrcls_retllong /* FFI_TYPE_UINT64 */ + .long .Lrcls_retllong /* FFI_TYPE_SINT64 */ + .long .Lrcls_retstruct /* FFI_TYPE_STRUCT */ + .long .Lrcls_retint /* FFI_TYPE_POINTER */ + .long .Lrcls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */ + .long .Lrcls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */ + .long .Lrcls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */ +1: + add %eax, %eax + add %eax, %eax + add (%esp),%eax + add $4, %esp + jmp *(%eax) + + /* Sign/zero extend as appropriate. */ +.Lrcls_retsint8: + movsbl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retsint16: + movswl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retuint8: + movzbl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retuint16: + movzwl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retint: + movl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retfloat: + flds -24(%ebp) + jmp .Lrcls_epilogue + +.Lrcls_retdouble: + fldl -24(%ebp) + jmp .Lrcls_epilogue + +.Lrcls_retldouble: + fldt -24(%ebp) + jmp .Lrcls_epilogue + +.Lrcls_retllong: + movl -24(%ebp), %eax + movl -20(%ebp), %edx + jmp .Lrcls_epilogue + +.Lrcls_retstruct1: + movsbl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retstruct2: + movswl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retstruct4: + movl -24(%ebp), %eax + jmp .Lrcls_epilogue + +.Lrcls_retstruct: + # Nothing to do! + +.Lrcls_noretval: +.Lrcls_epilogue: + addl $36, %esp + popl %esi + popl %ebp + ret +.ffi_closure_raw_SYSV_end: +.LFE4: + +#endif /* !FFI_NO_RAW_API */ + + # This assumes we are using gas. + .balign 16 + .globl _ffi_closure_STDCALL +#ifndef __OS2__ + .def _ffi_closure_STDCALL; .scl 2; .type 32; .endef +#endif +_ffi_closure_STDCALL: +.LFB5: + pushl %ebp +.LCFI9: + movl %esp, %ebp +.LCFI10: + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) /* resp */ + leal 12(%ebp), %edx /* account for stub return address on stack */ + movl %edx, 4(%esp) /* args */ + leal -12(%ebp), %edx + movl %edx, (%esp) /* &resp */ + call _ffi_closure_SYSV_inner + movl -12(%ebp), %ecx +0: + call 1f + # Do not insert anything here between the call and the jump table. +.Lscls_store_table: + .long .Lscls_noretval /* FFI_TYPE_VOID */ + .long .Lscls_retint /* FFI_TYPE_INT */ + .long .Lscls_retfloat /* FFI_TYPE_FLOAT */ + .long .Lscls_retdouble /* FFI_TYPE_DOUBLE */ + .long .Lscls_retldouble /* FFI_TYPE_LONGDOUBLE */ + .long .Lscls_retuint8 /* FFI_TYPE_UINT8 */ + .long .Lscls_retsint8 /* FFI_TYPE_SINT8 */ + .long .Lscls_retuint16 /* FFI_TYPE_UINT16 */ + .long .Lscls_retsint16 /* FFI_TYPE_SINT16 */ + .long .Lscls_retint /* FFI_TYPE_UINT32 */ + .long .Lscls_retint /* FFI_TYPE_SINT32 */ + .long .Lscls_retllong /* FFI_TYPE_UINT64 */ + .long .Lscls_retllong /* FFI_TYPE_SINT64 */ + .long .Lscls_retstruct /* FFI_TYPE_STRUCT */ + .long .Lscls_retint /* FFI_TYPE_POINTER */ + .long .Lscls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */ + .long .Lscls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */ + .long .Lscls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */ +1: + add %eax, %eax + add %eax, %eax + add (%esp),%eax + add $4, %esp + jmp *(%eax) + + /* Sign/zero extend as appropriate. */ +.Lscls_retsint8: + movsbl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retsint16: + movswl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retuint8: + movzbl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retuint16: + movzwl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retint: + movl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retfloat: + flds (%ecx) + jmp .Lscls_epilogue + +.Lscls_retdouble: + fldl (%ecx) + jmp .Lscls_epilogue + +.Lscls_retldouble: + fldt (%ecx) + jmp .Lscls_epilogue + +.Lscls_retllong: + movl (%ecx), %eax + movl 4(%ecx), %edx + jmp .Lscls_epilogue + +.Lscls_retstruct1: + movsbl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retstruct2: + movswl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retstruct4: + movl (%ecx), %eax + jmp .Lscls_epilogue + +.Lscls_retstruct: + # Nothing to do! + +.Lscls_noretval: +.Lscls_epilogue: + movl %ebp, %esp + popl %ebp + ret +.ffi_closure_STDCALL_end: +.LFE5: + +#ifndef __OS2__ + .section .eh_frame,"w" +#endif +.Lframe1: +.LSCIE1: + .long .LECIE1-.LASCIE1 /* Length of Common Information Entry */ +.LASCIE1: + .long 0x0 /* CIE Identifier Tag */ + .byte 0x1 /* CIE Version */ +#ifdef __PIC__ + .ascii "zR\0" /* CIE Augmentation */ +#else + .ascii "\0" /* CIE Augmentation */ +#endif + .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */ + .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */ + .byte 0x8 /* CIE RA Column */ +#ifdef __PIC__ + .byte 0x1 /* .uleb128 0x1; Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ +#endif + .byte 0xc /* DW_CFA_def_cfa CFA = r4 + 4 = 4(%esp) */ + .byte 0x4 /* .uleb128 0x4 */ + .byte 0x4 /* .uleb128 0x4 */ + .byte 0x88 /* DW_CFA_offset, column 0x8 %eip at CFA + 1 * -4 */ + .byte 0x1 /* .uleb128 0x1 */ + .align 4 +.LECIE1: + +.LSFDE1: + .long .LEFDE1-.LASFDE1 /* FDE Length */ +.LASFDE1: + .long .LASFDE1-.Lframe1 /* FDE CIE offset */ +#if defined __PIC__ && defined HAVE_AS_X86_PCREL + .long .LFB1-. /* FDE initial location */ +#else + .long .LFB1 +#endif + .long .LFE1-.LFB1 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + /* DW_CFA_xxx CFI instructions go here. */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI0-.LFB1 + .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ + .byte 0x2 /* .uleb128 0x2 */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI1-.LCFI0 + .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ + .byte 0x5 /* .uleb128 0x5 */ + + /* End of DW_CFA_xxx CFI instructions. */ + .align 4 +.LEFDE1: + + +.LSFDE3: + .long .LEFDE3-.LASFDE3 /* FDE Length */ +.LASFDE3: + .long .LASFDE3-.Lframe1 /* FDE CIE offset */ +#if defined __PIC__ && defined HAVE_AS_X86_PCREL + .long .LFB3-. /* FDE initial location */ +#else + .long .LFB3 +#endif + .long .LFE3-.LFB3 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + /* DW_CFA_xxx CFI instructions go here. */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI4-.LFB3 + .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ + .byte 0x2 /* .uleb128 0x2 */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI5-.LCFI4 + .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ + .byte 0x5 /* .uleb128 0x5 */ + + /* End of DW_CFA_xxx CFI instructions. */ + .align 4 +.LEFDE3: + +#if !FFI_NO_RAW_API + +.LSFDE4: + .long .LEFDE4-.LASFDE4 /* FDE Length */ +.LASFDE4: + .long .LASFDE4-.Lframe1 /* FDE CIE offset */ +#if defined __PIC__ && defined HAVE_AS_X86_PCREL + .long .LFB4-. /* FDE initial location */ +#else + .long .LFB4 +#endif + .long .LFE4-.LFB4 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + /* DW_CFA_xxx CFI instructions go here. */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI6-.LFB4 + .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ + .byte 0x2 /* .uleb128 0x2 */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI7-.LCFI6 + .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ + .byte 0x5 /* .uleb128 0x5 */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI8-.LCFI7 + .byte 0x86 /* DW_CFA_offset, column 0x6 %esi at CFA + 3 * -4 */ + .byte 0x3 /* .uleb128 0x3 */ + + /* End of DW_CFA_xxx CFI instructions. */ + .align 4 +.LEFDE4: + +#endif /* !FFI_NO_RAW_API */ + +.LSFDE5: + .long .LEFDE5-.LASFDE5 /* FDE Length */ +.LASFDE5: + .long .LASFDE5-.Lframe1 /* FDE CIE offset */ +#if defined __PIC__ && defined HAVE_AS_X86_PCREL + .long .LFB5-. /* FDE initial location */ +#else + .long .LFB5 +#endif + .long .LFE5-.LFB5 /* FDE address range */ +#ifdef __PIC__ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ +#endif + /* DW_CFA_xxx CFI instructions go here. */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI9-.LFB5 + .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ + .byte 0x2 /* .uleb128 0x2 */ + + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI10-.LCFI9 + .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ + .byte 0x5 /* .uleb128 0x5 */ + + /* End of DW_CFA_xxx CFI instructions. */ + .align 4 +.LEFDE5: + +#endif /* !_MSC_VER */ + diff --git a/libffi/src/x86/win64.S b/libffi/src/x86/win64.S new file mode 100644 index 000000000..6e9181867 --- /dev/null +++ b/libffi/src/x86/win64.S @@ -0,0 +1,460 @@ +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +/* Constants for ffi_call_win64 */ +#define STACK 0 +#define PREP_ARGS_FN 32 +#define ECIF 40 +#define CIF_BYTES 48 +#define CIF_FLAGS 56 +#define RVALUE 64 +#define FN 72 + +/* ffi_call_win64 (void (*prep_args_fn)(char *, extended_cif *), + extended_cif *ecif, unsigned bytes, unsigned flags, + unsigned *rvalue, void (*fn)()); + */ + +#ifdef _MSC_VER +PUBLIC ffi_call_win64 + +EXTRN __chkstk:NEAR +EXTRN ffi_closure_win64_inner:NEAR + +_TEXT SEGMENT + +;;; ffi_closure_win64 will be called with these registers set: +;;; rax points to 'closure' +;;; r11 contains a bit mask that specifies which of the +;;; first four parameters are float or double +;;; +;;; It must move the parameters passed in registers to their stack location, +;;; call ffi_closure_win64_inner for the actual work, then return the result. +;;; +ffi_closure_win64 PROC FRAME + ;; copy register arguments onto stack + test r11, 1 + jne first_is_float + mov QWORD PTR [rsp+8], rcx + jmp second +first_is_float: + movlpd QWORD PTR [rsp+8], xmm0 + +second: + test r11, 2 + jne second_is_float + mov QWORD PTR [rsp+16], rdx + jmp third +second_is_float: + movlpd QWORD PTR [rsp+16], xmm1 + +third: + test r11, 4 + jne third_is_float + mov QWORD PTR [rsp+24], r8 + jmp fourth +third_is_float: + movlpd QWORD PTR [rsp+24], xmm2 + +fourth: + test r11, 8 + jne fourth_is_float + mov QWORD PTR [rsp+32], r9 + jmp done +fourth_is_float: + movlpd QWORD PTR [rsp+32], xmm3 + +done: + .ALLOCSTACK 40 + sub rsp, 40 + .ENDPROLOG + mov rcx, rax ; context is first parameter + mov rdx, rsp ; stack is second parameter + add rdx, 48 ; point to start of arguments + mov rax, ffi_closure_win64_inner + call rax ; call the real closure function + add rsp, 40 + movd xmm0, rax ; If the closure returned a float, + ; ffi_closure_win64_inner wrote it to rax + ret 0 +ffi_closure_win64 ENDP + +ffi_call_win64 PROC FRAME + ;; copy registers onto stack + mov QWORD PTR [rsp+32], r9 + mov QWORD PTR [rsp+24], r8 + mov QWORD PTR [rsp+16], rdx + mov QWORD PTR [rsp+8], rcx + .PUSHREG rbp + push rbp + .ALLOCSTACK 48 + sub rsp, 48 ; 00000030H + .SETFRAME rbp, 32 + lea rbp, QWORD PTR [rsp+32] + .ENDPROLOG + + mov eax, DWORD PTR CIF_BYTES[rbp] + add rax, 15 + and rax, -16 + call __chkstk + sub rsp, rax + lea rax, QWORD PTR [rsp+32] + mov QWORD PTR STACK[rbp], rax + + mov rdx, QWORD PTR ECIF[rbp] + mov rcx, QWORD PTR STACK[rbp] + call QWORD PTR PREP_ARGS_FN[rbp] + + mov rsp, QWORD PTR STACK[rbp] + + movlpd xmm3, QWORD PTR [rsp+24] + movd r9, xmm3 + + movlpd xmm2, QWORD PTR [rsp+16] + movd r8, xmm2 + + movlpd xmm1, QWORD PTR [rsp+8] + movd rdx, xmm1 + + movlpd xmm0, QWORD PTR [rsp] + movd rcx, xmm0 + + call QWORD PTR FN[rbp] +ret_struct4b$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_4B + jne ret_struct2b$ + + mov rcx, QWORD PTR RVALUE[rbp] + mov DWORD PTR [rcx], eax + jmp ret_void$ + +ret_struct2b$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_2B + jne ret_struct1b$ + + mov rcx, QWORD PTR RVALUE[rbp] + mov WORD PTR [rcx], ax + jmp ret_void$ + +ret_struct1b$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_1B + jne ret_uint8$ + + mov rcx, QWORD PTR RVALUE[rbp] + mov BYTE PTR [rcx], al + jmp ret_void$ + +ret_uint8$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT8 + jne ret_sint8$ + + mov rcx, QWORD PTR RVALUE[rbp] + movzx rax, al + mov QWORD PTR [rcx], rax + jmp ret_void$ + +ret_sint8$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT8 + jne ret_uint16$ + + mov rcx, QWORD PTR RVALUE[rbp] + movsx rax, al + mov QWORD PTR [rcx], rax + jmp ret_void$ + +ret_uint16$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT16 + jne ret_sint16$ + + mov rcx, QWORD PTR RVALUE[rbp] + movzx rax, ax + mov QWORD PTR [rcx], rax + jmp SHORT ret_void$ + +ret_sint16$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT16 + jne ret_uint32$ + + mov rcx, QWORD PTR RVALUE[rbp] + movsx rax, ax + mov QWORD PTR [rcx], rax + jmp SHORT ret_void$ + +ret_uint32$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT32 + jne ret_sint32$ + + mov rcx, QWORD PTR RVALUE[rbp] + mov eax, eax + mov QWORD PTR [rcx], rax + jmp SHORT ret_void$ + +ret_sint32$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT32 + jne ret_float$ + + mov rcx, QWORD PTR RVALUE[rbp] + cdqe + mov QWORD PTR [rcx], rax + jmp SHORT ret_void$ + +ret_float$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_FLOAT + jne SHORT ret_double$ + + mov rax, QWORD PTR RVALUE[rbp] + movss DWORD PTR [rax], xmm0 + jmp SHORT ret_void$ + +ret_double$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_DOUBLE + jne SHORT ret_sint64$ + + mov rax, QWORD PTR RVALUE[rbp] + movlpd QWORD PTR [rax], xmm0 + jmp SHORT ret_void$ + +ret_sint64$: + cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT64 + jne ret_void$ + + mov rcx, QWORD PTR RVALUE[rbp] + mov QWORD PTR [rcx], rax + jmp SHORT ret_void$ + +ret_void$: + xor rax, rax + + lea rsp, QWORD PTR [rbp+16] + pop rbp + ret 0 +ffi_call_win64 ENDP +_TEXT ENDS +END +#else +.text + +.extern _ffi_closure_win64_inner + +# ffi_closure_win64 will be called with these registers set: +# rax points to 'closure' +# r11 contains a bit mask that specifies which of the +# first four parameters are float or double +# +# It must move the parameters passed in registers to their stack location, +# call ffi_closure_win64_inner for the actual work, then return the result. +# + .balign 16 + .globl _ffi_closure_win64 +_ffi_closure_win64: + # copy register arguments onto stack + test $1,%r11 + jne .Lfirst_is_float + mov %rcx, 8(%rsp) + jmp .Lsecond +.Lfirst_is_float: + movlpd %xmm0, 8(%rsp) + +.Lsecond: + test $2, %r11 + jne .Lsecond_is_float + mov %rdx, 16(%rsp) + jmp .Lthird +.Lsecond_is_float: + movlpd %xmm1, 16(%rsp) + +.Lthird: + test $4, %r11 + jne .Lthird_is_float + mov %r8,24(%rsp) + jmp .Lfourth +.Lthird_is_float: + movlpd %xmm2, 24(%rsp) + +.Lfourth: + test $8, %r11 + jne .Lfourth_is_float + mov %r9, 32(%rsp) + jmp .Ldone +.Lfourth_is_float: + movlpd %xmm3, 32(%rsp) + +.Ldone: +#.ALLOCSTACK 40 + sub $40, %rsp +#.ENDPROLOG + mov %rax, %rcx # context is first parameter + mov %rsp, %rdx # stack is second parameter + add $48, %rdx # point to start of arguments + mov $_ffi_closure_win64_inner, %rax + callq *%rax # call the real closure function + add $40, %rsp + movq %rax, %xmm0 # If the closure returned a float, + # ffi_closure_win64_inner wrote it to rax + retq +.ffi_closure_win64_end: + + .balign 16 + .globl _ffi_call_win64 +_ffi_call_win64: + # copy registers onto stack + mov %r9,32(%rsp) + mov %r8,24(%rsp) + mov %rdx,16(%rsp) + mov %rcx,8(%rsp) + #.PUSHREG rbp + push %rbp + #.ALLOCSTACK 48 + sub $48,%rsp + #.SETFRAME rbp, 32 + lea 32(%rsp),%rbp + #.ENDPROLOG + + mov CIF_BYTES(%rbp),%eax + add $15, %rax + and $-16, %rax + cmpq $0x1000, %rax + jb Lch_done +Lch_probe: + subq $0x1000,%rsp + orl $0x0, (%rsp) + subq $0x1000,%rax + cmpq $0x1000,%rax + ja Lch_probe +Lch_done: + subq %rax, %rsp + orl $0x0, (%rsp) + lea 32(%rsp), %rax + mov %rax, STACK(%rbp) + + mov ECIF(%rbp), %rdx + mov STACK(%rbp), %rcx + callq *PREP_ARGS_FN(%rbp) + + mov STACK(%rbp), %rsp + + movlpd 24(%rsp), %xmm3 + movd %xmm3, %r9 + + movlpd 16(%rsp), %xmm2 + movd %xmm2, %r8 + + movlpd 8(%rsp), %xmm1 + movd %xmm1, %rdx + + movlpd (%rsp), %xmm0 + movd %xmm0, %rcx + + callq *FN(%rbp) +.Lret_struct4b: + cmpl $FFI_TYPE_SMALL_STRUCT_4B, CIF_FLAGS(%rbp) + jne .Lret_struct2b + + mov RVALUE(%rbp), %rcx + mov %eax, (%rcx) + jmp .Lret_void + +.Lret_struct2b: + cmpl $FFI_TYPE_SMALL_STRUCT_2B, CIF_FLAGS(%rbp) + jne .Lret_struct1b + + mov RVALUE(%rbp), %rcx + mov %ax, (%rcx) + jmp .Lret_void + +.Lret_struct1b: + cmpl $FFI_TYPE_SMALL_STRUCT_1B, CIF_FLAGS(%rbp) + jne .Lret_uint8 + + mov RVALUE(%rbp), %rcx + mov %al, (%rcx) + jmp .Lret_void + +.Lret_uint8: + cmpl $FFI_TYPE_UINT8, CIF_FLAGS(%rbp) + jne .Lret_sint8 + + mov RVALUE(%rbp), %rcx + movzbq %al, %rax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_sint8: + cmpl $FFI_TYPE_SINT8, CIF_FLAGS(%rbp) + jne .Lret_uint16 + + mov RVALUE(%rbp), %rcx + movsbq %al, %rax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_uint16: + cmpl $FFI_TYPE_UINT16, CIF_FLAGS(%rbp) + jne .Lret_sint16 + + mov RVALUE(%rbp), %rcx + movzwq %ax, %rax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_sint16: + cmpl $FFI_TYPE_SINT16, CIF_FLAGS(%rbp) + jne .Lret_uint32 + + mov RVALUE(%rbp), %rcx + movswq %ax, %rax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_uint32: + cmpl $FFI_TYPE_UINT32, CIF_FLAGS(%rbp) + jne .Lret_sint32 + + mov RVALUE(%rbp), %rcx + movl %eax, %eax + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_sint32: + cmpl $FFI_TYPE_SINT32, CIF_FLAGS(%rbp) + jne .Lret_float + + mov RVALUE(%rbp), %rcx + cltq + movq %rax, (%rcx) + jmp .Lret_void + +.Lret_float: + cmpl $FFI_TYPE_FLOAT, CIF_FLAGS(%rbp) + jne .Lret_double + + mov RVALUE(%rbp), %rax + movss %xmm0, (%rax) + jmp .Lret_void + +.Lret_double: + cmpl $FFI_TYPE_DOUBLE, CIF_FLAGS(%rbp) + jne .Lret_sint64 + + mov RVALUE(%rbp), %rax + movlpd %xmm0, (%rax) + jmp .Lret_void + +.Lret_sint64: + cmpl $FFI_TYPE_SINT64, CIF_FLAGS(%rbp) + jne .Lret_void + + mov RVALUE(%rbp), %rcx + mov %rax, (%rcx) + jmp .Lret_void + +.Lret_void: + xor %rax, %rax + + lea 16(%rbp), %rsp + pop %rbp + retq +.ffi_call_win64_end: +#endif /* !_MSC_VER */ + |