From: Debian Haskell Group
Date: Wed, 1 Apr 2015 11:35:10 +0000 (+0000)
Subject: arm64
X-Git-Tag: archive/raspbian/8.0.2-9+rpi1~1^2^2^2^2^2~2
X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=43213c27bea5327ed3c364cabb5d2fce2f0df595;p=ghc.git

arm64

Gbp-Pq: Name arm64.patch
---

diff --git a/aclocal.m4 b/aclocal.m4
index 8298ab05..2ffa0cec 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -173,7 +173,7 @@ AC_DEFUN([FPTOOLS_SET_HASKELL_PLATFORM_VARS],
             GET_ARM_ISA()
             test -z "[$]2" || eval "[$]2=\"ArchARM {armISA = \$ARM_ISA, armISAExt = \$ARM_ISA_EXT, armABI = \$ARM_ABI}\""
             ;;
-        alpha|mips|mipseb|mipsel|hppa|hppa1_1|ia64|m68k|rs6000|s390|s390x|sparc64|vax)
+        aarch64|alpha|mips|mipseb|mipsel|hppa|hppa1_1|ia64|m68k|rs6000|s390|s390x|sparc64|vax)
            test -z "[$]2" || eval "[$]2=ArchUnknown"
            ;;
        *)
@@ -1835,6 +1835,9 @@ AC_MSG_CHECKING(for path to top of build tree)
 # converts cpu from gnu to ghc naming, and assigns the result to $target_var
 AC_DEFUN([GHC_CONVERT_CPU],[
 case "$1" in
+  aarch64*)
+    $2="aarch64"
+    ;;
   alpha*)
     $2="alpha"
     ;;
diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h
index 91c1f890..1e83328d 100644
--- a/includes/stg/MachRegs.h
+++ b/includes/stg/MachRegs.h
@@ -43,6 +43,7 @@
 #define powerpc_REGS (powerpc_TARGET_ARCH || powerpc64_TARGET_ARCH || rs6000_TARGET_ARCH)
 #define sparc_REGS   sparc_TARGET_ARCH
 #define arm_REGS     arm_TARGET_ARCH
+#define aarch64_REGS aarch64_TARGET_ARCH
 #define darwin_REGS  darwin_TARGET_OS
 #else
 #define i386_REGS    i386_HOST_ARCH
@@ -50,6 +51,7 @@
 #define powerpc_REGS (powerpc_HOST_ARCH || powerpc64_HOST_ARCH || rs6000_HOST_ARCH)
 #define sparc_REGS   sparc_HOST_ARCH
 #define arm_REGS     arm_HOST_ARCH
+#define aarch64_REGS aarch64_HOST_ARCH
 #define darwin_REGS  darwin_HOST_OS
 #endif
@@ -461,6 +463,63 @@
 #endif /* arm */
+/* -----------------------------------------------------------------------------
+   The ARMv8/AArch64 ABI register mapping
+
+   The AArch64 architecture provides 31 64-bit general purpose registers
+   and 32 128-bit SIMD/floating point registers.
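+   (In assembler the general purpose registers are written x0-x30 for
+   their 64-bit views and w0-w30 for their 32-bit views; the SIMD
+   registers are v0-v31, with b/h/s/d/q prefixes naming 8- to 128-bit
+   views of each register.)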
+
+   General purpose registers (see Chapter 5.1.1 in ARM IHI 0055B)
+
+   Register | Special | Role in the procedure call standard
+   ---------+---------+------------------------------------
+   SP       |         | The Stack Pointer
+   r30      | LR      | The Link Register
+   r29      | FP      | The Frame Pointer
+   r19-r28  |         | Callee-saved registers
+   r18      |         | The Platform Register, if needed;
+            |         | or temporary register
+   r17      | IP1     | The second intra-procedure-call temporary register
+   r16      | IP0     | The first intra-procedure-call scratch register
+   r9-r15   |         | Temporary registers
+   r8       |         | Indirect result location register
+   r0-r7    |         | Parameter/result registers
+
+
+   FPU/SIMD registers
+
+   s/d/q/v0-v7    Argument/result/scratch registers
+   s/d/q/v8-v15   Callee-saved registers (must be preserved across subroutine
+                  calls, but only the bottom 64-bit value needs to be preserved)
+   s/d/q/v16-v31  Temporary registers
+
+   ----------------------------------------------------------------------------- */
+
+#if aarch64_REGS
+
+#define REG(x) __asm__(#x)
+
+#define REG_Base    r19
+#define REG_Sp      r20
+#define REG_Hp      r21
+#define REG_R1      r22
+#define REG_R2      r23
+#define REG_R3      r24
+#define REG_R4      r25
+#define REG_R5      r26
+#define REG_R6      r27
+#define REG_SpLim   r28
+
+#define REG_F1      s8
+#define REG_F2      s9
+#define REG_F3      s10
+#define REG_F4      s11
+
+#define REG_D1      d12
+#define REG_D2      d13
+
+#endif /* aarch64 */
+
 #endif /* NO_REGS */

 /* -----------------------------------------------------------------------------
diff --git a/libffi/aarch64.patch b/libffi/aarch64.patch
new file mode 100644
index 00000000..84cd14b4
--- /dev/null
+++ b/libffi/aarch64.patch
@@ -0,0 +1,1511 @@
+2012-10-30  James Greenhalgh  <james.greenhalgh@arm.com>
+	    Marcus Shawcroft  <marcus.shawcroft@arm.com>
+
+	* src/aarch64/ffi.c: New.
+	* src/aarch64/ffitarget.h: Likewise.
+	* src/aarch64/sysv.S: Likewise.
+	* Makefile.am: Support aarch64.
+	* configure.ac: Support aarch64.
+
+Index: b/Makefile.am
+===================================================================
+--- a/Makefile.am
++++ b/Makefile.am
+@@ -5,6 +5,7 @@
+ SUBDIRS = include testsuite man
+
+ EXTRA_DIST = LICENSE ChangeLog.v1 ChangeLog.libgcj configure.host \
++	src/aarch64/ffi.c src/aarch64/ffitarget.h src/aarch64/sysv.S \
+ 	src/alpha/ffi.c src/alpha/osf.S src/alpha/ffitarget.h \
+ 	src/arm/ffi.c src/arm/sysv.S src/arm/ffitarget.h \
+ 	src/avr32/ffi.c src/avr32/sysv.S src/avr32/ffitarget.h \
+@@ -147,6 +148,9 @@
+ if POWERPC_FREEBSD
+ nodist_libffi_la_SOURCES += src/powerpc/ffi.c src/powerpc/sysv.S src/powerpc/ppc_closure.S
+ endif
++if AARCH64
++nodist_libffi_la_SOURCES += src/aarch64/sysv.S src/aarch64/ffi.c
++endif
+ if ARM
+ nodist_libffi_la_SOURCES += src/arm/sysv.S src/arm/ffi.c
+ if FFI_EXEC_TRAMPOLINE_TABLE
+Index: b/configure.ac
+===================================================================
+--- a/configure.ac
++++ b/configure.ac
+@@ -53,6 +53,10 @@
+
+ TARGETDIR="unknown"
+ case "$host" in
++  aarch64*-*-*)
++	TARGET=AARCH64; TARGETDIR=aarch64
++	;;
++
+  alpha*-*-*)
+	TARGET=ALPHA; TARGETDIR=alpha;
+	# Support 128-bit long double, changeable via command-line switch.
+@@ -228,6 +232,7 @@
+ AM_CONDITIONAL(POWERPC_AIX, test x$TARGET = xPOWERPC_AIX)
+ AM_CONDITIONAL(POWERPC_DARWIN, test x$TARGET = xPOWERPC_DARWIN)
+ AM_CONDITIONAL(POWERPC_FREEBSD, test x$TARGET = xPOWERPC_FREEBSD)
++AM_CONDITIONAL(AARCH64, test x$TARGET = xAARCH64)
+ AM_CONDITIONAL(ARM, test x$TARGET = xARM)
+ AM_CONDITIONAL(AVR32, test x$TARGET = xAVR32)
+ AM_CONDITIONAL(LIBFFI_CRIS, test x$TARGET = xLIBFFI_CRIS)
+Index: b/src/aarch64/ffi.c
+===================================================================
+--- /dev/null
++++ b/src/aarch64/ffi.c
+@@ -0,0 +1,1076 @@
++/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
++
++Permission is hereby granted, free of charge, to any person obtaining
++a copy of this software and associated documentation files (the
++``Software''), to deal in the Software without restriction, including
++without limitation the rights to use, copy, modify, merge, publish,
++distribute, sublicense, and/or sell copies of the Software, and to
++permit persons to whom the Software is furnished to do so, subject to
++the following conditions:
++
++The above copyright notice and this permission notice shall be
++included in all copies or substantial portions of the Software.
++
++THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
++EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
++
++#include <stdio.h>
++
++#include <ffi.h>
++#include <ffi_common.h>
++
++#include <stdlib.h>
++
++/* Stack alignment requirement in bytes */
++#define AARCH64_STACK_ALIGN 16
++
++#define N_X_ARG_REG 8
++#define N_V_ARG_REG 8
++
++#define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT)
++
++union _d
++{
++  UINT64 d;
++  UINT32 s[2];
++};
++
++struct call_context
++{
++  UINT64 x [AARCH64_N_XREG];
++  struct
++  {
++    union _d d[2];
++  } v [AARCH64_N_VREG];
++};
++
++static void *
++get_x_addr (struct call_context *context, unsigned n)
++{
++  return &context->x[n];
++}
++
++static void *
++get_s_addr (struct call_context *context, unsigned n)
++{
++#if defined __AARCH64EB__
++  return &context->v[n].d[1].s[1];
++#else
++  return &context->v[n].d[0].s[0];
++#endif
++}
++
++static void *
++get_d_addr (struct call_context *context, unsigned n)
++{
++#if defined __AARCH64EB__
++  return &context->v[n].d[1];
++#else
++  return &context->v[n].d[0];
++#endif
++}
++
++static void *
++get_v_addr (struct call_context *context, unsigned n)
++{
++  return &context->v[n];
++}
++
++/* Return the memory location at which a basic type would reside
++   were it to have been stored in register n.
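++
++   For example, on a little-endian target a FFI_TYPE_FLOAT notionally
++   held in register v2 occupies the low 32 bits of context->v[2],
++   which is the address get_s_addr (context, 2) returns.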
++   */
++
++static void *
++get_basic_type_addr (unsigned short type, struct call_context *context,
++                     unsigned n)
++{
++  switch (type)
++    {
++    case FFI_TYPE_FLOAT:
++      return get_s_addr (context, n);
++    case FFI_TYPE_DOUBLE:
++      return get_d_addr (context, n);
++    case FFI_TYPE_LONGDOUBLE:
++      return get_v_addr (context, n);
++    case FFI_TYPE_UINT8:
++    case FFI_TYPE_SINT8:
++    case FFI_TYPE_UINT16:
++    case FFI_TYPE_SINT16:
++    case FFI_TYPE_UINT32:
++    case FFI_TYPE_SINT32:
++    case FFI_TYPE_INT:
++    case FFI_TYPE_POINTER:
++    case FFI_TYPE_UINT64:
++    case FFI_TYPE_SINT64:
++      return get_x_addr (context, n);
++    default:
++      FFI_ASSERT (0);
++      return NULL;
++    }
++}
++
++/* Return the alignment width for each of the basic types. */
++
++static size_t
++get_basic_type_alignment (unsigned short type)
++{
++  switch (type)
++    {
++    case FFI_TYPE_FLOAT:
++    case FFI_TYPE_DOUBLE:
++      return sizeof (UINT64);
++    case FFI_TYPE_LONGDOUBLE:
++      return sizeof (long double);
++    case FFI_TYPE_UINT8:
++    case FFI_TYPE_SINT8:
++    case FFI_TYPE_UINT16:
++    case FFI_TYPE_SINT16:
++    case FFI_TYPE_UINT32:
++    case FFI_TYPE_INT:
++    case FFI_TYPE_SINT32:
++    case FFI_TYPE_POINTER:
++    case FFI_TYPE_UINT64:
++    case FFI_TYPE_SINT64:
++      return sizeof (UINT64);
++
++    default:
++      FFI_ASSERT (0);
++      return 0;
++    }
++}
++
++/* Return the size in bytes for each of the basic types. */
++
++static size_t
++get_basic_type_size (unsigned short type)
++{
++  switch (type)
++    {
++    case FFI_TYPE_FLOAT:
++      return sizeof (UINT32);
++    case FFI_TYPE_DOUBLE:
++      return sizeof (UINT64);
++    case FFI_TYPE_LONGDOUBLE:
++      return sizeof (long double);
++    case FFI_TYPE_UINT8:
++      return sizeof (UINT8);
++    case FFI_TYPE_SINT8:
++      return sizeof (SINT8);
++    case FFI_TYPE_UINT16:
++      return sizeof (UINT16);
++    case FFI_TYPE_SINT16:
++      return sizeof (SINT16);
++    case FFI_TYPE_UINT32:
++      return sizeof (UINT32);
++    case FFI_TYPE_INT:
++    case FFI_TYPE_SINT32:
++      return sizeof (SINT32);
++    case FFI_TYPE_POINTER:
++    case FFI_TYPE_UINT64:
++      return sizeof (UINT64);
++    case FFI_TYPE_SINT64:
++      return sizeof (SINT64);
++
++    default:
++      FFI_ASSERT (0);
++      return 0;
++    }
++}
++
++extern void
++ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *,
++                            extended_cif *),
++               struct call_context *context,
++               extended_cif *,
++               unsigned,
++               void (*fn)(void));
++
++extern void
++ffi_closure_SYSV (ffi_closure *);
++
++/* Test for an FFI floating point representation. */
++
++static unsigned
++is_floating_type (unsigned short type)
++{
++  return (type == FFI_TYPE_FLOAT || type == FFI_TYPE_DOUBLE
++          || type == FFI_TYPE_LONGDOUBLE);
++}
++
++/* Test for a homogeneous structure. */
++
++static unsigned short
++get_homogeneous_type (ffi_type *ty)
++{
++  if (ty->type == FFI_TYPE_STRUCT && ty->elements)
++    {
++      unsigned i;
++      unsigned short candidate_type
++        = get_homogeneous_type (ty->elements[0]);
++      for (i = 1; ty->elements[i]; i++)
++        {
++          unsigned short iteration_type = 0;
++          /* If we have a nested struct, we must find its homogeneous type.
++             If that fits with our candidate type, we are still
++             homogeneous. */
++          if (ty->elements[i]->type == FFI_TYPE_STRUCT
++              && ty->elements[i]->elements)
++            {
++              iteration_type = get_homogeneous_type (ty->elements[i]);
++            }
++          else
++            {
++              iteration_type = ty->elements[i]->type;
++            }
++
++          /* If we are not homogeneous, return FFI_TYPE_STRUCT.
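++             For example, struct { float x; struct { float y, z; } p; }
++             is homogeneous in FFI_TYPE_FLOAT, while struct { float x;
++             double y; } is not and is reported as FFI_TYPE_STRUCT.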
++           */
++          if (candidate_type != iteration_type)
++            return FFI_TYPE_STRUCT;
++        }
++      return candidate_type;
++    }
++
++  /* Base case, we have no more levels of nesting, so we
++     are a basic type, and so, trivially homogeneous in that type. */
++  return ty->type;
++}
++
++/* Determine the number of elements within a STRUCT.
++
++   Note, we must handle nested structs.
++
++   If ty is not a STRUCT this function will return 0. */
++
++static unsigned
++element_count (ffi_type *ty)
++{
++  if (ty->type == FFI_TYPE_STRUCT && ty->elements)
++    {
++      unsigned n;
++      unsigned elems = 0;
++      for (n = 0; ty->elements[n]; n++)
++        {
++          if (ty->elements[n]->type == FFI_TYPE_STRUCT
++              && ty->elements[n]->elements)
++            elems += element_count (ty->elements[n]);
++          else
++            elems++;
++        }
++      return elems;
++    }
++  return 0;
++}
++
++/* Test for a homogeneous floating point aggregate.
++
++   A homogeneous floating point aggregate is a homogeneous aggregate of
++   a half-, single- or double-precision floating point type with one
++   to four elements. Note that this includes nested structs of the
++   basic type. */
++
++static int
++is_hfa (ffi_type *ty)
++{
++  if (ty->type == FFI_TYPE_STRUCT
++      && ty->elements[0]
++      && is_floating_type (get_homogeneous_type (ty)))
++    {
++      unsigned n = element_count (ty);
++      return n >= 1 && n <= 4;
++    }
++  return 0;
++}
++
++/* Test if an ffi_type is a candidate for passing in a register.
++
++   This test does not check that sufficient registers of the
++   appropriate class are actually available, merely that IFF
++   sufficient registers are available then the argument will be passed
++   in register(s).
++
++   Note that an ffi_type that is deemed to be a register candidate
++   will always be returned in registers.
++
++   Returns 1 if a register candidate else 0. */
++
++static int
++is_register_candidate (ffi_type *ty)
++{
++  switch (ty->type)
++    {
++    case FFI_TYPE_VOID:
++    case FFI_TYPE_FLOAT:
++    case FFI_TYPE_DOUBLE:
++    case FFI_TYPE_LONGDOUBLE:
++    case FFI_TYPE_UINT8:
++    case FFI_TYPE_UINT16:
++    case FFI_TYPE_UINT32:
++    case FFI_TYPE_UINT64:
++    case FFI_TYPE_POINTER:
++    case FFI_TYPE_SINT8:
++    case FFI_TYPE_SINT16:
++    case FFI_TYPE_SINT32:
++    case FFI_TYPE_INT:
++    case FFI_TYPE_SINT64:
++      return 1;
++
++    case FFI_TYPE_STRUCT:
++      if (is_hfa (ty))
++        {
++          return 1;
++        }
++      else if (ty->size > 16)
++        {
++          /* Too large. Will be replaced with a pointer to memory. The
++             pointer MAY be passed in a register, but the value will
++             not. This test specifically fails since the argument will
++             never be passed by value in registers. */
++          return 0;
++        }
++      else
++        {
++          /* Might be passed in registers depending on the number of
++             registers required. */
++          return (ty->size + 7) / 8 < N_X_ARG_REG;
++        }
++      break;
++
++    default:
++      FFI_ASSERT (0);
++      break;
++    }
++
++  return 0;
++}
++
++/* Test if an ffi_type argument or result is a candidate for a vector
++   register. */
++
++static int
++is_v_register_candidate (ffi_type *ty)
++{
++  return is_floating_type (ty->type)
++           || (ty->type == FFI_TYPE_STRUCT && is_hfa (ty));
++}
++
++/* Representation of the procedure call argument marshalling
++   state.
++
++   The terse state variable names match the names used in the AARCH64
++   PCS. */
++
++struct arg_state
++{
++  unsigned ngrn;                /* Next general-purpose register number. */
++  unsigned nsrn;                /* Next vector register number. */
++  unsigned nsaa;                /* Next stack offset. */
++};
++
++/* Initialize a procedure call argument marshalling state.
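++   (All three counters start at zero; the call_frame_size argument is
++   currently unused.)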
++   */
++static void
++arg_init (struct arg_state *state, unsigned call_frame_size)
++{
++  state->ngrn = 0;
++  state->nsrn = 0;
++  state->nsaa = 0;
++}
++
++/* Return the number of available consecutive core argument
++   registers. */
++
++static unsigned
++available_x (struct arg_state *state)
++{
++  return N_X_ARG_REG - state->ngrn;
++}
++
++/* Return the number of available consecutive vector argument
++   registers. */
++
++static unsigned
++available_v (struct arg_state *state)
++{
++  return N_V_ARG_REG - state->nsrn;
++}
++
++static void *
++allocate_to_x (struct call_context *context, struct arg_state *state)
++{
++  FFI_ASSERT (state->ngrn < N_X_ARG_REG);
++  return get_x_addr (context, (state->ngrn)++);
++}
++
++static void *
++allocate_to_s (struct call_context *context, struct arg_state *state)
++{
++  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
++  return get_s_addr (context, (state->nsrn)++);
++}
++
++static void *
++allocate_to_d (struct call_context *context, struct arg_state *state)
++{
++  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
++  return get_d_addr (context, (state->nsrn)++);
++}
++
++static void *
++allocate_to_v (struct call_context *context, struct arg_state *state)
++{
++  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
++  return get_v_addr (context, (state->nsrn)++);
++}
++
++/* Allocate an aligned slot on the stack and return a pointer to it. */
++static void *
++allocate_to_stack (struct arg_state *state, void *stack, unsigned alignment,
++                   unsigned size)
++{
++  void *allocation;
++
++  /* Round up the NSAA to the larger of 8 or the natural
++     alignment of the argument's type. */
++  state->nsaa = ALIGN (state->nsaa, alignment);
++  state->nsaa = ALIGN (state->nsaa, 8);
++
++  allocation = stack + state->nsaa;
++
++  state->nsaa += size;
++  return allocation;
++}
++
++static void
++copy_basic_type (void *dest, void *source, unsigned short type)
++{
++  /* This is necessary to ensure that basic types are copied
++     sign-extended to 64 bits as libffi expects. */
++  switch (type)
++    {
++    case FFI_TYPE_FLOAT:
++      *(float *) dest = *(float *) source;
++      break;
++    case FFI_TYPE_DOUBLE:
++      *(double *) dest = *(double *) source;
++      break;
++    case FFI_TYPE_LONGDOUBLE:
++      *(long double *) dest = *(long double *) source;
++      break;
++    case FFI_TYPE_UINT8:
++      *(ffi_arg *) dest = *(UINT8 *) source;
++      break;
++    case FFI_TYPE_SINT8:
++      *(ffi_sarg *) dest = *(SINT8 *) source;
++      break;
++    case FFI_TYPE_UINT16:
++      *(ffi_arg *) dest = *(UINT16 *) source;
++      break;
++    case FFI_TYPE_SINT16:
++      *(ffi_sarg *) dest = *(SINT16 *) source;
++      break;
++    case FFI_TYPE_UINT32:
++      *(ffi_arg *) dest = *(UINT32 *) source;
++      break;
++    case FFI_TYPE_INT:
++    case FFI_TYPE_SINT32:
++      *(ffi_sarg *) dest = *(SINT32 *) source;
++      break;
++    case FFI_TYPE_POINTER:
++    case FFI_TYPE_UINT64:
++      *(ffi_arg *) dest = *(UINT64 *) source;
++      break;
++    case FFI_TYPE_SINT64:
++      *(ffi_sarg *) dest = *(SINT64 *) source;
++      break;
++
++    default:
++      FFI_ASSERT (0);
++    }
++}
++
++static void
++copy_hfa_to_reg_or_stack (void *memory,
++                          ffi_type *ty,
++                          struct call_context *context,
++                          unsigned char *stack,
++                          struct arg_state *state)
++{
++  unsigned elems = element_count (ty);
++  if (available_v (state) < elems)
++    {
++      /* There are insufficient V registers. Further V register allocations
++         are prevented, the NSAA is adjusted (by allocate_to_stack ())
++         and the argument is copied to memory at the adjusted NSAA.
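++         (This mirrors the AAPCS64 rule that once an HFA has spilled to
++         the stack, no later argument may be allocated to a vector
++         register.)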
++          */
++      state->nsrn = N_V_ARG_REG;
++      memcpy (allocate_to_stack (state, stack, ty->alignment, ty->size),
++              memory,
++              ty->size);
++    }
++  else
++    {
++      int i;
++      unsigned short type = get_homogeneous_type (ty);
++      unsigned elems = element_count (ty);
++      for (i = 0; i < elems; i++)
++        {
++          void *reg = allocate_to_v (context, state);
++          copy_basic_type (reg, memory, type);
++          memory += get_basic_type_size (type);
++        }
++    }
++}
++
++/* Either allocate an appropriate register for the argument type, or if
++   none are available, allocate a stack slot and return a pointer
++   to the allocated space. */
++
++static void *
++allocate_to_register_or_stack (struct call_context *context,
++                               unsigned char *stack,
++                               struct arg_state *state,
++                               unsigned short type)
++{
++  size_t alignment = get_basic_type_alignment (type);
++  size_t size = alignment;
++  switch (type)
++    {
++    case FFI_TYPE_FLOAT:
++      /* This is the only case for which the allocated stack size
++         should not match the alignment of the type. */
++      size = sizeof (UINT32);
++      /* Fall through. */
++    case FFI_TYPE_DOUBLE:
++      if (state->nsrn < N_V_ARG_REG)
++        return allocate_to_d (context, state);
++      state->nsrn = N_V_ARG_REG;
++      break;
++    case FFI_TYPE_LONGDOUBLE:
++      if (state->nsrn < N_V_ARG_REG)
++        return allocate_to_v (context, state);
++      state->nsrn = N_V_ARG_REG;
++      break;
++    case FFI_TYPE_UINT8:
++    case FFI_TYPE_SINT8:
++    case FFI_TYPE_UINT16:
++    case FFI_TYPE_SINT16:
++    case FFI_TYPE_UINT32:
++    case FFI_TYPE_SINT32:
++    case FFI_TYPE_INT:
++    case FFI_TYPE_POINTER:
++    case FFI_TYPE_UINT64:
++    case FFI_TYPE_SINT64:
++      if (state->ngrn < N_X_ARG_REG)
++        return allocate_to_x (context, state);
++      state->ngrn = N_X_ARG_REG;
++      break;
++    default:
++      FFI_ASSERT (0);
++    }
++
++  return allocate_to_stack (state, stack, alignment, size);
++}
++
++/* Copy a value to an appropriate register, or if none are
++   available, to the stack. */
++
++static void
++copy_to_register_or_stack (struct call_context *context,
++                           unsigned char *stack,
++                           struct arg_state *state,
++                           void *value,
++                           unsigned short type)
++{
++  copy_basic_type (
++        allocate_to_register_or_stack (context, stack, state, type),
++        value,
++        type);
++}
++
++/* Marshal the arguments from FFI representation to procedure call
++   context and stack. */
++
++static unsigned
++aarch64_prep_args (struct call_context *context, unsigned char *stack,
++                   extended_cif *ecif)
++{
++  int i;
++  struct arg_state state;
++
++  arg_init (&state, ALIGN(ecif->cif->bytes, 16));
++
++  for (i = 0; i < ecif->cif->nargs; i++)
++    {
++      ffi_type *ty = ecif->cif->arg_types[i];
++      switch (ty->type)
++        {
++        case FFI_TYPE_VOID:
++          FFI_ASSERT (0);
++          break;
++
++        /* If the argument is a basic type the argument is allocated to an
++           appropriate register, or if none are available, to the stack.
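++           (copy_to_register_or_stack () widens small integer types to a
++           full 64-bit slot via copy_basic_type (), so every X register
++           or stack word receives a well-defined value.)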
++         */
++        case FFI_TYPE_FLOAT:
++        case FFI_TYPE_DOUBLE:
++        case FFI_TYPE_LONGDOUBLE:
++        case FFI_TYPE_UINT8:
++        case FFI_TYPE_SINT8:
++        case FFI_TYPE_UINT16:
++        case FFI_TYPE_SINT16:
++        case FFI_TYPE_UINT32:
++        case FFI_TYPE_INT:
++        case FFI_TYPE_SINT32:
++        case FFI_TYPE_POINTER:
++        case FFI_TYPE_UINT64:
++        case FFI_TYPE_SINT64:
++          copy_to_register_or_stack (context, stack, &state,
++                                     ecif->avalue[i], ty->type);
++          break;
++
++        case FFI_TYPE_STRUCT:
++          if (is_hfa (ty))
++            {
++              copy_hfa_to_reg_or_stack (ecif->avalue[i], ty, context,
++                                        stack, &state);
++            }
++          else if (ty->size > 16)
++            {
++              /* If the argument is a composite type that is larger than 16
++                 bytes, then the argument has been copied to memory, and
++                 the argument is replaced by a pointer to the copy. */
++
++              copy_to_register_or_stack (context, stack, &state,
++                                         &(ecif->avalue[i]), FFI_TYPE_POINTER);
++            }
++          else if (available_x (&state) >= (ty->size + 7) / 8)
++            {
++              /* If the argument is a composite type and the size in
++                 double-words is not more than the number of available
++                 X registers, then the argument is copied into consecutive
++                 X registers. */
++              int j;
++              for (j = 0; j < (ty->size + 7) / 8; j++)
++                {
++                  memcpy (allocate_to_x (context, &state),
++                          &(((UINT64 *) ecif->avalue[i])[j]),
++                          sizeof (UINT64));
++                }
++            }
++          else
++            {
++              /* Otherwise, there are insufficient X registers. Further X
++                 register allocations are prevented, the NSAA is adjusted
++                 (by allocate_to_stack ()) and the argument is copied to
++                 memory at the adjusted NSAA. */
++              state.ngrn = N_X_ARG_REG;
++
++              memcpy (allocate_to_stack (&state, stack, ty->alignment,
++                                         ty->size), ecif->avalue[i], ty->size);
++            }
++          break;
++
++        default:
++          FFI_ASSERT (0);
++          break;
++        }
++    }
++
++  return ecif->cif->aarch64_flags;
++}
++
++ffi_status
++ffi_prep_cif_machdep (ffi_cif *cif)
++{
++  /* Round the stack up to a multiple of the stack alignment requirement. */
++  cif->bytes =
++    (cif->bytes + (AARCH64_STACK_ALIGN - 1)) & ~ (AARCH64_STACK_ALIGN - 1);
++
++  /* Initialize our flags. We are interested if this CIF will touch a
++     vector register, if so we will enable context save and load to
++     those registers, otherwise not. This is intended to be friendly
++     to lazy float context switching in the kernel. */
++  cif->aarch64_flags = 0;
++
++  if (is_v_register_candidate (cif->rtype))
++    {
++      cif->aarch64_flags |= AARCH64_FFI_WITH_V;
++    }
++  else
++    {
++      int i;
++      for (i = 0; i < cif->nargs; i++)
++        if (is_v_register_candidate (cif->arg_types[i]))
++          {
++            cif->aarch64_flags |= AARCH64_FFI_WITH_V;
++            break;
++          }
++    }
++
++  return FFI_OK;
++}
++
++/* Call a function with the provided arguments and capture the return
++   value.
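++
++   A minimal caller sketch using the public libffi API (the callee
++   `add' here is purely illustrative):
++
++     double add (double a, double b) { return a + b; }
++
++     ffi_cif cif;
++     ffi_type *args[2] = { &ffi_type_double, &ffi_type_double };
++     double a = 1.0, b = 2.0, r;
++     void *values[2] = { &a, &b };
++
++     if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 2,
++                       &ffi_type_double, args) == FFI_OK)
++       ffi_call (&cif, FFI_FN (add), &r, values);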
++   */
++void
++ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
++{
++  extended_cif ecif;
++
++  ecif.cif = cif;
++  ecif.avalue = avalue;
++  ecif.rvalue = rvalue;
++
++  switch (cif->abi)
++    {
++    case FFI_SYSV:
++      {
++        struct call_context context;
++        unsigned stack_bytes;
++
++        /* Figure out the total amount of stack space we need; the
++           above call frame space needs to be 16-byte aligned to
++           ensure correct alignment of the first object inserted in
++           that space, hence the ALIGN applied to cif->bytes. */
++        stack_bytes = ALIGN(cif->bytes, 16);
++
++        memset (&context, 0, sizeof (context));
++        if (is_register_candidate (cif->rtype))
++          {
++            ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
++            switch (cif->rtype->type)
++              {
++              case FFI_TYPE_VOID:
++              case FFI_TYPE_FLOAT:
++              case FFI_TYPE_DOUBLE:
++              case FFI_TYPE_LONGDOUBLE:
++              case FFI_TYPE_UINT8:
++              case FFI_TYPE_SINT8:
++              case FFI_TYPE_UINT16:
++              case FFI_TYPE_SINT16:
++              case FFI_TYPE_UINT32:
++              case FFI_TYPE_SINT32:
++              case FFI_TYPE_POINTER:
++              case FFI_TYPE_UINT64:
++              case FFI_TYPE_INT:
++              case FFI_TYPE_SINT64:
++                {
++                  void *addr = get_basic_type_addr (cif->rtype->type,
++                                                    &context, 0);
++                  copy_basic_type (rvalue, addr, cif->rtype->type);
++                  break;
++                }
++
++              case FFI_TYPE_STRUCT:
++                if (is_hfa (cif->rtype))
++                  {
++                    int j;
++                    unsigned short type = get_homogeneous_type (cif->rtype);
++                    unsigned elems = element_count (cif->rtype);
++                    for (j = 0; j < elems; j++)
++                      {
++                        void *reg = get_basic_type_addr (type, &context, j);
++                        copy_basic_type (rvalue, reg, type);
++                        rvalue += get_basic_type_size (type);
++                      }
++                  }
++                else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
++                  {
++                    unsigned size = ALIGN (cif->rtype->size, sizeof (UINT64));
++                    memcpy (rvalue, get_x_addr (&context, 0), size);
++                  }
++                else
++                  {
++                    FFI_ASSERT (0);
++                  }
++                break;
++
++              default:
++                FFI_ASSERT (0);
++                break;
++              }
++          }
++        else
++          {
++            memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64));
++            ffi_call_SYSV (aarch64_prep_args, &context, &ecif,
++                           stack_bytes, fn);
++          }
++        break;
++      }
++
++    default:
++      FFI_ASSERT (0);
++      break;
++    }
++}
++
++static unsigned char trampoline [] =
++{ 0x70, 0x00, 0x00, 0x58,       /* ldr  x16, 1f */
++  0x91, 0x00, 0x00, 0x10,       /* adr  x17, 2f */
++  0x00, 0x02, 0x1f, 0xd6        /* br   x16     */
++};
++
++/* Build a trampoline. */
++
++#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX,FLAGS)                        \
++  ({unsigned char *__tramp = (unsigned char*)(TRAMP);                   \
++    UINT64 __fun = (UINT64)(FUN);                                       \
++    UINT64 __ctx = (UINT64)(CTX);                                       \
++    UINT64 __flags = (UINT64)(FLAGS);                                   \
++    memcpy (__tramp, trampoline, sizeof (trampoline));                  \
++    memcpy (__tramp + 12, &__fun, sizeof (__fun));                      \
++    memcpy (__tramp + 20, &__ctx, sizeof (__ctx));                      \
++    memcpy (__tramp + 28, &__flags, sizeof (__flags));                  \
++    __clear_cache(__tramp, __tramp + FFI_TRAMPOLINE_SIZE);              \
++  })
++
++ffi_status
++ffi_prep_closure_loc (ffi_closure* closure,
++                      ffi_cif* cif,
++                      void (*fun)(ffi_cif*,void*,void**,void*),
++                      void *user_data,
++                      void *codeloc)
++{
++  if (cif->abi != FFI_SYSV)
++    return FFI_BAD_ABI;
++
++  FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_SYSV, codeloc,
++                       cif->aarch64_flags);
++
++  closure->cif = cif;
++  closure->user_data = user_data;
++  closure->fun = fun;
++
++  return FFI_OK;
++}
++
++/* Primary handler to setup and invoke a function within a closure.
++
++   A closure when invoked enters via the assembler wrapper
++   ffi_closure_SYSV().
++   The wrapper allocates a call context on the stack, saves the
++   interesting registers (from the perspective of the calling
++   convention) into the context, then passes control to
++   ffi_closure_SYSV_inner() passing the saved context and a pointer to
++   the stack at the point ffi_closure_SYSV() was invoked.
++
++   On the return path the assembler wrapper will reload call context
++   registers.
++
++   ffi_closure_SYSV_inner() marshals the call context into ffi value
++   descriptors, invokes the wrapped function, then marshals the return
++   value back into the call context. */
++
++void
++ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
++                        void *stack)
++{
++  ffi_cif *cif = closure->cif;
++  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
++  void *rvalue = NULL;
++  int i;
++  struct arg_state state;
++
++  arg_init (&state, ALIGN(cif->bytes, 16));
++
++  for (i = 0; i < cif->nargs; i++)
++    {
++      ffi_type *ty = cif->arg_types[i];
++
++      switch (ty->type)
++        {
++        case FFI_TYPE_VOID:
++          FFI_ASSERT (0);
++          break;
++
++        case FFI_TYPE_UINT8:
++        case FFI_TYPE_SINT8:
++        case FFI_TYPE_UINT16:
++        case FFI_TYPE_SINT16:
++        case FFI_TYPE_UINT32:
++        case FFI_TYPE_SINT32:
++        case FFI_TYPE_INT:
++        case FFI_TYPE_POINTER:
++        case FFI_TYPE_UINT64:
++        case FFI_TYPE_SINT64:
++        case FFI_TYPE_FLOAT:
++        case FFI_TYPE_DOUBLE:
++        case FFI_TYPE_LONGDOUBLE:
++          avalue[i] = allocate_to_register_or_stack (context, stack,
++                                                     &state, ty->type);
++          break;
++
++        case FFI_TYPE_STRUCT:
++          if (is_hfa (ty))
++            {
++              unsigned n = element_count (ty);
++              if (available_v (&state) < n)
++                {
++                  state.nsrn = N_V_ARG_REG;
++                  avalue[i] = allocate_to_stack (&state, stack, ty->alignment,
++                                                 ty->size);
++                }
++              else
++                {
++                  switch (get_homogeneous_type (ty))
++                    {
++                    case FFI_TYPE_FLOAT:
++                      {
++                        /* Eeek! We need a pointer to the structure,
++                           however the homogeneous float elements are
++                           being passed in individual S registers,
++                           therefore the structure is not represented as
++                           a contiguous sequence of bytes in our saved
++                           register context. We need to fake up a copy
++                           of the structure laid out in memory
++                           correctly. The fake can be tossed once the
++                           closure function has returned, hence alloca()
++                           is sufficient. */
++                        int j;
++                        UINT32 *p = avalue[i] = alloca (ty->size);
++                        for (j = 0; j < element_count (ty); j++)
++                          memcpy (&p[j],
++                                  allocate_to_s (context, &state),
++                                  sizeof (*p));
++                        break;
++                      }
++
++                    case FFI_TYPE_DOUBLE:
++                      {
++                        /* Eeek! We need a pointer to the structure,
++                           however the homogeneous double elements are
++                           being passed in individual D registers,
++                           therefore the structure is not represented as
++                           a contiguous sequence of bytes in our saved
++                           register context. We need to fake up a copy
++                           of the structure laid out in memory
++                           correctly. The fake can be tossed once the
++                           closure function has returned, hence alloca()
++                           is sufficient. */
++                        int j;
++                        UINT64 *p = avalue[i] = alloca (ty->size);
++                        for (j = 0; j < element_count (ty); j++)
++                          memcpy (&p[j],
++                                  allocate_to_d (context, &state),
++                                  sizeof (*p));
++                        break;
++                      }
++
++                    case FFI_TYPE_LONGDOUBLE:
++                      memcpy (&avalue[i],
++                              allocate_to_v (context, &state),
++                              sizeof (*avalue));
++                      break;
++
++                    default:
++                      FFI_ASSERT (0);
++                      break;
++                    }
++                }
++            }
++          else if (ty->size > 16)
++            {
++              /* Replace a composite type of size greater than 16 bytes
++                 with a pointer.
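++                 (Per the AAPCS64 the caller has already copied such an
++                 argument to memory, so only its address travels in a
++                 register or stack slot and it is fetched here like any
++                 other pointer.)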
++                  */
++              memcpy (&avalue[i],
++                      allocate_to_register_or_stack (context, stack,
++                                                     &state, FFI_TYPE_POINTER),
++                      sizeof (avalue[i]));
++            }
++          else if (available_x (&state) >= (ty->size + 7) / 8)
++            {
++              avalue[i] = get_x_addr (context, state.ngrn);
++              state.ngrn += (ty->size + 7) / 8;
++            }
++          else
++            {
++              state.ngrn = N_X_ARG_REG;
++
++              avalue[i] = allocate_to_stack (&state, stack, ty->alignment,
++                                             ty->size);
++            }
++          break;
++
++        default:
++          FFI_ASSERT (0);
++          break;
++        }
++    }
++
++  /* Figure out where the return value will be passed, either in
++     registers or in a memory block allocated by the caller and passed
++     in x8. */
++
++  if (is_register_candidate (cif->rtype))
++    {
++      /* Register candidates are *always* returned in registers. */
++
++      /* Allocate a scratchpad for the return value; we will let the
++         callee scribble the result into the scratchpad, then move the
++         contents into the appropriate return value location for the
++         calling convention. */
++      rvalue = alloca (cif->rtype->size);
++      (closure->fun) (cif, rvalue, avalue, closure->user_data);
++
++      /* Copy the return value into the call context so that it is returned
++         as expected to our caller. */
++      switch (cif->rtype->type)
++        {
++        case FFI_TYPE_VOID:
++          break;
++
++        case FFI_TYPE_UINT8:
++        case FFI_TYPE_UINT16:
++        case FFI_TYPE_UINT32:
++        case FFI_TYPE_POINTER:
++        case FFI_TYPE_UINT64:
++        case FFI_TYPE_SINT8:
++        case FFI_TYPE_SINT16:
++        case FFI_TYPE_INT:
++        case FFI_TYPE_SINT32:
++        case FFI_TYPE_SINT64:
++        case FFI_TYPE_FLOAT:
++        case FFI_TYPE_DOUBLE:
++        case FFI_TYPE_LONGDOUBLE:
++          {
++            void *addr = get_basic_type_addr (cif->rtype->type, context, 0);
++            copy_basic_type (addr, rvalue, cif->rtype->type);
++            break;
++          }
++        case FFI_TYPE_STRUCT:
++          if (is_hfa (cif->rtype))
++            {
++              int i;
++              unsigned short type = get_homogeneous_type (cif->rtype);
++              unsigned elems = element_count (cif->rtype);
++              for (i = 0; i < elems; i++)
++                {
++                  void *reg = get_basic_type_addr (type, context, i);
++                  copy_basic_type (reg, rvalue, type);
++                  rvalue += get_basic_type_size (type);
++                }
++            }
++          else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
++            {
++              unsigned size = ALIGN (cif->rtype->size, sizeof (UINT64));
++              memcpy (get_x_addr (context, 0), rvalue, size);
++            }
++          else
++            {
++              FFI_ASSERT (0);
++            }
++          break;
++        default:
++          FFI_ASSERT (0);
++          break;
++        }
++    }
++  else
++    {
++      memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64));
++      (closure->fun) (cif, rvalue, avalue, closure->user_data);
++    }
++}
++
+Index: b/src/aarch64/ffitarget.h
+===================================================================
+--- /dev/null
++++ b/src/aarch64/ffitarget.h
+@@ -0,0 +1,59 @@
++/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
++
++Permission is hereby granted, free of charge, to any person obtaining
++a copy of this software and associated documentation files (the
++``Software''), to deal in the Software without restriction, including
++without limitation the rights to use, copy, modify, merge, publish,
++distribute, sublicense, and/or sell copies of the Software, and to
++permit persons to whom the Software is furnished to do so, subject to
++the following conditions:
++
++The above copyright notice and this permission notice shall be
++included in all copies or substantial portions of the Software.
++
++THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
++EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
++
++#ifndef LIBFFI_TARGET_H
++#define LIBFFI_TARGET_H
++
++#ifndef LIBFFI_H
++#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead."
++#endif
++
++#ifndef LIBFFI_ASM
++typedef unsigned long ffi_arg;
++typedef signed long ffi_sarg;
++
++typedef enum ffi_abi
++  {
++    FFI_FIRST_ABI = 0,
++    FFI_SYSV,
++    FFI_LAST_ABI,
++    FFI_DEFAULT_ABI = FFI_SYSV
++  } ffi_abi;
++#endif
++
++/* ---- Definitions for closures ----------------------------------------- */
++
++#define FFI_CLOSURES 1
++#define FFI_TRAMPOLINE_SIZE 36
++#define FFI_NATIVE_RAW_API 0
++
++/* ---- Internal ---- */
++
++#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_flags
++
++#define AARCH64_FFI_WITH_V_BIT 0
++
++#define AARCH64_N_XREG 32
++#define AARCH64_N_VREG 32
++#define AARCH64_CALL_CONTEXT_SIZE (AARCH64_N_XREG * 8 + AARCH64_N_VREG * 16)
++
++#endif
+Index: b/src/aarch64/sysv.S
+===================================================================
+--- /dev/null
++++ b/src/aarch64/sysv.S
+@@ -0,0 +1,307 @@
++/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
++
++Permission is hereby granted, free of charge, to any person obtaining
++a copy of this software and associated documentation files (the
++``Software''), to deal in the Software without restriction, including
++without limitation the rights to use, copy, modify, merge, publish,
++distribute, sublicense, and/or sell copies of the Software, and to
++permit persons to whom the Software is furnished to do so, subject to
++the following conditions:
++
++The above copyright notice and this permission notice shall be
++included in all copies or substantial portions of the Software.
++
++THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
++EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
++
++#define LIBFFI_ASM
++#include <fficonfig.h>
++#include <ffi.h>
++
++#define cfi_adjust_cfa_offset(off)      .cfi_adjust_cfa_offset off
++#define cfi_rel_offset(reg, off)        .cfi_rel_offset reg, off
++#define cfi_restore(reg)                .cfi_restore reg
++#define cfi_def_cfa_register(reg)       .cfi_def_cfa_register reg
++
++        .text
++        .globl ffi_call_SYSV
++        .type ffi_call_SYSV, #function
++
++/* ffi_call_SYSV()
++
++   Create a stack frame, setup an argument context, call the callee
++   and extract the result.
++
++   The maximum required argument stack size is provided;
++   ffi_call_SYSV() allocates that stack space, then calls the
++   prepare_fn to populate the register context and stack. The
++   argument passing registers are loaded from the register context
++   and the callee is called; on return the argument passing registers
++   are saved back to the context. Our caller will extract the return
++   value from the final state of the saved register context.
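++
++   (The context layout must match struct call_context in ffi.c;
++   AARCH64_CALL_CONTEXT_SIZE in ffitarget.h is sized accordingly, as
++   32 X registers of 8 bytes plus 32 V registers of 16 bytes.)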
++
++   Prototype:
++
++   extern unsigned
++   ffi_call_SYSV (void (*)(struct call_context *context, unsigned char *,
++                           extended_cif *),
++                  struct call_context *context,
++                  extended_cif *,
++                  unsigned required_stack_size,
++                  void (*fn)(void));
++
++   Therefore on entry we have:
++
++   x0 prepare_fn
++   x1 &context
++   x2 &ecif
++   x3 bytes
++   x4 fn
++
++   This function uses the following stack frame layout:
++
++   ==
++                saved x30(lr)
++   x29(fp)->   saved x29(fp)
++                saved x24
++                saved x23
++                saved x22
++   sp'    ->   saved x21
++                ...
++   sp     ->   (constructed callee stack arguments)
++   ==
++
++   Voila! */
++
++#define ffi_call_SYSV_FS (8 * 4)
++
++        .cfi_startproc
++ffi_call_SYSV:
++        stp     x29, x30, [sp, #-16]!
++        cfi_adjust_cfa_offset (16)
++        cfi_rel_offset (x29, 0)
++        cfi_rel_offset (x30, 8)
++
++        mov     x29, sp
++        cfi_def_cfa_register (x29)
++        sub     sp, sp, #ffi_call_SYSV_FS
++
++        stp     x21, x22, [sp, 0]
++        cfi_rel_offset (x21, 0 - ffi_call_SYSV_FS)
++        cfi_rel_offset (x22, 8 - ffi_call_SYSV_FS)
++
++        stp     x23, x24, [sp, 16]
++        cfi_rel_offset (x23, 16 - ffi_call_SYSV_FS)
++        cfi_rel_offset (x24, 24 - ffi_call_SYSV_FS)
++
++        mov     x21, x1
++        mov     x22, x2
++        mov     x24, x4
++
++        /* Allocate the stack space for the actual arguments, many
++           arguments will be passed in registers, but we assume
++           worst case and allocate sufficient stack for ALL of
++           the arguments. */
++        sub     sp, sp, x3
++
++        /* unsigned (*prepare_fn) (struct call_context *context,
++                                   unsigned char *stack, extended_cif *ecif);
++         */
++        mov     x23, x0
++        mov     x0, x1
++        mov     x1, sp
++        /* x2 already in place */
++        blr     x23
++
++        /* Preserve the flags returned. */
++        mov     x23, x0
++
++        /* Figure out if we should touch the vector registers. */
++        tbz     x23, #AARCH64_FFI_WITH_V_BIT, 1f
++
++        /* Load the vector argument passing registers. */
++        ldp     q0, q1, [x21, #8*32 + 0]
++        ldp     q2, q3, [x21, #8*32 + 32]
++        ldp     q4, q5, [x21, #8*32 + 64]
++        ldp     q6, q7, [x21, #8*32 + 96]
++1:
++        /* Load the core argument passing registers. */
++        ldp     x0, x1, [x21, #0]
++        ldp     x2, x3, [x21, #16]
++        ldp     x4, x5, [x21, #32]
++        ldp     x6, x7, [x21, #48]
++
++        /* Don't forget x8 which may be holding the address of a return buffer.
++         */
++        ldr     x8, [x21, #8*8]
++
++        blr     x24
++
++        /* Save the core argument passing registers. */
++        stp     x0, x1, [x21, #0]
++        stp     x2, x3, [x21, #16]
++        stp     x4, x5, [x21, #32]
++        stp     x6, x7, [x21, #48]
++
++        /* Note nothing useful ever comes back in x8! */
++
++        /* Figure out if we should touch the vector registers. */
++        tbz     x23, #AARCH64_FFI_WITH_V_BIT, 1f
++
++        /* Save the vector argument passing registers. */
++        stp     q0, q1, [x21, #8*32 + 0]
++        stp     q2, q3, [x21, #8*32 + 32]
++        stp     q4, q5, [x21, #8*32 + 64]
++        stp     q6, q7, [x21, #8*32 + 96]
++1:
++        /* All done, unwind our stack frame. */
++        ldp     x21, x22, [x29, # - ffi_call_SYSV_FS]
++        cfi_restore (x21)
++        cfi_restore (x22)
++
++        ldp     x23, x24, [x29, # - ffi_call_SYSV_FS + 16]
++        cfi_restore (x23)
++        cfi_restore (x24)
++
++        mov     sp, x29
++        cfi_def_cfa_register (sp)
++
++        ldp     x29, x30, [sp], #16
++        cfi_adjust_cfa_offset (-16)
++        cfi_restore (x29)
++        cfi_restore (x30)
++
++        ret
++
++        .cfi_endproc
++        .size ffi_call_SYSV, .-ffi_call_SYSV
++
++#define ffi_closure_SYSV_FS (8 * 2 + AARCH64_CALL_CONTEXT_SIZE)
++
++/* ffi_closure_SYSV
++
++   Closure invocation glue. This is the low level code invoked directly by
++   the closure trampoline to setup and call a closure.
++
++   On entry x17 points to a struct trampoline_data, x16 has been
++   clobbered; all other registers are preserved.
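++   (The trampoline emitted by FFI_INIT_TRAMPOLINE in ffi.c loads x16
++   with the address of this routine and leaves x17 pointing at the
++   closure pointer and flags words embedded after the trampoline
++   code.)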
++
++   We allocate a call context and save the argument passing registers,
++   then invoke the generic C ffi_closure_SYSV_inner() function to do
++   all the real work; on return we load the result passing registers
++   back from the call context.
++
++   On entry
++
++   extern void
++   ffi_closure_SYSV (struct trampoline_data *);
++
++   struct trampoline_data
++   {
++        UINT64 *ffi_closure;
++        UINT64 flags;
++   };
++
++   This function uses the following stack frame layout:
++
++   ==
++                saved x30(lr)
++   x29(fp)->   saved x29(fp)
++                saved x22
++                saved x21
++                ...
++   sp     ->   call_context
++   ==
++
++   Voila! */
++
++        .text
++        .globl ffi_closure_SYSV
++        .cfi_startproc
++ffi_closure_SYSV:
++        stp     x29, x30, [sp, #-16]!
++        cfi_adjust_cfa_offset (16)
++        cfi_rel_offset (x29, 0)
++        cfi_rel_offset (x30, 8)
++
++        mov     x29, sp
++
++        sub     sp, sp, #ffi_closure_SYSV_FS
++        cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
++
++        stp     x21, x22, [x29, #-16]
++        cfi_rel_offset (x21, 0)
++        cfi_rel_offset (x22, 8)
++
++        /* Load x21 with &call_context. */
++        mov     x21, sp
++        /* Preserve our struct trampoline_data * */
++        mov     x22, x17
++
++        /* Save the rest of the argument passing registers. */
++        stp     x0, x1, [x21, #0]
++        stp     x2, x3, [x21, #16]
++        stp     x4, x5, [x21, #32]
++        stp     x6, x7, [x21, #48]
++        /* Don't forget we may have been given a result scratchpad address.
++         */
++        str     x8, [x21, #64]
++
++        /* Figure out if we should touch the vector registers. */
++        ldr     x0, [x22, #8]
++        tbz     x0, #AARCH64_FFI_WITH_V_BIT, 1f
++
++        /* Save the argument passing vector registers. */
++        stp     q0, q1, [x21, #8*32 + 0]
++        stp     q2, q3, [x21, #8*32 + 32]
++        stp     q4, q5, [x21, #8*32 + 64]
++        stp     q6, q7, [x21, #8*32 + 96]
++1:
++        /* Load &ffi_closure. */
++        ldr     x0, [x22, #0]
++        mov     x1, x21
++        /* Compute the location of the stack at the point that the
++           trampoline was called. */
++        add     x2, x29, #16
++
++        bl      ffi_closure_SYSV_inner
++
++        /* Figure out if we should touch the vector registers. */
++        ldr     x0, [x22, #8]
++        tbz     x0, #AARCH64_FFI_WITH_V_BIT, 1f
++
++        /* Load the result passing vector registers. */
++        ldp     q0, q1, [x21, #8*32 + 0]
++        ldp     q2, q3, [x21, #8*32 + 32]
++        ldp     q4, q5, [x21, #8*32 + 64]
++        ldp     q6, q7, [x21, #8*32 + 96]
++1:
++        /* Load the result passing core registers. */
++        ldp     x0, x1, [x21, #0]
++        ldp     x2, x3, [x21, #16]
++        ldp     x4, x5, [x21, #32]
++        ldp     x6, x7, [x21, #48]
++        /* Note nothing useful is returned in x8. */
++
++        /* We are done, unwind our frame. */
++        ldp     x21, x22, [x29, #-16]
++        cfi_restore (x21)
++        cfi_restore (x22)
++
++        mov     sp, x29
++        cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS)
++
++        ldp     x29, x30, [sp], #16
++        cfi_adjust_cfa_offset (-16)
++        cfi_restore (x29)
++        cfi_restore (x30)
++
++        ret
++        .cfi_endproc
++        .size ffi_closure_SYSV, .-ffi_closure_SYSV
diff --git a/libffi/ghc.mk b/libffi/ghc.mk
index 07d6d3d7..d05f0dd6 100644
--- a/libffi/ghc.mk
+++ b/libffi/ghc.mk
@@ -53,6 +53,8 @@ $(libffi_STAMP_CONFIGURE): $(TOUCH_DEP)
 	$(call removeTrees,$(LIBFFI_DIR) libffi/build)
 	cat ghc-tarballs/libffi/libffi*.tar.gz | $(GZIP_CMD) -d | { cd libffi && $(TAR_CMD) -xf - ; }
 	mv libffi/libffi-* libffi/build
+	patch -d libffi/build -p1