--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mozilla-libffi-ppc64le.patch Thu Jan 02 21:56:34 2014 +0100
@@ -0,0 +1,4441 @@
+# HG changeset patch
+# Parent a38c083288a664a9b1fdeaa16563b47661ef6c16
+# User Ulrich Weigand <uweigand@de.ibm.com>
+PPC64 LE support for libffi
+
+diff --git a/js/src/ctypes/libffi/src/powerpc/aix.S b/js/src/ctypes/libffi/src/powerpc/aix.S
+--- a/js/src/ctypes/libffi/src/powerpc/aix.S
++++ b/js/src/ctypes/libffi/src/powerpc/aix.S
+@@ -1,10 +1,10 @@
+ /* -----------------------------------------------------------------------
+- aix.S - Copyright (c) 2002,2009 Free Software Foundation, Inc.
++ aix.S - Copyright (c) 2002, 2009 Free Software Foundation, Inc.
+ based on darwin.S by John Hornkvist
+
+ PowerPC Assembly glue.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+@@ -74,16 +74,18 @@
+ .set f15,15
+ .set f16,16
+ .set f17,17
+ .set f18,18
+ .set f19,19
+ .set f20,20
+ .set f21,21
+
++ .extern .ffi_prep_args
++
+ #define LIBFFI_ASM
+ #include <fficonfig.h>
+ #include <ffi.h>
+ #define JUMPTARGET(name) name
+ #define L(x) x
+ .file "aix.S"
+ .toc
+
+@@ -120,16 +122,17 @@ ffi_call_AIX:
+ mr r31, r5 /* flags, */
+ mr r30, r6 /* rvalue, */
+ mr r29, r7 /* function address. */
+ std r2, 40(r1)
+
+ /* Call ffi_prep_args. */
+ mr r4, r1
+ bl .ffi_prep_args
++ nop
+
+ /* Now do the call. */
+ ld r0, 0(r29)
+ ld r2, 8(r29)
+ ld r11, 16(r29)
+ /* Set up cr1 with bits 4-7 of the flags. */
+ mtcrf 0x40, r31
+ mtctr r0
+@@ -221,16 +224,17 @@ L(float_return_value):
+ mr r31, r5 /* flags, */
+ mr r30, r6 /* rvalue, */
+ mr r29, r7 /* function address, */
+ stw r2, 20(r1)
+
+ /* Call ffi_prep_args. */
+ mr r4, r1
+ bl .ffi_prep_args
++ nop
+
+ /* Now do the call. */
+ lwz r0, 0(r29)
+ lwz r2, 4(r29)
+ lwz r11, 8(r29)
+ /* Set up cr1 with bits 4-7 of the flags. */
+ mtcrf 0x40, r31
+ mtctr r0
+diff --git a/js/src/ctypes/libffi/src/powerpc/ffi.c b/js/src/ctypes/libffi/src/powerpc/ffi.c
+--- a/js/src/ctypes/libffi/src/powerpc/ffi.c
++++ b/js/src/ctypes/libffi/src/powerpc/ffi.c
+@@ -1,12 +1,14 @@
+ /* -----------------------------------------------------------------------
+- ffi.c - Copyright (c) 1998 Geoffrey Keating
+- Copyright (C) 2007, 2008 Free Software Foundation, Inc
+- Copyright (C) 2008 Red Hat, Inc
++ ffi.c - Copyright (C) 2011 Anthony Green
++ Copyright (C) 2011 Kyle Moffett
++ Copyright (C) 2008 Red Hat, Inc
++ Copyright (C) 2007, 2008 Free Software Foundation, Inc
++ Copyright (c) 1998 Geoffrey Keating
+
+ PowerPC Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+@@ -34,42 +36,39 @@
+
+ extern void ffi_closure_SYSV (void);
+ extern void FFI_HIDDEN ffi_closure_LINUX64 (void);
+
+ enum {
+ /* The assembly depends on these exact flags. */
+ FLAG_RETURNS_SMST = 1 << (31-31), /* Used for FFI_SYSV small structs. */
+ FLAG_RETURNS_NOTHING = 1 << (31-30), /* These go in cr7 */
++#ifndef __NO_FPRS__
+ FLAG_RETURNS_FP = 1 << (31-29),
++#endif
+ FLAG_RETURNS_64BITS = 1 << (31-28),
+
+ FLAG_RETURNS_128BITS = 1 << (31-27), /* cr6 */
+- FLAG_SYSV_SMST_R4 = 1 << (31-26), /* use r4 for FFI_SYSV 8 byte
+- structs. */
+- FLAG_SYSV_SMST_R3 = 1 << (31-25), /* use r3 for FFI_SYSV 4 byte
+- structs. */
+- /* Bits (31-24) through (31-19) store shift value for SMST */
+
+ FLAG_ARG_NEEDS_COPY = 1 << (31- 7),
++ FLAG_ARG_NEEDS_PSAVE = FLAG_ARG_NEEDS_COPY, /* Used by ELFv2 */
++#ifndef __NO_FPRS__
+ FLAG_FP_ARGUMENTS = 1 << (31- 6), /* cr1.eq; specified by ABI */
++#endif
+ FLAG_4_GPR_ARGUMENTS = 1 << (31- 5),
+ FLAG_RETVAL_REFERENCE = 1 << (31- 4)
+ };
+
+ /* About the SYSV ABI. */
+-unsigned int NUM_GPR_ARG_REGISTERS = 8;
++#define ASM_NEEDS_REGISTERS 4
++#define NUM_GPR_ARG_REGISTERS 8
+ #ifndef __NO_FPRS__
+-unsigned int NUM_FPR_ARG_REGISTERS = 8;
+-#else
+-unsigned int NUM_FPR_ARG_REGISTERS = 0;
++# define NUM_FPR_ARG_REGISTERS 8
+ #endif
+
+-enum { ASM_NEEDS_REGISTERS = 4 };
+-
+ /* ffi_prep_args_SYSV is called by the assembly routine once stack space
+ has been allocated for the function's arguments.
+
+ The stack layout we want looks like this:
+
+ | Return address from ffi_call_SYSV 4bytes | higher addresses
+ |--------------------------------------------|
+ | Previous backchain pointer 4 | stack pointer here
+@@ -108,100 +107,119 @@ ffi_prep_args_SYSV (extended_cif *ecif,
+ /* 'stacktop' points at the previous backchain pointer. */
+ valp stacktop;
+
+ /* 'gpr_base' points at the space for gpr3, and grows upwards as
+ we use GPR registers. */
+ valp gpr_base;
+ int intarg_count;
+
++#ifndef __NO_FPRS__
+ /* 'fpr_base' points at the space for fpr1, and grows upwards as
+ we use FPR registers. */
+ valp fpr_base;
+ int fparg_count;
++#endif
+
+ /* 'copy_space' grows down as we put structures in it. It should
+ stay 16-byte aligned. */
+ valp copy_space;
+
+ /* 'next_arg' grows up as we put parameters in it. */
+ valp next_arg;
+
+- int i, ii MAYBE_UNUSED;
++ int i;
+ ffi_type **ptr;
++#ifndef __NO_FPRS__
+ double double_tmp;
++#endif
+ union {
+ void **v;
+ char **c;
+ signed char **sc;
+ unsigned char **uc;
+ signed short **ss;
+ unsigned short **us;
+ unsigned int **ui;
+ long long **ll;
+ float **f;
+ double **d;
+ } p_argv;
+ size_t struct_copy_size;
+ unsigned gprvalue;
+
+- if (ecif->cif->abi == FFI_LINUX_SOFT_FLOAT)
+- NUM_FPR_ARG_REGISTERS = 0;
+-
+ stacktop.c = (char *) stack + bytes;
+ gpr_base.u = stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS;
+ intarg_count = 0;
++#ifndef __NO_FPRS__
+ fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS;
+ fparg_count = 0;
+ copy_space.c = ((flags & FLAG_FP_ARGUMENTS) ? fpr_base.c : gpr_base.c);
++#else
++ copy_space.c = gpr_base.c;
++#endif
+ next_arg.u = stack + 2;
+
+ /* Check that everything starts aligned properly. */
+- FFI_ASSERT (((unsigned) (char *) stack & 0xF) == 0);
+- FFI_ASSERT (((unsigned) copy_space.c & 0xF) == 0);
+- FFI_ASSERT (((unsigned) stacktop.c & 0xF) == 0);
++ FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
++ FFI_ASSERT (((unsigned long) copy_space.c & 0xF) == 0);
++ FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
+ FFI_ASSERT ((bytes & 0xF) == 0);
+ FFI_ASSERT (copy_space.c >= next_arg.c);
+
+ /* Deal with return values that are actually pass-by-reference. */
+ if (flags & FLAG_RETVAL_REFERENCE)
+ {
+ *gpr_base.u++ = (unsigned long) (char *) ecif->rvalue;
+ intarg_count++;
+ }
+
+ /* Now for the arguments. */
+ p_argv.v = ecif->avalue;
+ for (ptr = ecif->cif->arg_types, i = ecif->cif->nargs;
+ i > 0;
+ i--, ptr++, p_argv.v++)
+ {
+- switch ((*ptr)->type)
+- {
++ unsigned short typenum = (*ptr)->type;
++
++ /* We may need to handle some values depending on ABI */
++ if (ecif->cif->abi == FFI_LINUX_SOFT_FLOAT) {
++ if (typenum == FFI_TYPE_FLOAT)
++ typenum = FFI_TYPE_UINT32;
++ if (typenum == FFI_TYPE_DOUBLE)
++ typenum = FFI_TYPE_UINT64;
++ if (typenum == FFI_TYPE_LONGDOUBLE)
++ typenum = FFI_TYPE_UINT128;
++ } else if (ecif->cif->abi != FFI_LINUX) {
++#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
++ if (typenum == FFI_TYPE_LONGDOUBLE)
++ typenum = FFI_TYPE_STRUCT;
++#endif
++ }
++
++ /* Now test the translated value */
++ switch (typenum) {
++#ifndef __NO_FPRS__
+ case FFI_TYPE_FLOAT:
+ /* With FFI_LINUX_SOFT_FLOAT floats are handled like UINT32. */
+- if (ecif->cif->abi == FFI_LINUX_SOFT_FLOAT)
+- goto soft_float_prep;
+ double_tmp = **p_argv.f;
+ if (fparg_count >= NUM_FPR_ARG_REGISTERS)
+ {
+ *next_arg.f = (float) double_tmp;
+ next_arg.u += 1;
+ intarg_count++;
+ }
+ else
+ *fpr_base.d++ = double_tmp;
+ fparg_count++;
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
+ break;
+
+ case FFI_TYPE_DOUBLE:
+ /* With FFI_LINUX_SOFT_FLOAT doubles are handled like UINT64. */
+- if (ecif->cif->abi == FFI_LINUX_SOFT_FLOAT)
+- goto soft_double_prep;
+ double_tmp = **p_argv.d;
+
+ if (fparg_count >= NUM_FPR_ARG_REGISTERS)
+ {
+ if (intarg_count >= NUM_GPR_ARG_REGISTERS
+ && intarg_count % 2 != 0)
+ {
+ intarg_count++;
+@@ -213,53 +231,16 @@ ffi_prep_args_SYSV (extended_cif *ecif,
+ else
+ *fpr_base.d++ = double_tmp;
+ fparg_count++;
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
+ break;
+
+ #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+- if ((ecif->cif->abi != FFI_LINUX)
+- && (ecif->cif->abi != FFI_LINUX_SOFT_FLOAT))
+- goto do_struct;
+- /* The soft float ABI for long doubles works like this,
+- a long double is passed in four consecutive gprs if available.
+- A maximum of 2 long doubles can be passed in gprs.
+- If we do not have 4 gprs left, the long double is passed on the
+- stack, 4-byte aligned. */
+- if (ecif->cif->abi == FFI_LINUX_SOFT_FLOAT)
+- {
+- unsigned int int_tmp = (*p_argv.ui)[0];
+- if (intarg_count >= NUM_GPR_ARG_REGISTERS - 3)
+- {
+- if (intarg_count < NUM_GPR_ARG_REGISTERS)
+- intarg_count += NUM_GPR_ARG_REGISTERS - intarg_count;
+- *next_arg.u = int_tmp;
+- next_arg.u++;
+- for (ii = 1; ii < 4; ii++)
+- {
+- int_tmp = (*p_argv.ui)[ii];
+- *next_arg.u = int_tmp;
+- next_arg.u++;
+- }
+- }
+- else
+- {
+- *gpr_base.u++ = int_tmp;
+- for (ii = 1; ii < 4; ii++)
+- {
+- int_tmp = (*p_argv.ui)[ii];
+- *gpr_base.u++ = int_tmp;
+- }
+- }
+- intarg_count +=4;
+- }
+- else
+- {
+ double_tmp = (*p_argv.d)[0];
+
+ if (fparg_count >= NUM_FPR_ARG_REGISTERS - 1)
+ {
+ if (intarg_count >= NUM_GPR_ARG_REGISTERS
+ && intarg_count % 2 != 0)
+ {
+ intarg_count++;
+@@ -275,23 +256,50 @@ ffi_prep_args_SYSV (extended_cif *ecif,
+ {
+ *fpr_base.d++ = double_tmp;
+ double_tmp = (*p_argv.d)[1];
+ *fpr_base.d++ = double_tmp;
+ }
+
+ fparg_count += 2;
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
+- }
+ break;
+ #endif
++#endif /* have FPRs */
++
++ /*
++ * The soft float ABI for long doubles works like this, a long double
++ * is passed in four consecutive GPRs if available. A maximum of 2
++ * long doubles can be passed in gprs. If we do not have 4 GPRs
++ * left, the long double is passed on the stack, 4-byte aligned.
++ */
++ case FFI_TYPE_UINT128: {
++ unsigned int int_tmp = (*p_argv.ui)[0];
++ unsigned int ii;
++ if (intarg_count >= NUM_GPR_ARG_REGISTERS - 3) {
++ if (intarg_count < NUM_GPR_ARG_REGISTERS)
++ intarg_count += NUM_GPR_ARG_REGISTERS - intarg_count;
++ *(next_arg.u++) = int_tmp;
++ for (ii = 1; ii < 4; ii++) {
++ int_tmp = (*p_argv.ui)[ii];
++ *(next_arg.u++) = int_tmp;
++ }
++ } else {
++ *(gpr_base.u++) = int_tmp;
++ for (ii = 1; ii < 4; ii++) {
++ int_tmp = (*p_argv.ui)[ii];
++ *(gpr_base.u++) = int_tmp;
++ }
++ }
++ intarg_count += 4;
++ break;
++ }
+
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+- soft_double_prep:
+ if (intarg_count == NUM_GPR_ARG_REGISTERS-1)
+ intarg_count++;
+ if (intarg_count >= NUM_GPR_ARG_REGISTERS)
+ {
+ if (intarg_count % 2 != 0)
+ {
+ intarg_count++;
+ next_arg.u++;
+@@ -314,19 +322,16 @@ ffi_prep_args_SYSV (extended_cif *ecif,
+ gpr_base.u++;
+ }
+ *gpr_base.ll++ = **p_argv.ll;
+ }
+ intarg_count += 2;
+ break;
+
+ case FFI_TYPE_STRUCT:
+-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+- do_struct:
+-#endif
+ struct_copy_size = ((*ptr)->size + 15) & ~0xF;
+ copy_space.c -= struct_copy_size;
+ memcpy (copy_space.c, *p_argv.c, (*ptr)->size);
+
+ gprvalue = (unsigned long) copy_space.c;
+
+ FFI_ASSERT (copy_space.c > next_arg.c);
+ FFI_ASSERT (flags & FLAG_ARG_NEEDS_COPY);
+@@ -344,45 +349,91 @@ ffi_prep_args_SYSV (extended_cif *ecif,
+ case FFI_TYPE_SINT16:
+ gprvalue = **p_argv.ss;
+ goto putgpr;
+
+ case FFI_TYPE_INT:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_POINTER:
+- soft_float_prep:
+
+ gprvalue = **p_argv.ui;
+
+ putgpr:
+ if (intarg_count >= NUM_GPR_ARG_REGISTERS)
+ *next_arg.u++ = gprvalue;
+ else
+ *gpr_base.u++ = gprvalue;
+ intarg_count++;
+ break;
+ }
+ }
+
+ /* Check that we didn't overrun the stack... */
+ FFI_ASSERT (copy_space.c >= next_arg.c);
+ FFI_ASSERT (gpr_base.u <= stacktop.u - ASM_NEEDS_REGISTERS);
++ /* The assert below is testing that the number of integer arguments agrees
++ with the number found in ffi_prep_cif_machdep(). However, intarg_count
++ is incremented whenever we place an FP arg on the stack, so account for
++ that before our assert test. */
++#ifndef __NO_FPRS__
++ if (fparg_count > NUM_FPR_ARG_REGISTERS)
++ intarg_count -= fparg_count - NUM_FPR_ARG_REGISTERS;
+ FFI_ASSERT (fpr_base.u
+ <= stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS);
++#endif
+ FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4);
+ }
+
+ /* About the LINUX64 ABI. */
+ enum {
+ NUM_GPR_ARG_REGISTERS64 = 8,
+ NUM_FPR_ARG_REGISTERS64 = 13
+ };
+ enum { ASM_NEEDS_REGISTERS64 = 4 };
+
++#if _CALL_ELF == 2
++/* Classify T under the ELFv2 homogeneous-aggregate rule.  Yields
++   FFI_TYPE_FLOAT or FFI_TYPE_DOUBLE when T is that scalar, or a struct
++   whose leaves (found recursively) all have that one type and number
++   at most 8; *ELNUM then holds the leaf count.  Any other input
++   yields 0, and the caller must then ignore *ELNUM.  */
++static unsigned int
++discover_homogeneous_aggregate (const ffi_type *t, unsigned int *elnum)
++{
++  ffi_type **member;
++  unsigned int common = 0, leaves = 0;
++
++  if (t->type == FFI_TYPE_FLOAT || t->type == FFI_TYPE_DOUBLE)
++    {
++      *elnum = 1;
++      return t->type;
++    }
++  if (t->type != FFI_TYPE_STRUCT)
++    return 0;
++
++  for (member = t->elements; *member; member++)
++    {
++      unsigned int sub_leaves = 0, sub;
++      sub = discover_homogeneous_aggregate (*member, &sub_leaves);
++
++      /* Reject non-FP leaves and any mix of float with double.  */
++      if (sub == 0 || (common != 0 && common != sub))
++        return 0;
++      common = sub;
++      leaves += sub_leaves;
++      if (leaves > 8)
++        return 0;
++    }
++  *elnum = leaves;
++  return common;
++}
++#endif
++
++
+ /* ffi_prep_args64 is called by the assembly routine once stack space
+ has been allocated for the function's arguments.
+
+ The stack layout we want looks like this:
+
+ | Ret addr from ffi_call_LINUX64 8bytes | higher addresses
+ |--------------------------------------------|
+ | CR save area 8bytes |
+@@ -418,141 +469,216 @@ ffi_prep_args64 (extended_cif *ecif, uns
+ const unsigned long bytes = ecif->cif->bytes;
+ const unsigned long flags = ecif->cif->flags;
+
+ typedef union {
+ char *c;
+ unsigned long *ul;
+ float *f;
+ double *d;
++ size_t p;
+ } valp;
+
+ /* 'stacktop' points at the previous backchain pointer. */
+ valp stacktop;
+
+ /* 'next_arg' points at the space for gpr3, and grows upwards as
+ we use GPR registers, then continues at rest. */
+ valp gpr_base;
+ valp gpr_end;
+ valp rest;
+ valp next_arg;
+
+ /* 'fpr_base' points at the space for fpr3, and grows upwards as
+ we use FPR registers. */
+ valp fpr_base;
+- int fparg_count;
++ unsigned int fparg_count;
+
+- int i, words;
++ unsigned int i, words, nargs, nfixedargs;
+ ffi_type **ptr;
+ double double_tmp;
+ union {
+ void **v;
+ char **c;
+ signed char **sc;
+ unsigned char **uc;
+ signed short **ss;
+ unsigned short **us;
+ signed int **si;
+ unsigned int **ui;
+ unsigned long **ul;
+ float **f;
+ double **d;
+ } p_argv;
+ unsigned long gprvalue;
++#ifdef __STRUCT_PARM_ALIGN__
++ unsigned long align;
++#endif
+
+ stacktop.c = (char *) stack + bytes;
+ gpr_base.ul = stacktop.ul - ASM_NEEDS_REGISTERS64 - NUM_GPR_ARG_REGISTERS64;
+ gpr_end.ul = gpr_base.ul + NUM_GPR_ARG_REGISTERS64;
++#if _CALL_ELF == 2
++ rest.ul = stack + 4 + NUM_GPR_ARG_REGISTERS64;
++#else
+ rest.ul = stack + 6 + NUM_GPR_ARG_REGISTERS64;
++#endif
+ fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64;
+ fparg_count = 0;
+ next_arg.ul = gpr_base.ul;
+
+ /* Check that everything starts aligned properly. */
+ FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
+ FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
+ FFI_ASSERT ((bytes & 0xF) == 0);
+
+ /* Deal with return values that are actually pass-by-reference. */
+ if (flags & FLAG_RETVAL_REFERENCE)
+ *next_arg.ul++ = (unsigned long) (char *) ecif->rvalue;
+
+ /* Now for the arguments. */
+ p_argv.v = ecif->avalue;
+- for (ptr = ecif->cif->arg_types, i = ecif->cif->nargs;
+- i > 0;
+- i--, ptr++, p_argv.v++)
++ nargs = ecif->cif->nargs;
++ nfixedargs = ecif->cif->nfixedargs;
++ for (ptr = ecif->cif->arg_types, i = 0;
++ i < nargs;
++ i++, ptr++, p_argv.v++)
+ {
++ unsigned int elt, elnum;
++
+ switch ((*ptr)->type)
+ {
+ case FFI_TYPE_FLOAT:
+ double_tmp = **p_argv.f;
+- *next_arg.f = (float) double_tmp;
++ if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
++ *fpr_base.d++ = double_tmp;
++ else
++ *next_arg.f = (float) double_tmp;
+ if (++next_arg.ul == gpr_end.ul)
+ next_arg.ul = rest.ul;
+- if (fparg_count < NUM_FPR_ARG_REGISTERS64)
+- *fpr_base.d++ = double_tmp;
+ fparg_count++;
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
+ break;
+
+ case FFI_TYPE_DOUBLE:
+ double_tmp = **p_argv.d;
+- *next_arg.d = double_tmp;
++ if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
++ *fpr_base.d++ = double_tmp;
++ else
++ *next_arg.d = double_tmp;
+ if (++next_arg.ul == gpr_end.ul)
+ next_arg.ul = rest.ul;
+- if (fparg_count < NUM_FPR_ARG_REGISTERS64)
+- *fpr_base.d++ = double_tmp;
+ fparg_count++;
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
+ break;
+
+ #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+ double_tmp = (*p_argv.d)[0];
+- *next_arg.d = double_tmp;
++ if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
++ *fpr_base.d++ = double_tmp;
++ else
++ *next_arg.d = double_tmp;
+ if (++next_arg.ul == gpr_end.ul)
+ next_arg.ul = rest.ul;
+- if (fparg_count < NUM_FPR_ARG_REGISTERS64)
+- *fpr_base.d++ = double_tmp;
+ fparg_count++;
+ double_tmp = (*p_argv.d)[1];
+- *next_arg.d = double_tmp;
++ if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
++ *fpr_base.d++ = double_tmp;
++ else
++ *next_arg.d = double_tmp;
+ if (++next_arg.ul == gpr_end.ul)
+ next_arg.ul = rest.ul;
+- if (fparg_count < NUM_FPR_ARG_REGISTERS64)
+- *fpr_base.d++ = double_tmp;
+ fparg_count++;
+ FFI_ASSERT (__LDBL_MANT_DIG__ == 106);
+ FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
+ break;
+ #endif
+
+ case FFI_TYPE_STRUCT:
+- words = ((*ptr)->size + 7) / 8;
+- if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul)
++#ifdef __STRUCT_PARM_ALIGN__
++ align = (*ptr)->alignment;
++ if (align > __STRUCT_PARM_ALIGN__)
++ align = __STRUCT_PARM_ALIGN__;
++ if (align > 1)
++ next_arg.p = ALIGN (next_arg.p, align);
++#endif
++ elt = 0;
++#if _CALL_ELF == 2
++ elt = discover_homogeneous_aggregate (*ptr, &elnum);
++#endif
++ if (elt)
+ {
+- size_t first = gpr_end.c - next_arg.c;
+- memcpy (next_arg.c, *p_argv.c, first);
+- memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first);
+- next_arg.c = rest.c + words * 8 - first;
++ union {
++ void *v;
++ float *f;
++ double *d;
++ } arg;
++
++ arg.v = *p_argv.v;
++              if (elt == FFI_TYPE_FLOAT)
++                {
++                  do
++                    {
++                      double_tmp = *arg.f++;
++                      if (fparg_count < NUM_FPR_ARG_REGISTERS64
++                          && i < nfixedargs)
++                        *fpr_base.d++ = double_tmp;
++                      else
++                        *next_arg.f = (float) double_tmp;
++                      if (++next_arg.f == gpr_end.f)
++                        next_arg.f = rest.f;
++                      fparg_count++;
++                    }
++                  while (--elnum != 0);
++                  /* An odd float count ends mid-doubleword; step over
++                     the pad word so the next arg is 8-byte aligned.  */
++                  if ((next_arg.p & 7) != 0)
++                    if (++next_arg.f == gpr_end.f)
++                      next_arg.f = rest.f;
++                }
++ else
++ do
++ {
++ double_tmp = *arg.d++;
++ if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
++ *fpr_base.d++ = double_tmp;
++ else
++ *next_arg.d = double_tmp;
++ if (++next_arg.d == gpr_end.d)
++ next_arg.d = rest.d;
++ fparg_count++;
++ }
++ while (--elnum != 0);
+ }
+ else
+ {
+- char *where = next_arg.c;
++ words = ((*ptr)->size + 7) / 8;
++ if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul)
++ {
++ size_t first = gpr_end.c - next_arg.c;
++ memcpy (next_arg.c, *p_argv.c, first);
++ memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first);
++ next_arg.c = rest.c + words * 8 - first;
++ }
++ else
++ {
++ char *where = next_arg.c;
+
+- /* Structures with size less than eight bytes are passed
+- left-padded. */
+- if ((*ptr)->size < 8)
+- where += 8 - (*ptr)->size;
+-
+- memcpy (where, *p_argv.c, (*ptr)->size);
+- next_arg.ul += words;
+- if (next_arg.ul == gpr_end.ul)
+- next_arg.ul = rest.ul;
++#ifndef __LITTLE_ENDIAN__
++ /* Structures with size less than eight bytes are passed
++ left-padded. */
++ if ((*ptr)->size < 8)
++ where += 8 - (*ptr)->size;
++#endif
++ memcpy (where, *p_argv.c, (*ptr)->size);
++ next_arg.ul += words;
++ if (next_arg.ul == gpr_end.ul)
++ next_arg.ul = rest.ul;
++ }
+ }
+ break;
+
+ case FFI_TYPE_UINT8:
+ gprvalue = **p_argv.uc;
+ goto putgpr;
+ case FFI_TYPE_SINT8:
+ gprvalue = **p_argv.sc;
+@@ -586,53 +712,55 @@ ffi_prep_args64 (extended_cif *ecif, uns
+ FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS
+ || (next_arg.ul >= gpr_base.ul
+ && next_arg.ul <= gpr_base.ul + 4));
+ }
+
+
+
+ /* Perform machine dependent cif processing */
+-ffi_status
+-ffi_prep_cif_machdep (ffi_cif *cif)
++static ffi_status
++ffi_prep_cif_machdep_core (ffi_cif *cif)
+ {
+ /* All this is for the SYSV and LINUX64 ABI. */
+- int i;
+ ffi_type **ptr;
+ unsigned bytes;
+- int fparg_count = 0, intarg_count = 0;
+- unsigned flags = 0;
++ unsigned i, fparg_count = 0, intarg_count = 0;
++ unsigned flags = cif->flags;
+ unsigned struct_copy_size = 0;
+ unsigned type = cif->rtype->type;
+ unsigned size = cif->rtype->size;
+
+- if (cif->abi == FFI_LINUX_SOFT_FLOAT)
+- NUM_FPR_ARG_REGISTERS = 0;
+-
++ /* The machine-independent calculation of cif->bytes doesn't work
++ for us. Redo the calculation. */
+ if (cif->abi != FFI_LINUX64)
+ {
+- /* All the machine-independent calculation of cif->bytes will be wrong.
+- Redo the calculation for SYSV. */
+-
+ /* Space for the frame pointer, callee's LR, and the asm's temp regs. */
+ bytes = (2 + ASM_NEEDS_REGISTERS) * sizeof (int);
+
+ /* Space for the GPR registers. */
+ bytes += NUM_GPR_ARG_REGISTERS * sizeof (int);
+ }
+ else
+ {
+ /* 64-bit ABI. */
++#if _CALL_ELF == 2
++ /* Space for backchain, CR, LR, TOC and the asm's temp regs. */
++ bytes = (4 + ASM_NEEDS_REGISTERS64) * sizeof (long);
+
++ /* Space for the general registers. */
++ bytes += NUM_GPR_ARG_REGISTERS64 * sizeof (long);
++#else
+ /* Space for backchain, CR, LR, cc/ld doubleword, TOC and the asm's temp
+ regs. */
+ bytes = (6 + ASM_NEEDS_REGISTERS64) * sizeof (long);
+
+ /* Space for the mandatory parm save area and general registers. */
+ bytes += 2 * NUM_GPR_ARG_REGISTERS64 * sizeof (long);
++#endif
+ }
+
+ /* Return value handling. The rules for SYSV are as follows:
+ - 32-bit (or less) integer values are returned in gpr3;
+ - Structures of size <= 4 bytes also returned in gpr3;
+ - 64-bit integer values and structures between 5 and 8 bytes are returned
+ in gpr3 and gpr4;
+ - Single/double FP values are returned in fpr1;
+@@ -641,71 +769,93 @@ ffi_prep_cif_machdep (ffi_cif *cif)
+ - long doubles (if not equivalent to double) are returned in
+ fpr1,fpr2 for Linux and as for large structs for SysV.
+ For LINUX64:
+ - integer values in gpr3;
+ - Structures/Unions by reference;
+ - Single/double FP values in fpr1, long double in fpr1,fpr2.
+ - soft-float float/doubles are treated as UINT32/UINT64 respectivley.
+ - soft-float long doubles are returned in gpr3-gpr6. */
++ /* First translate for softfloat/nonlinux */
++ if (cif->abi == FFI_LINUX_SOFT_FLOAT)
++ {
++ if (type == FFI_TYPE_FLOAT)
++ type = FFI_TYPE_UINT32;
++ if (type == FFI_TYPE_DOUBLE)
++ type = FFI_TYPE_UINT64;
++ if (type == FFI_TYPE_LONGDOUBLE)
++ type = FFI_TYPE_UINT128;
++ }
++ else if (cif->abi != FFI_LINUX
++ && cif->abi != FFI_LINUX64)
++ {
++#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
++ if (type == FFI_TYPE_LONGDOUBLE)
++ type = FFI_TYPE_STRUCT;
++#endif
++ }
++
+ switch (type)
+ {
++#ifndef __NO_FPRS__
+ #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+- if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX64
+- && cif->abi != FFI_LINUX_SOFT_FLOAT)
+- goto byref;
+ flags |= FLAG_RETURNS_128BITS;
+ /* Fall through. */
+ #endif
+ case FFI_TYPE_DOUBLE:
+ flags |= FLAG_RETURNS_64BITS;
+ /* Fall through. */
+ case FFI_TYPE_FLOAT:
+- /* With FFI_LINUX_SOFT_FLOAT no fp registers are used. */
+- if (cif->abi != FFI_LINUX_SOFT_FLOAT)
+- flags |= FLAG_RETURNS_FP;
++ flags |= FLAG_RETURNS_FP;
+ break;
++#endif
+
++ case FFI_TYPE_UINT128:
++ flags |= FLAG_RETURNS_128BITS;
++ /* Fall through. */
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ flags |= FLAG_RETURNS_64BITS;
+ break;
+
+ case FFI_TYPE_STRUCT:
+- if (cif->abi == FFI_SYSV)
++ /*
++ * The final SYSV ABI says that structures of 8 bytes or fewer
++ * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
++ * in memory.
++ *
++ * NOTE: The assembly code can safely assume that it just needs to
++ * store both r3 and r4 into an 8-byte word-aligned buffer, as
++ * we allocate a temporary buffer in ffi_call() if this flag is
++ * set.
++ */
++ if (cif->abi == FFI_SYSV && size <= 8)
+ {
+- /* The final SYSV ABI says that structures smaller or equal 8 bytes
+- are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
+- in memory. */
+-
+- /* Treat structs with size <= 8 bytes. */
+- if (size <= 8)
++ flags |= FLAG_RETURNS_SMST;
++ break;
++ }
++#if _CALL_ELF == 2
++ if (cif->abi == FFI_LINUX64)
++ {
++ unsigned int elt, elnum;
++ elt = discover_homogeneous_aggregate (cif->rtype, &elnum);
++ if (elt)
++ {
++ if (elt == FFI_TYPE_DOUBLE)
++ flags |= FLAG_RETURNS_64BITS;
++ flags |= FLAG_RETURNS_FP | FLAG_RETURNS_SMST;
++ break;
++ }
++ if (size <= 16)
+ {
+ flags |= FLAG_RETURNS_SMST;
+- /* These structs are returned in r3. We pack the type and the
+- precalculated shift value (needed in the sysv.S) into flags.
+- The same applies for the structs returned in r3/r4. */
+- if (size <= 4)
+- {
+- flags |= FLAG_SYSV_SMST_R3;
+- flags |= 8 * (4 - size) << 8;
+- break;
+- }
+- /* These structs are returned in r3 and r4. See above. */
+- if (size <= 8)
+- {
+- flags |= FLAG_SYSV_SMST_R3 | FLAG_SYSV_SMST_R4;
+- flags |= 8 * (8 - size) << 8;
+- break;
+- }
++ break;
+ }
+ }
+-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+- byref:
+ #endif
+ intarg_count++;
+ flags |= FLAG_RETVAL_REFERENCE;
+ /* Fall through. */
+ case FFI_TYPE_VOID:
+ flags |= FLAG_RETURNS_NOTHING;
+ break;
+
+@@ -717,218 +867,334 @@ ffi_prep_cif_machdep (ffi_cif *cif)
+ if (cif->abi != FFI_LINUX64)
+ /* The first NUM_GPR_ARG_REGISTERS words of integer arguments, and the
+ first NUM_FPR_ARG_REGISTERS fp arguments, go in registers; the rest
+ goes on the stack. Structures and long doubles (if not equivalent
+ to double) are passed as a pointer to a copy of the structure.
+ Stuff on the stack needs to keep proper alignment. */
+ for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+ {
+- switch ((*ptr)->type)
+- {
++ unsigned short typenum = (*ptr)->type;
++
++ /* We may need to handle some values depending on ABI */
++ if (cif->abi == FFI_LINUX_SOFT_FLOAT) {
++ if (typenum == FFI_TYPE_FLOAT)
++ typenum = FFI_TYPE_UINT32;
++ if (typenum == FFI_TYPE_DOUBLE)
++ typenum = FFI_TYPE_UINT64;
++ if (typenum == FFI_TYPE_LONGDOUBLE)
++ typenum = FFI_TYPE_UINT128;
++ } else if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX64) {
++#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
++ if (typenum == FFI_TYPE_LONGDOUBLE)
++ typenum = FFI_TYPE_STRUCT;
++#endif
++ }
++
++ switch (typenum) {
++#ifndef __NO_FPRS__
+ case FFI_TYPE_FLOAT:
+- /* With FFI_LINUX_SOFT_FLOAT floats are handled like UINT32. */
+- if (cif->abi == FFI_LINUX_SOFT_FLOAT)
+- goto soft_float_cif;
+ fparg_count++;
+ /* floating singles are not 8-aligned on stack */
+ break;
+
+ #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+- if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX_SOFT_FLOAT)
+- goto do_struct;
+- if (cif->abi == FFI_LINUX_SOFT_FLOAT)
+- {
+- if (intarg_count >= NUM_GPR_ARG_REGISTERS - 3
+- || intarg_count < NUM_GPR_ARG_REGISTERS)
+- /* A long double in FFI_LINUX_SOFT_FLOAT can use only
+- a set of four consecutive gprs. If we have not enough,
+- we have to adjust the intarg_count value. */
+- intarg_count += NUM_GPR_ARG_REGISTERS - intarg_count;
+- intarg_count += 4;
+- break;
+- }
+- else
+- fparg_count++;
++ fparg_count++;
+ /* Fall thru */
+ #endif
+ case FFI_TYPE_DOUBLE:
+- /* With FFI_LINUX_SOFT_FLOAT doubles are handled like UINT64. */
+- if (cif->abi == FFI_LINUX_SOFT_FLOAT)
+- goto soft_double_cif;
+ fparg_count++;
+ /* If this FP arg is going on the stack, it must be
+ 8-byte-aligned. */
+ if (fparg_count > NUM_FPR_ARG_REGISTERS
+ && intarg_count >= NUM_GPR_ARG_REGISTERS
+ && intarg_count % 2 != 0)
+ intarg_count++;
+ break;
++#endif
++ case FFI_TYPE_UINT128:
++ /*
++ * A long double in FFI_LINUX_SOFT_FLOAT can use only a set
++ * of four consecutive gprs. If we do not have enough, we
++ * have to adjust the intarg_count value.
++ */
++ if (intarg_count >= NUM_GPR_ARG_REGISTERS - 3
++ && intarg_count < NUM_GPR_ARG_REGISTERS)
++ intarg_count = NUM_GPR_ARG_REGISTERS;
++ intarg_count += 4;
++ break;
+
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+- soft_double_cif:
+ /* 'long long' arguments are passed as two words, but
+ either both words must fit in registers or both go
+ on the stack. If they go on the stack, they must
+ be 8-byte-aligned.
+
+ Also, only certain register pairs can be used for
+ passing long long int -- specifically (r3,r4), (r5,r6),
+ (r7,r8), (r9,r10).
+ */
+ if (intarg_count == NUM_GPR_ARG_REGISTERS-1
+ || intarg_count % 2 != 0)
+ intarg_count++;
+ intarg_count += 2;
+ break;
+
+ case FFI_TYPE_STRUCT:
+-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+- do_struct:
+-#endif
+ /* We must allocate space for a copy of these to enforce
+ pass-by-value. Pad the space up to a multiple of 16
+ bytes (the maximum alignment required for anything under
+ the SYSV ABI). */
+ struct_copy_size += ((*ptr)->size + 15) & ~0xF;
+ /* Fall through (allocate space for the pointer). */
+
+- default:
+- soft_float_cif:
++ case FFI_TYPE_POINTER:
++ case FFI_TYPE_INT:
++ case FFI_TYPE_UINT32:
++ case FFI_TYPE_SINT32:
++ case FFI_TYPE_UINT16:
++ case FFI_TYPE_SINT16:
++ case FFI_TYPE_UINT8:
++ case FFI_TYPE_SINT8:
+ /* Everything else is passed as a 4-byte word in a GPR, either
+ the object itself or a pointer to it. */
+ intarg_count++;
+ break;
++ default:
++ FFI_ASSERT (0);
+ }
+ }
+ else
+ for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+ {
++ unsigned int elt, elnum;
++#ifdef __STRUCT_PARM_ALIGN__
++ unsigned int align;
++#endif
++
+ switch ((*ptr)->type)
+ {
+ #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+- if (cif->abi == FFI_LINUX_SOFT_FLOAT)
+- intarg_count += 4;
+- else
+- {
+- fparg_count += 2;
+- intarg_count += 2;
+- }
++ fparg_count += 2;
++ intarg_count += 2;
++ if (fparg_count > NUM_FPR_ARG_REGISTERS)
++ flags |= FLAG_ARG_NEEDS_PSAVE;
+ break;
+ #endif
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+ fparg_count++;
+ intarg_count++;
++ if (fparg_count > NUM_FPR_ARG_REGISTERS)
++ flags |= FLAG_ARG_NEEDS_PSAVE;
+ break;
+
+ case FFI_TYPE_STRUCT:
++#ifdef __STRUCT_PARM_ALIGN__
++ align = (*ptr)->alignment;
++ if (align > __STRUCT_PARM_ALIGN__)
++ align = __STRUCT_PARM_ALIGN__;
++ align = align / 8;
++ if (align > 1)
++ intarg_count = ALIGN (intarg_count, align);
++#endif
+ intarg_count += ((*ptr)->size + 7) / 8;
++ elt = 0;
++#if _CALL_ELF == 2
++ elt = discover_homogeneous_aggregate (*ptr, &elnum);
++#endif
++ if (elt)
++ {
++ fparg_count += elnum;
++ if (fparg_count > NUM_FPR_ARG_REGISTERS)
++ flags |= FLAG_ARG_NEEDS_PSAVE;
++ }
++ else
++ {
++ if (intarg_count > NUM_GPR_ARG_REGISTERS)
++ flags |= FLAG_ARG_NEEDS_PSAVE;
++ }
+ break;
+
+- default:
++ case FFI_TYPE_POINTER:
++ case FFI_TYPE_UINT64:
++ case FFI_TYPE_SINT64:
++ case FFI_TYPE_INT:
++ case FFI_TYPE_UINT32:
++ case FFI_TYPE_SINT32:
++ case FFI_TYPE_UINT16:
++ case FFI_TYPE_SINT16:
++ case FFI_TYPE_UINT8:
++ case FFI_TYPE_SINT8:
+ /* Everything else is passed as a 8-byte word in a GPR, either
+ the object itself or a pointer to it. */
+ intarg_count++;
++ if (intarg_count > NUM_GPR_ARG_REGISTERS)
++ flags |= FLAG_ARG_NEEDS_PSAVE;
+ break;
++ default:
++ FFI_ASSERT (0);
+ }
+ }
+
++#ifndef __NO_FPRS__
+ if (fparg_count != 0)
+ flags |= FLAG_FP_ARGUMENTS;
++#endif
+ if (intarg_count > 4)
+ flags |= FLAG_4_GPR_ARGUMENTS;
+ if (struct_copy_size != 0)
+ flags |= FLAG_ARG_NEEDS_COPY;
+
+ if (cif->abi != FFI_LINUX64)
+ {
++#ifndef __NO_FPRS__
+ /* Space for the FPR registers, if needed. */
+ if (fparg_count != 0)
+ bytes += NUM_FPR_ARG_REGISTERS * sizeof (double);
++#endif
+
+ /* Stack space. */
+ if (intarg_count > NUM_GPR_ARG_REGISTERS)
+ bytes += (intarg_count - NUM_GPR_ARG_REGISTERS) * sizeof (int);
++#ifndef __NO_FPRS__
+ if (fparg_count > NUM_FPR_ARG_REGISTERS)
+ bytes += (fparg_count - NUM_FPR_ARG_REGISTERS) * sizeof (double);
++#endif
+ }
+ else
+ {
++#ifndef __NO_FPRS__
+ /* Space for the FPR registers, if needed. */
+ if (fparg_count != 0)
+ bytes += NUM_FPR_ARG_REGISTERS64 * sizeof (double);
++#endif
+
+ /* Stack space. */
++#if _CALL_ELF == 2
++ if ((flags & FLAG_ARG_NEEDS_PSAVE) != 0)
++ bytes += intarg_count * sizeof (long);
++#else
+ if (intarg_count > NUM_GPR_ARG_REGISTERS64)
+ bytes += (intarg_count - NUM_GPR_ARG_REGISTERS64) * sizeof (long);
++#endif
+ }
+
+ /* The stack space allocated needs to be a multiple of 16 bytes. */
+ bytes = (bytes + 15) & ~0xF;
+
+ /* Add in the space for the copied structures. */
+ bytes += struct_copy_size;
+
+ cif->flags = flags;
+ cif->bytes = bytes;
+
+ return FFI_OK;
+ }
+
++ffi_status
++ffi_prep_cif_machdep (ffi_cif *cif)
++{
++ cif->nfixedargs = cif->nargs;
++ return ffi_prep_cif_machdep_core (cif);
++}
++
++ffi_status
++ffi_prep_cif_machdep_var (ffi_cif *cif,
++ unsigned int nfixedargs,
++ unsigned int ntotalargs MAYBE_UNUSED)
++{
++ cif->nfixedargs = nfixedargs;
++#if _CALL_ELF == 2
++ if (cif->abi == FFI_LINUX64)
++ cif->flags |= FLAG_ARG_NEEDS_PSAVE;
++#endif
++ return ffi_prep_cif_machdep_core (cif);
++}
++
+ extern void ffi_call_SYSV(extended_cif *, unsigned, unsigned, unsigned *,
+ void (*fn)(void));
+ extern void FFI_HIDDEN ffi_call_LINUX64(extended_cif *, unsigned long,
+ unsigned long, unsigned long *,
+ void (*fn)(void));
+
+ void
+ ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+ {
++ /*
++ * The final SYSV ABI says that structures of 8 bytes or fewer
++ * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
++ * in memory.
++ *
++ * We bounce-buffer SYSV small struct return values so that sysv.S
++ * can write r3 and r4 to memory without worrying about struct size.
++ *
++ * For ELFv2 ABI, use a bounce buffer for homogeneous structs too,
++ * for similar reasons.
++ */
++ unsigned long smst_buffer[8];
+ extended_cif ecif;
+
+ ecif.cif = cif;
+ ecif.avalue = avalue;
+
+- /* If the return value is a struct and we don't have a return */
+- /* value address then we need to make one */
+-
+- if ((rvalue == NULL) && (cif->rtype->type == FFI_TYPE_STRUCT))
+- {
+- ecif.rvalue = alloca(cif->rtype->size);
+- }
+- else
+- ecif.rvalue = rvalue;
+-
++ ecif.rvalue = rvalue;
++ if ((cif->flags & FLAG_RETURNS_SMST) != 0)
++ ecif.rvalue = smst_buffer;
++ /* Ensure that we have a valid struct return value.
++ FIXME: Isn't this just papering over a user problem? */
++ else if (!rvalue && cif->rtype->type == FFI_TYPE_STRUCT)
++ ecif.rvalue = alloca (cif->rtype->size);
+
+ switch (cif->abi)
+ {
+ #ifndef POWERPC64
++# ifndef __NO_FPRS__
+ case FFI_SYSV:
+ case FFI_GCC_SYSV:
+ case FFI_LINUX:
++# endif
+ case FFI_LINUX_SOFT_FLOAT:
+ ffi_call_SYSV (&ecif, -cif->bytes, cif->flags, ecif.rvalue, fn);
+ break;
+ #else
+ case FFI_LINUX64:
+ ffi_call_LINUX64 (&ecif, -(long) cif->bytes, cif->flags, ecif.rvalue, fn);
+ break;
+ #endif
+ default:
+ FFI_ASSERT (0);
+ break;
+ }
++
++ /* Check for a bounce-buffered return value */
++ if (rvalue && ecif.rvalue == smst_buffer)
++ {
++ unsigned int rsize = cif->rtype->size;
++#ifndef __LITTLE_ENDIAN__
++ /* The SYSV ABI returns a structure of up to 4 bytes in size
++ left-padded in r3. */
++ if (cif->abi == FFI_SYSV && rsize <= 4)
++ memcpy (rvalue, (char *) smst_buffer + 4 - rsize, rsize);
++ /* The SYSV ABI returns a structure of up to 8 bytes in size
++ left-padded in r3/r4, and the ELFv2 ABI similarly returns a
++ structure of up to 8 bytes in size left-padded in r3. */
++ else if (rsize <= 8)
++ memcpy (rvalue, (char *) smst_buffer + 8 - rsize, rsize);
++ else
++#endif
++ memcpy (rvalue, smst_buffer, rsize);
++ }
+ }
+
+
+-#ifndef POWERPC64
++#if !defined POWERPC64 || _CALL_ELF == 2
+ #define MIN_CACHE_LINE_SIZE 8
+
+ static void
+ flush_icache (char *wraddr, char *xaddr, int size)
+ {
+ int i;
+ for (i = 0; i < size; i += MIN_CACHE_LINE_SIZE)
+ __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;"
+@@ -942,26 +1208,48 @@ flush_icache (char *wraddr, char *xaddr,
+ ffi_status
+ ffi_prep_closure_loc (ffi_closure *closure,
+ ffi_cif *cif,
+ void (*fun) (ffi_cif *, void *, void **, void *),
+ void *user_data,
+ void *codeloc)
+ {
+ #ifdef POWERPC64
++# if _CALL_ELF == 2
++ unsigned int *tramp = (unsigned int *) &closure->tramp[0];
++
++ if (cif->abi != FFI_LINUX64)
++ return FFI_BAD_ABI;
++
++ tramp[0] = 0xe96c0018; /* 0: ld 11,2f-0b(12) */
++ tramp[1] = 0xe98c0010; /* ld 12,1f-0b(12) */
++ tramp[2] = 0x7d8903a6; /* mtctr 12 */
++ tramp[3] = 0x4e800420; /* bctr */
++ /* 1: .quad function_addr */
++ /* 2: .quad context */
++ *(void **) &tramp[4] = (void *) ffi_closure_LINUX64;
++ *(void **) &tramp[6] = codeloc;
++ flush_icache ((char *)tramp, (char *)codeloc, FFI_TRAMPOLINE_SIZE);
++# else
+ void **tramp = (void **) &closure->tramp[0];
+
+- FFI_ASSERT (cif->abi == FFI_LINUX64);
++ if (cif->abi != FFI_LINUX64)
++ return FFI_BAD_ABI;
+ /* Copy function address and TOC from ffi_closure_LINUX64. */
+ memcpy (tramp, (char *) ffi_closure_LINUX64, 16);
+ tramp[2] = codeloc;
++# endif
+ #else
+ unsigned int *tramp;
+
+- FFI_ASSERT (cif->abi == FFI_GCC_SYSV || cif->abi == FFI_SYSV);
++ if (! (cif->abi == FFI_GCC_SYSV
++ || cif->abi == FFI_SYSV
++ || cif->abi == FFI_LINUX
++ || cif->abi == FFI_LINUX_SOFT_FLOAT))
++ return FFI_BAD_ABI;
+
+ tramp = (unsigned int *) &closure->tramp[0];
+ tramp[0] = 0x7c0802a6; /* mflr r0 */
+ tramp[1] = 0x4800000d; /* bl 10 <trampoline_initial+0x10> */
+ tramp[4] = 0x7d6802a6; /* mflr r11 */
+ tramp[5] = 0x7c0803a6; /* mtlr r0 */
+ tramp[6] = 0x800b0000; /* lwz r0,0(r11) */
+ tramp[7] = 0x816b0004; /* lwz r11,4(r11) */
+@@ -1006,110 +1294,215 @@ ffi_closure_helper_SYSV (ffi_closure *cl
+ /* rvalue is the pointer to space for return value in closure assembly */
+ /* pgr is the pointer to where r3-r10 are stored in ffi_closure_SYSV */
+ /* pfr is the pointer to where f1-f8 are stored in ffi_closure_SYSV */
+ /* pst is the pointer to outgoing parameter stack in original caller */
+
+ void ** avalue;
+ ffi_type ** arg_types;
+ long i, avn;
+- long nf; /* number of floating registers already used */
+- long ng; /* number of general registers already used */
+- ffi_cif * cif;
+- double temp;
+- unsigned size;
++#ifndef __NO_FPRS__
++ long nf = 0; /* number of floating registers already used */
++#endif
++ long ng = 0; /* number of general registers already used */
+
+- cif = closure->cif;
++ ffi_cif *cif = closure->cif;
++ unsigned size = cif->rtype->size;
++ unsigned short rtypenum = cif->rtype->type;
++
+ avalue = alloca (cif->nargs * sizeof (void *));
+- size = cif->rtype->size;
+
+- nf = 0;
+- ng = 0;
++ /* First translate for softfloat/nonlinux */
++ if (cif->abi == FFI_LINUX_SOFT_FLOAT) {
++ if (rtypenum == FFI_TYPE_FLOAT)
++ rtypenum = FFI_TYPE_UINT32;
++ if (rtypenum == FFI_TYPE_DOUBLE)
++ rtypenum = FFI_TYPE_UINT64;
++ if (rtypenum == FFI_TYPE_LONGDOUBLE)
++ rtypenum = FFI_TYPE_UINT128;
++ } else if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX64) {
++#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
++ if (rtypenum == FFI_TYPE_LONGDOUBLE)
++ rtypenum = FFI_TYPE_STRUCT;
++#endif
++ }
++
+
+ /* Copy the caller's structure return value address so that the closure
+ returns the data directly to the caller.
+ For FFI_SYSV the result is passed in r3/r4 if the struct size is less
+ or equal 8 bytes. */
+-
+- if ((cif->rtype->type == FFI_TYPE_STRUCT
+- && !((cif->abi == FFI_SYSV) && (size <= 8)))
+-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+- || (cif->rtype->type == FFI_TYPE_LONGDOUBLE
+- && cif->abi != FFI_LINUX && cif->abi != FFI_LINUX_SOFT_FLOAT)
+-#endif
+- )
+- {
++ if (rtypenum == FFI_TYPE_STRUCT && ((cif->abi != FFI_SYSV) || (size > 8))) {
+ rvalue = (void *) *pgr;
+ ng++;
+ pgr++;
+ }
+
+ i = 0;
+ avn = cif->nargs;
+ arg_types = cif->arg_types;
+
+ /* Grab the addresses of the arguments from the stack frame. */
+- while (i < avn)
+- {
+- switch (arg_types[i]->type)
+- {
++ while (i < avn) {
++ unsigned short typenum = arg_types[i]->type;
++
++ /* We may need to handle some values depending on ABI */
++ if (cif->abi == FFI_LINUX_SOFT_FLOAT) {
++ if (typenum == FFI_TYPE_FLOAT)
++ typenum = FFI_TYPE_UINT32;
++ if (typenum == FFI_TYPE_DOUBLE)
++ typenum = FFI_TYPE_UINT64;
++ if (typenum == FFI_TYPE_LONGDOUBLE)
++ typenum = FFI_TYPE_UINT128;
++ } else if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX64) {
++#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
++ if (typenum == FFI_TYPE_LONGDOUBLE)
++ typenum = FFI_TYPE_STRUCT;
++#endif
++ }
++
++ switch (typenum) {
++#ifndef __NO_FPRS__
++ case FFI_TYPE_FLOAT:
++ /* unfortunately float values are stored as doubles
++ * in the ffi_closure_SYSV code (since we don't check
++ * the type in that routine).
++ */
++
++ /* there are 8 64bit floating point registers */
++
++ if (nf < 8)
++ {
++ double temp = pfr->d;
++ pfr->f = (float) temp;
++ avalue[i] = pfr;
++ nf++;
++ pfr++;
++ }
++ else
++ {
++ /* FIXME? here we are really changing the values
++ * stored in the original calling routines outgoing
++ * parameter stack. This is probably a really
++ * naughty thing to do but...
++ */
++ avalue[i] = pst;
++ pst += 1;
++ }
++ break;
++
++ case FFI_TYPE_DOUBLE:
++ /* On the outgoing stack all values are aligned to 8 */
++ /* there are 8 64bit floating point registers */
++
++ if (nf < 8)
++ {
++ avalue[i] = pfr;
++ nf++;
++ pfr++;
++ }
++ else
++ {
++ if (((long) pst) & 4)
++ pst++;
++ avalue[i] = pst;
++ pst += 2;
++ }
++ break;
++
++#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
++ case FFI_TYPE_LONGDOUBLE:
++ if (nf < 7)
++ {
++ avalue[i] = pfr;
++ pfr += 2;
++ nf += 2;
++ }
++ else
++ {
++ if (((long) pst) & 4)
++ pst++;
++ avalue[i] = pst;
++ pst += 4;
++ nf = 8;
++ }
++ break;
++#endif
++#endif /* have FPRS */
++
++ case FFI_TYPE_UINT128:
++ /*
++ * Test whether 4 GPRs are available for the whole long double;
++ * otherwise the value is taken entirely from the stack.
++ */
++ if (ng < 5) {
++ avalue[i] = pgr;
++ pgr += 4;
++ ng += 4;
++ } else {
++ avalue[i] = pst;
++ pst += 4;
++ ng = 8+4;
++ }
++ break;
++
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT8:
++#ifndef __LITTLE_ENDIAN__
+ /* there are 8 gpr registers used to pass values */
+ if (ng < 8)
+ {
+ avalue[i] = (char *) pgr + 3;
+ ng++;
+ pgr++;
+ }
+ else
+ {
+ avalue[i] = (char *) pst + 3;
+ pst++;
+ }
+ break;
++#endif
+
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT16:
++#ifndef __LITTLE_ENDIAN__
+ /* there are 8 gpr registers used to pass values */
+ if (ng < 8)
+ {
+ avalue[i] = (char *) pgr + 2;
+ ng++;
+ pgr++;
+ }
+ else
+ {
+ avalue[i] = (char *) pst + 2;
+ pst++;
+ }
+ break;
++#endif
+
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_POINTER:
+- soft_float_closure:
+ /* there are 8 gpr registers used to pass values */
+ if (ng < 8)
+ {
+ avalue[i] = pgr;
+ ng++;
+ pgr++;
+ }
+ else
+ {
+ avalue[i] = pst;
+ pst++;
+ }
+ break;
+
+ case FFI_TYPE_STRUCT:
+-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+- do_struct:
+-#endif
+ /* Structs are passed by reference. The address will appear in a
+ gpr if it is one of the first 8 arguments. */
+ if (ng < 8)
+ {
+ avalue[i] = (void *) *pgr;
+ ng++;
+ pgr++;
+ }
+@@ -1117,17 +1510,16 @@ ffi_closure_helper_SYSV (ffi_closure *cl
+ {
+ avalue[i] = (void *) *pst;
+ pst++;
+ }
+ break;
+
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_UINT64:
+- soft_double_closure:
+ /* passing long long ints are complex, they must
+ * be passed in suitable register pairs such as
+ * (r3,r4) or (r5,r6) or (r6,r7), or (r7,r8) or (r9,r10)
+ * and if the entire pair aren't available then the outgoing
+ * parameter stack is used for both but an alignment of 8
+ * must will be kept. So we must either look in pgr
+ * or pst to find the correct address for this type
+ * of parameter.
+@@ -1149,277 +1541,239 @@ ffi_closure_helper_SYSV (ffi_closure *cl
+ if (((long) pst) & 4)
+ pst++;
+ avalue[i] = pst;
+ pst += 2;
+ ng = 8;
+ }
+ break;
+
+- case FFI_TYPE_FLOAT:
+- /* With FFI_LINUX_SOFT_FLOAT floats are handled like UINT32. */
+- if (cif->abi == FFI_LINUX_SOFT_FLOAT)
+- goto soft_float_closure;
+- /* unfortunately float values are stored as doubles
+- * in the ffi_closure_SYSV code (since we don't check
+- * the type in that routine).
+- */
+-
+- /* there are 8 64bit floating point registers */
+-
+- if (nf < 8)
+- {
+- temp = pfr->d;
+- pfr->f = (float) temp;
+- avalue[i] = pfr;
+- nf++;
+- pfr++;
+- }
+- else
+- {
+- /* FIXME? here we are really changing the values
+- * stored in the original calling routines outgoing
+- * parameter stack. This is probably a really
+- * naughty thing to do but...
+- */
+- avalue[i] = pst;
+- pst += 1;
+- }
+- break;
+-
+- case FFI_TYPE_DOUBLE:
+- /* With FFI_LINUX_SOFT_FLOAT doubles are handled like UINT64. */
+- if (cif->abi == FFI_LINUX_SOFT_FLOAT)
+- goto soft_double_closure;
+- /* On the outgoing stack all values are aligned to 8 */
+- /* there are 8 64bit floating point registers */
+-
+- if (nf < 8)
+- {
+- avalue[i] = pfr;
+- nf++;
+- pfr++;
+- }
+- else
+- {
+- if (((long) pst) & 4)
+- pst++;
+- avalue[i] = pst;
+- pst += 2;
+- }
+- break;
+-
+-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+- case FFI_TYPE_LONGDOUBLE:
+- if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX_SOFT_FLOAT)
+- goto do_struct;
+- if (cif->abi == FFI_LINUX_SOFT_FLOAT)
+- { /* Test if for the whole long double, 4 gprs are available.
+- otherwise the stuff ends up on the stack. */
+- if (ng < 5)
+- {
+- avalue[i] = pgr;
+- pgr += 4;
+- ng += 4;
+- }
+- else
+- {
+- avalue[i] = pst;
+- pst += 4;
+- ng = 8;
+- }
+- break;
+- }
+- if (nf < 7)
+- {
+- avalue[i] = pfr;
+- pfr += 2;
+- nf += 2;
+- }
+- else
+- {
+- if (((long) pst) & 4)
+- pst++;
+- avalue[i] = pst;
+- pst += 4;
+- nf = 8;
+- }
+- break;
+-#endif
+-
+ default:
+- FFI_ASSERT (0);
++ FFI_ASSERT (0);
+ }
+
+ i++;
+ }
+
+
+ (closure->fun) (cif, rvalue, avalue, closure->user_data);
+
+ /* Tell ffi_closure_SYSV how to perform return type promotions.
+ Because the FFI_SYSV ABI returns the structures <= 8 bytes in r3/r4
+ we have to tell ffi_closure_SYSV how to treat them. We combine the base
+ type FFI_SYSV_TYPE_SMALL_STRUCT - 1 with the size of the struct.
+ So a one byte struct gets the return type 16. Return type 1 to 15 are
+ already used and we never have a struct with size zero. That is the reason
+ for the subtraction of 1. See the comment in ffitarget.h about ordering.
+ */
+- if (cif->abi == FFI_SYSV && cif->rtype->type == FFI_TYPE_STRUCT
+- && size <= 8)
++ if (cif->abi == FFI_SYSV && rtypenum == FFI_TYPE_STRUCT && size <= 8)
+ return (FFI_SYSV_TYPE_SMALL_STRUCT - 1) + size;
+-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+- else if (cif->rtype->type == FFI_TYPE_LONGDOUBLE
+- && cif->abi != FFI_LINUX && cif->abi != FFI_LINUX_SOFT_FLOAT)
+- return FFI_TYPE_STRUCT;
+-#endif
+- /* With FFI_LINUX_SOFT_FLOAT floats and doubles are handled like UINT32
+- respectivley UINT64. */
+- if (cif->abi == FFI_LINUX_SOFT_FLOAT)
+- {
+- switch (cif->rtype->type)
+- {
+- case FFI_TYPE_FLOAT:
+- return FFI_TYPE_UINT32;
+- break;
+- case FFI_TYPE_DOUBLE:
+- return FFI_TYPE_UINT64;
+- break;
+-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+- case FFI_TYPE_LONGDOUBLE:
+- return FFI_TYPE_UINT128;
+- break;
+-#endif
+- default:
+- return cif->rtype->type;
+- }
+- }
+- else
+- {
+- return cif->rtype->type;
+- }
++ return rtypenum;
+ }
+
+ int FFI_HIDDEN ffi_closure_helper_LINUX64 (ffi_closure *, void *,
+ unsigned long *, ffi_dblfl *);
+
+ int FFI_HIDDEN
+ ffi_closure_helper_LINUX64 (ffi_closure *closure, void *rvalue,
+ unsigned long *pst, ffi_dblfl *pfr)
+ {
+ /* rvalue is the pointer to space for return value in closure assembly */
+ /* pst is the pointer to parameter save area
+ (r3-r10 are stored into its first 8 slots by ffi_closure_LINUX64) */
+ /* pfr is the pointer to where f1-f13 are stored in ffi_closure_LINUX64 */
+
+ void **avalue;
+ ffi_type **arg_types;
+- long i, avn;
++ unsigned long i, avn, nfixedargs;
+ ffi_cif *cif;
+ ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS64;
++#ifdef __STRUCT_PARM_ALIGN__
++ unsigned long align;
++#endif
+
+ cif = closure->cif;
+ avalue = alloca (cif->nargs * sizeof (void *));
+
+- /* Copy the caller's structure return value address so that the closure
+- returns the data directly to the caller. */
+- if (cif->rtype->type == FFI_TYPE_STRUCT)
++ /* Copy the caller's structure return value address so that the
++ closure returns the data directly to the caller. */
++ if (cif->rtype->type == FFI_TYPE_STRUCT
++ && (cif->flags & FLAG_RETURNS_SMST) == 0)
+ {
+ rvalue = (void *) *pst;
+ pst++;
+ }
+
+ i = 0;
+ avn = cif->nargs;
++ nfixedargs = cif->nfixedargs;
+ arg_types = cif->arg_types;
+
+ /* Grab the addresses of the arguments from the stack frame. */
+ while (i < avn)
+ {
++ unsigned int elt, elnum;
++
+ switch (arg_types[i]->type)
+ {
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT8:
++#ifndef __LITTLE_ENDIAN__
+ avalue[i] = (char *) pst + 7;
+ pst++;
+ break;
++#endif
+
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT16:
++#ifndef __LITTLE_ENDIAN__
+ avalue[i] = (char *) pst + 6;
+ pst++;
+ break;
++#endif
+
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT32:
++#ifndef __LITTLE_ENDIAN__
+ avalue[i] = (char *) pst + 4;
+ pst++;
+ break;
++#endif
+
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_POINTER:
+ avalue[i] = pst;
+ pst++;
+ break;
+
+ case FFI_TYPE_STRUCT:
+- /* Structures with size less than eight bytes are passed
+- left-padded. */
+- if (arg_types[i]->size < 8)
+- avalue[i] = (char *) pst + 8 - arg_types[i]->size;
++#ifdef __STRUCT_PARM_ALIGN__
++ align = arg_types[i]->alignment;
++ if (align > __STRUCT_PARM_ALIGN__)
++ align = __STRUCT_PARM_ALIGN__;
++ if (align > 1)
++ pst = (unsigned long *) ALIGN ((size_t) pst, align);
++#endif
++ elt = 0;
++#if _CALL_ELF == 2
++ elt = discover_homogeneous_aggregate (arg_types[i], &elnum);
++#endif
++ if (elt)
++ {
++ union {
++ void *v;
++ unsigned long *ul;
++ float *f;
++ double *d;
++ size_t p;
++ } to, from;
++
++ /* Repackage the aggregate from its parts. The
++ aggregate size is not greater than the space taken by
++ the registers so store back to the register/parameter
++ save arrays. */
++ if (pfr + elnum <= end_pfr)
++ to.v = pfr;
++ else
++ to.v = pst;
++
++ avalue[i] = to.v;
++ from.ul = pst;
++ if (elt == FFI_TYPE_FLOAT)
++ {
++ do
++ {
++ if (pfr < end_pfr && i < nfixedargs)
++ {
++ *to.f = (float) pfr->d;
++ pfr++;
++ }
++ else
++ *to.f = *from.f;
++ to.f++;
++ from.f++;
++ }
++ while (--elnum != 0);
++ }
++ else
++ {
++ do
++ {
++ if (pfr < end_pfr && i < nfixedargs)
++ {
++ *to.d = pfr->d;
++ pfr++;
++ }
++ else
++ *to.d = *from.d;
++ to.d++;
++ from.d++;
++ }
++ while (--elnum != 0);
++ }
++ }
+ else
+- avalue[i] = pst;
++ {
++#ifndef __LITTLE_ENDIAN__
++ /* Structures with size less than eight bytes are passed
++ left-padded. */
++ if (arg_types[i]->size < 8)
++ avalue[i] = (char *) pst + 8 - arg_types[i]->size;
++ else
++#endif
++ avalue[i] = pst;
++ }
+ pst += (arg_types[i]->size + 7) / 8;
+ break;
+
+ case FFI_TYPE_FLOAT:
+ /* unfortunately float values are stored as doubles
+ * in the ffi_closure_LINUX64 code (since we don't check
+ * the type in that routine).
+ */
+
+ /* there are 13 64bit floating point registers */
+
+- if (pfr < end_pfr)
++ if (pfr < end_pfr && i < nfixedargs)
+ {
+ double temp = pfr->d;
+ pfr->f = (float) temp;
+ avalue[i] = pfr;
+ pfr++;
+ }
+ else
+ avalue[i] = pst;
+ pst++;
+ break;
+
+ case FFI_TYPE_DOUBLE:
+ /* On the outgoing stack all values are aligned to 8 */
+ /* there are 13 64bit floating point registers */
+
+- if (pfr < end_pfr)
++ if (pfr < end_pfr && i < nfixedargs)
+ {
+ avalue[i] = pfr;
+ pfr++;
+ }
+ else
+ avalue[i] = pst;
+ pst++;
+ break;
+
+ #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+- if (pfr + 1 < end_pfr)
++ if (pfr + 1 < end_pfr && i + 1 < nfixedargs)
+ {
+ avalue[i] = pfr;
+ pfr += 2;
+ }
+ else
+ {
+- if (pfr < end_pfr)
++ if (pfr < end_pfr && i < nfixedargs)
+ {
+ /* Passed partly in f13 and partly on the stack.
+ Move it all to the stack. */
+ *pst = *(unsigned long *) pfr;
+ pfr++;
+ }
+ avalue[i] = pst;
+ }
+@@ -1433,10 +1787,19 @@ ffi_closure_helper_LINUX64 (ffi_closure
+
+ i++;
+ }
+
+
+ (closure->fun) (cif, rvalue, avalue, closure->user_data);
+
+ /* Tell ffi_closure_LINUX64 how to perform return type promotions. */
++ if ((cif->flags & FLAG_RETURNS_SMST) != 0)
++ {
++ if ((cif->flags & FLAG_RETURNS_FP) == 0)
++ return FFI_V2_TYPE_SMALL_STRUCT + cif->rtype->size - 1;
++ else if ((cif->flags & FLAG_RETURNS_64BITS) != 0)
++ return FFI_V2_TYPE_DOUBLE_HOMOG;
++ else
++ return FFI_V2_TYPE_FLOAT_HOMOG;
++ }
+ return cif->rtype->type;
+ }
+diff --git a/js/src/ctypes/libffi/src/powerpc/ffi_darwin.c b/js/src/ctypes/libffi/src/powerpc/ffi_darwin.c
+--- a/js/src/ctypes/libffi/src/powerpc/ffi_darwin.c
++++ b/js/src/ctypes/libffi/src/powerpc/ffi_darwin.c
+@@ -1,14 +1,14 @@
+ /* -----------------------------------------------------------------------
+ ffi_darwin.c
+
+ Copyright (C) 1998 Geoffrey Keating
+ Copyright (C) 2001 John Hornkvist
+- Copyright (C) 2002, 2006, 2007, 2009 Free Software Foundation, Inc.
++ Copyright (C) 2002, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+ FFI support for Darwin and AIX.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+@@ -30,87 +30,112 @@
+ #include <ffi.h>
+ #include <ffi_common.h>
+
+ #include <stdlib.h>
+
+ extern void ffi_closure_ASM (void);
+
+ enum {
+- /* The assembly depends on these exact flags. */
+- FLAG_RETURNS_NOTHING = 1 << (31-30), /* These go in cr7 */
+- FLAG_RETURNS_FP = 1 << (31-29),
+- FLAG_RETURNS_64BITS = 1 << (31-28),
+- FLAG_RETURNS_128BITS = 1 << (31-31),
++ /* The assembly depends on these exact flags.
++ For Darwin64 (when FLAG_RETURNS_STRUCT is set):
++ FLAG_RETURNS_FP indicates that the structure embeds FP data.
++ FLAG_RETURNS_128BITS signals a special struct size that is not
++ expanded for float content. */
++ FLAG_RETURNS_128BITS = 1 << (31-31), /* These go in cr7 */
++ FLAG_RETURNS_NOTHING = 1 << (31-30),
++ FLAG_RETURNS_FP = 1 << (31-29),
++ FLAG_RETURNS_64BITS = 1 << (31-28),
++
++ FLAG_RETURNS_STRUCT = 1 << (31-27), /* This goes in cr6 */
+
+ FLAG_ARG_NEEDS_COPY = 1 << (31- 7),
+ FLAG_FP_ARGUMENTS = 1 << (31- 6), /* cr1.eq; specified by ABI */
+ FLAG_4_GPR_ARGUMENTS = 1 << (31- 5),
+ FLAG_RETVAL_REFERENCE = 1 << (31- 4)
+ };
+
+ /* About the DARWIN ABI. */
+ enum {
+ NUM_GPR_ARG_REGISTERS = 8,
+- NUM_FPR_ARG_REGISTERS = 13
++ NUM_FPR_ARG_REGISTERS = 13,
++ LINKAGE_AREA_GPRS = 6
+ };
+-enum { ASM_NEEDS_REGISTERS = 4 };
++
++enum { ASM_NEEDS_REGISTERS = 4 }; /* r28-r31 */
+
+ /* ffi_prep_args is called by the assembly routine once stack space
+ has been allocated for the function's arguments.
++
++ m32/m64
+
+ The stack layout we want looks like this:
+
+ | Return address from ffi_call_DARWIN | higher addresses
+ |--------------------------------------------|
+- | Previous backchain pointer 4 | stack pointer here
++ | Previous backchain pointer 4/8 | stack pointer here
+ |--------------------------------------------|<+ <<< on entry to
+- | Saved r28-r31 4*4 | | ffi_call_DARWIN
++ | ASM_NEEDS_REGISTERS=r28-r31 4*(4/8) | | ffi_call_DARWIN
+ |--------------------------------------------| |
+- | Parameters (at least 8*4=32) | |
++ | When we have any FP activity... the | |
++ | FPRs occupy NUM_FPR_ARG_REGISTERS slots | |
++ | here fp13 .. fp1 from high to low addr. | |
++ ~ ~ ~
++ | Parameters (at least 8*4/8=32/64) | | NUM_GPR_ARG_REGISTERS
+ |--------------------------------------------| |
+- | Space for GPR2 4 | |
++ | TOC=R2 (AIX) Reserved (Darwin) 4/8 | |
+ |--------------------------------------------| | stack |
+- | Reserved 2*4 | | grows |
++ | Reserved 2*4/8 | | grows |
+ |--------------------------------------------| | down V
+- | Space for callee's LR 4 | |
++ | Space for callee's LR 4/8 | |
+ |--------------------------------------------| | lower addresses
+- | Saved CR 4 | |
++ | Saved CR [low word for m64] 4/8 | |
+ |--------------------------------------------| | stack pointer here
+- | Current backchain pointer 4 |-/ during
++ | Current backchain pointer 4/8 |-/ during
+ |--------------------------------------------| <<< ffi_call_DARWIN
+
+ */
+
++#if defined(POWERPC_DARWIN64)
++static void
++darwin64_pass_struct_by_value
++ (ffi_type *, char *, unsigned, unsigned *, double **, unsigned long **);
++#endif
++
++/* This depends on GPR_SIZE = sizeof (unsigned long) */
++
+ void
+ ffi_prep_args (extended_cif *ecif, unsigned long *const stack)
+ {
+ const unsigned bytes = ecif->cif->bytes;
+ const unsigned flags = ecif->cif->flags;
+ const unsigned nargs = ecif->cif->nargs;
++#if !defined(POWERPC_DARWIN64)
+ const ffi_abi abi = ecif->cif->abi;
++#endif
+
+ /* 'stacktop' points at the previous backchain pointer. */
+ unsigned long *const stacktop = stack + (bytes / sizeof(unsigned long));
+
+ /* 'fpr_base' points at the space for fpr1, and grows upwards as
+ we use FPR registers. */
+ double *fpr_base = (double *) (stacktop - ASM_NEEDS_REGISTERS) - NUM_FPR_ARG_REGISTERS;
+- int fparg_count = 0;
+-
++ int gp_count = 0, fparg_count = 0;
+
+ /* 'next_arg' grows up as we put parameters in it. */
+- unsigned long *next_arg = stack + 6; /* 6 reserved positions. */
++ unsigned long *next_arg = stack + LINKAGE_AREA_GPRS; /* 6 reserved positions. */
+
+ int i;
+ double double_tmp;
+ void **p_argv = ecif->avalue;
+ unsigned long gprvalue;
+ ffi_type** ptr = ecif->cif->arg_types;
++#if !defined(POWERPC_DARWIN64)
+ char *dest_cpy;
++#endif
+ unsigned size_al = 0;
+
+ /* Check that everything starts aligned properly. */
+ FFI_ASSERT(((unsigned) (char *) stack & 0xF) == 0);
+ FFI_ASSERT(((unsigned) (char *) stacktop & 0xF) == 0);
+ FFI_ASSERT((bytes & 0xF) == 0);
+
+ /* Deal with return values that are actually pass-by-reference.
+@@ -125,78 +150,95 @@ ffi_prep_args (extended_cif *ecif, unsig
+ {
+ switch ((*ptr)->type)
+ {
+ /* If a floating-point parameter appears before all of the general-
+ purpose registers are filled, the corresponding GPRs that match
+ the size of the floating-point parameter are skipped. */
+ case FFI_TYPE_FLOAT:
+ double_tmp = *(float *) *p_argv;
+- if (fparg_count >= NUM_FPR_ARG_REGISTERS)
+- *(double *)next_arg = double_tmp;
+- else
++ if (fparg_count < NUM_FPR_ARG_REGISTERS)
+ *fpr_base++ = double_tmp;
++#if defined(POWERPC_DARWIN)
++ *(float *)next_arg = *(float *) *p_argv;
++#else
++ *(double *)next_arg = double_tmp;
++#endif
+ next_arg++;
++ gp_count++;
+ fparg_count++;
+ FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);
+ break;
+
+ case FFI_TYPE_DOUBLE:
+ double_tmp = *(double *) *p_argv;
+- if (fparg_count >= NUM_FPR_ARG_REGISTERS)
+- *(double *)next_arg = double_tmp;
+- else
++ if (fparg_count < NUM_FPR_ARG_REGISTERS)
+ *fpr_base++ = double_tmp;
++ *(double *)next_arg = double_tmp;
+ #ifdef POWERPC64
+ next_arg++;
++ gp_count++;
+ #else
+ next_arg += 2;
++ gp_count += 2;
+ #endif
+ fparg_count++;
+ FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);
+ break;
+
+ #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+
+ case FFI_TYPE_LONGDOUBLE:
+-#ifdef POWERPC64
++# if defined(POWERPC64) && !defined(POWERPC_DARWIN64)
++ /* ??? This will exceed the regs count when the value starts at fp13
++ and it will not put the extra bit on the stack. */
+ if (fparg_count < NUM_FPR_ARG_REGISTERS)
+ *(long double *) fpr_base++ = *(long double *) *p_argv;
+ else
+ *(long double *) next_arg = *(long double *) *p_argv;
+ next_arg += 2;
+ fparg_count += 2;
+-#else
++# else
+ double_tmp = ((double *) *p_argv)[0];
+ if (fparg_count < NUM_FPR_ARG_REGISTERS)
+ *fpr_base++ = double_tmp;
+- else
+- *(double *) next_arg = double_tmp;
++ *(double *) next_arg = double_tmp;
++# if defined(POWERPC_DARWIN64)
++ next_arg++;
++ gp_count++;
++# else
+ next_arg += 2;
++ gp_count += 2;
++# endif
+ fparg_count++;
+-
+ double_tmp = ((double *) *p_argv)[1];
+ if (fparg_count < NUM_FPR_ARG_REGISTERS)
+ *fpr_base++ = double_tmp;
+- else
+- *(double *) next_arg = double_tmp;
++ *(double *) next_arg = double_tmp;
++# if defined(POWERPC_DARWIN64)
++ next_arg++;
++ gp_count++;
++# else
+ next_arg += 2;
++ gp_count += 2;
++# endif
+ fparg_count++;
+-#endif
++# endif
+ FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);
+ break;
+ #endif
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ #ifdef POWERPC64
+ gprvalue = *(long long *) *p_argv;
+ goto putgpr;
+ #else
+ *(long long *) next_arg = *(long long *) *p_argv;
+ next_arg += 2;
++ gp_count += 2;
+ #endif
+ break;
+ case FFI_TYPE_POINTER:
+ gprvalue = *(unsigned long *) *p_argv;
+ goto putgpr;
+ case FFI_TYPE_UINT8:
+ gprvalue = *(unsigned char *) *p_argv;
+ goto putgpr;
+@@ -206,101 +248,373 @@ ffi_prep_args (extended_cif *ecif, unsig
+ case FFI_TYPE_UINT16:
+ gprvalue = *(unsigned short *) *p_argv;
+ goto putgpr;
+ case FFI_TYPE_SINT16:
+ gprvalue = *(signed short *) *p_argv;
+ goto putgpr;
+
+ case FFI_TYPE_STRUCT:
+-#ifdef POWERPC64
+- dest_cpy = (char *) next_arg;
+ size_al = (*ptr)->size;
+- if ((*ptr)->elements[0]->type == 3)
+- size_al = ALIGN((*ptr)->size, 8);
+- if (size_al < 3 && abi == FFI_DARWIN)
+- dest_cpy += 4 - size_al;
+-
+- memcpy ((char *) dest_cpy, (char *) *p_argv, size_al);
+- next_arg += (size_al + 7) / 8;
++#if defined(POWERPC_DARWIN64)
++ next_arg = (unsigned long *)ALIGN((char *)next_arg, (*ptr)->alignment);
++ darwin64_pass_struct_by_value (*ptr, (char *) *p_argv,
++ (unsigned) size_al,
++ (unsigned int *) &fparg_count,
++ &fpr_base, &next_arg);
+ #else
+ dest_cpy = (char *) next_arg;
+
++ /* If the first member of the struct is a double, then include enough
++ padding in the struct size to align it to double-word. */
++ if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE)
++ size_al = ALIGN((*ptr)->size, 8);
++
++# if defined(POWERPC64)
++ FFI_ASSERT (abi != FFI_DARWIN);
++ memcpy ((char *) dest_cpy, (char *) *p_argv, size_al);
++ next_arg += (size_al + 7) / 8;
++# else
+ /* Structures that match the basic modes (QI 1 byte, HI 2 bytes,
+ SI 4 bytes) are aligned as if they were those modes.
+ Structures with 3 byte in size are padded upwards. */
+- size_al = (*ptr)->size;
+- /* If the first member of the struct is a double, then align
+- the struct to double-word. */
+- if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE)
+- size_al = ALIGN((*ptr)->size, 8);
+ if (size_al < 3 && abi == FFI_DARWIN)
+ dest_cpy += 4 - size_al;
+
+ memcpy((char *) dest_cpy, (char *) *p_argv, size_al);
+ next_arg += (size_al + 3) / 4;
++# endif
+ #endif
+ break;
+
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT32:
+ gprvalue = *(signed int *) *p_argv;
+ goto putgpr;
+
+ case FFI_TYPE_UINT32:
+ gprvalue = *(unsigned int *) *p_argv;
+ putgpr:
+ *next_arg++ = gprvalue;
++ gp_count++;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Check that we didn't overrun the stack... */
+ //FFI_ASSERT(gpr_base <= stacktop - ASM_NEEDS_REGISTERS);
+ //FFI_ASSERT((unsigned *)fpr_base
+ // <= stacktop - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS);
+ //FFI_ASSERT(flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4);
+ }
+
++#if defined(POWERPC_DARWIN64)
++
++/* Count the FPRs used by the floating-point members of struct S, recursing
++   into nested structs, and add the total to *NFPR.  This should not be called
++   for structures of size 16 bytes, since these are not broken out this way.  */
++static void
++darwin64_scan_struct_for_floats (ffi_type *s, unsigned *nfpr)
++{
++  int i;
++
++  FFI_ASSERT (s->type == FFI_TYPE_STRUCT);
++
++  for (i = 0; s->elements[i] != NULL; i++)
++    {
++      ffi_type *p = s->elements[i];
++      switch (p->type)
++        {
++        case FFI_TYPE_STRUCT:
++          darwin64_scan_struct_for_floats (p, nfpr);
++          break;
++        case FFI_TYPE_LONGDOUBLE:
++          (*nfpr) += 2;
++          break;
++        case FFI_TYPE_DOUBLE:
++        case FFI_TYPE_FLOAT:
++          (*nfpr) += 1;
++          break;
++        default:
++          break;
++        }
++    }
++}
++
++static int
++darwin64_struct_size_exceeds_gprs_p (ffi_type *s, char *src, unsigned *nfpr)
++{
++ unsigned struct_offset=0, i;
++ /* Return 1 if S runs out of FPRs (counted in *NFPR) or GPR space; else 0. */
++ for (i = 0; s->elements[i] != NULL; i++)
++ {
++ char *item_base;
++ ffi_type *p = s->elements[i];
++ /* Find the start of this item (0 for the first one). */
++ if (i > 0)
++ struct_offset = ALIGN(struct_offset, p->alignment);
++
++ item_base = src + struct_offset;
++
++ switch (p->type)
++ {
++ case FFI_TYPE_STRUCT:
++ if (darwin64_struct_size_exceeds_gprs_p (p, item_base, nfpr))
++ return 1;
++ break;
++ case FFI_TYPE_LONGDOUBLE:
++ if (*nfpr >= NUM_FPR_ARG_REGISTERS)
++ return 1;
++ (*nfpr) += 1; /* first half; the second half is counted by the fall-through */
++ item_base += 8;
++ /* FALL THROUGH */
++ case FFI_TYPE_DOUBLE:
++ if (*nfpr >= NUM_FPR_ARG_REGISTERS)
++ return 1;
++ (*nfpr) += 1;
++ break;
++ case FFI_TYPE_FLOAT:
++ if (*nfpr >= NUM_FPR_ARG_REGISTERS)
++ return 1;
++ (*nfpr) += 1;
++ break;
++ default:
++ /* A non-float item placed at or past the 8 GPR mark (8*8 bytes) can't fit.
++ NOTE: the address itself is tested; it is a struct offset only if SRC is NULL. */
++ if ((unsigned long)item_base >= 8*8)
++ return 1;
++ break;
++ }
++ /* now count the size of what we just used. */
++ struct_offset += p->size;
++ }
++ return 0;
++}
++
++/* Can this struct be returned by value?  Returns 1 if so, 0 if not. */
++int
++darwin64_struct_ret_by_value_p (ffi_type *s)
++{
++ unsigned nfp = 0;
++
++ FFI_ASSERT (s && s->type == FFI_TYPE_STRUCT);
++
++ /* The largest structure we can return is 8 longs + 13 doubles (168 bytes). */
++ if (s->size > 168)
++ return 0;
++
++ /* We can't pass more than 13 floats. */
++ darwin64_scan_struct_for_floats (s, &nfp);
++ if (nfp > 13)
++ return 0;
++
++ /* If there are not too many floats, and the struct is
++ small enough to accommodate in the GPRs, then it must be OK. */
++ if (s->size <= 64)
++ return 1;
++
++ /* Well, we have to look harder. */
++ nfp = 0; /* restart the FPR count for the element-by-element check */
++ if (darwin64_struct_size_exceeds_gprs_p (s, NULL, &nfp)) /* SRC is NULL here */
++ return 0;
++
++ return 1;
++}
++
++void
++darwin64_pass_struct_floats (ffi_type *s, char *src,
++ unsigned *nfpr, double **fprs)
++{
++ int i;
++ double *fpr_base = *fprs;
++ unsigned struct_offset = 0;
++ /* Copy the FP members of S (read from SRC) to *FPRS while FPR slots remain. */
++ /* We don't assume anything about the alignment of the source. */
++ for (i = 0; s->elements[i] != NULL; i++)
++ {
++ char *item_base;
++ ffi_type *p = s->elements[i];
++ /* Find the start of this item (0 for the first one). */
++ if (i > 0)
++ struct_offset = ALIGN(struct_offset, p->alignment);
++ item_base = src + struct_offset;
++
++ switch (p->type)
++ {
++ case FFI_TYPE_STRUCT:
++ darwin64_pass_struct_floats (p, item_base, nfpr,
++ &fpr_base);
++ break;
++ case FFI_TYPE_LONGDOUBLE:
++ if (*nfpr < NUM_FPR_ARG_REGISTERS)
++ *fpr_base++ = *(double *)item_base;
++ (*nfpr) += 1; /* counted even when no FPR slot was left */
++ item_base += 8; /* second double of the pair is handled by the fall-through */
++ /* FALL THROUGH */
++ case FFI_TYPE_DOUBLE:
++ if (*nfpr < NUM_FPR_ARG_REGISTERS)
++ *fpr_base++ = *(double *)item_base;
++ (*nfpr) += 1;
++ break;
++ case FFI_TYPE_FLOAT:
++ if (*nfpr < NUM_FPR_ARG_REGISTERS)
++ *fpr_base++ = (double) *(float *)item_base; /* stored widened to double */
++ (*nfpr) += 1;
++ break;
++ default:
++ break;
++ }
++ /* now count the size of what we just used. */
++ struct_offset += p->size;
++ }
++ /* Update the scores. */
++ *fprs = fpr_base;
++}
++
++/* Darwin64 special rules.  Break out a struct into params and float registers:
++ copy S to *ARG and advance it; in the general case also fill *FPRS / *NFPR. */
++static void
++darwin64_pass_struct_by_value (ffi_type *s, char *src, unsigned size,
++ unsigned *nfpr, double **fprs, unsigned long **arg)
++{
++ unsigned long *next_arg = *arg;
++ char *dest_cpy = (char *)next_arg;
++
++ FFI_ASSERT (s->type == FFI_TYPE_STRUCT);
++
++ if (!size)
++ return; /* nothing to copy; *ARG is left unchanged */
++
++ /* First... special cases. */
++ if (size < 3
++ || (size == 4
++ && s->elements[0]
++ && s->elements[0]->type != FFI_TYPE_FLOAT))
++ {
++ /* Must be at least one GPR, padding is unspecified in value,
++ let's make it zero. */
++ *next_arg = 0UL;
++ dest_cpy += 8 - size; /* data goes in the last SIZE bytes of the 8-byte slot */
++ memcpy ((char *) dest_cpy, src, size);
++ next_arg++;
++ }
++ else if (size == 16)
++ {
++ memcpy ((char *) dest_cpy, src, size); /* copied whole; floats not broken out */
++ next_arg += 2;
++ }
++ else
++ {
++ /* now the general case, we consider embedded floats. */
++ memcpy ((char *) dest_cpy, src, size);
++ darwin64_pass_struct_floats (s, src, nfpr, fprs);
++ next_arg += (size+7)/8; /* round up to whole 8-byte slots */
++ }
++
++ *arg = next_arg;
++}
++
++double *
++darwin64_struct_floats_to_mem (ffi_type *s, char *dest, double *fprs, unsigned *nf)
++{
++ int i;
++ unsigned struct_offset = 0;
++ /* Store values from FPRS into the FP members of S at DEST; return FPRS advanced. */
++ /* We don't assume anything about the alignment of the source. */
++ for (i = 0; s->elements[i] != NULL; i++)
++ {
++ char *item_base;
++ ffi_type *p = s->elements[i];
++ /* Find the start of this item (0 for the first one). */
++ if (i > 0)
++ struct_offset = ALIGN(struct_offset, p->alignment);
++ item_base = dest + struct_offset;
++
++ switch (p->type)
++ {
++ case FFI_TYPE_STRUCT:
++ fprs = darwin64_struct_floats_to_mem (p, item_base, fprs, nf);
++ break;
++ case FFI_TYPE_LONGDOUBLE:
++ if (*nf < NUM_FPR_ARG_REGISTERS)
++ {
++ *(double *)item_base = *fprs++ ;
++ (*nf) += 1; /* here *NF only counts values actually stored */
++ }
++ item_base += 8; /* second double of the pair is handled by the fall-through */
++ /* FALL THROUGH */
++ case FFI_TYPE_DOUBLE:
++ if (*nf < NUM_FPR_ARG_REGISTERS)
++ {
++ *(double *)item_base = *fprs++ ;
++ (*nf) += 1;
++ }
++ break;
++ case FFI_TYPE_FLOAT:
++ if (*nf < NUM_FPR_ARG_REGISTERS)
++ {
++ *(float *)item_base = (float) *fprs++ ; /* narrowed from the double image */
++ (*nf) += 1;
++ }
++ break;
++ default:
++ break;
++ }
++ /* now count the size of what we just used. */
++ struct_offset += p->size;
++ }
++ return fprs;
++}
++
++#endif
++
+ /* Adjust the size of S to be correct for Darwin.
+- On Darwin, the first field of a structure has natural alignment. */
++ On Darwin m32, the first field of a structure has natural alignment.
++ On Darwin m64, all fields have natural alignment. */
+
+ static void
+ darwin_adjust_aggregate_sizes (ffi_type *s)
+ {
+ int i;
+
+ if (s->type != FFI_TYPE_STRUCT)
+ return;
+
+ s->size = 0;
+ for (i = 0; s->elements[i] != NULL; i++)
+ {
+ ffi_type *p;
+ int align;
+
+ p = s->elements[i];
+- darwin_adjust_aggregate_sizes (p);
+- if (i == 0
+- && (p->type == FFI_TYPE_UINT64
+- || p->type == FFI_TYPE_SINT64
+- || p->type == FFI_TYPE_DOUBLE
+- || p->alignment == 8))
+- align = 8;
++ if (p->type == FFI_TYPE_STRUCT)
++ darwin_adjust_aggregate_sizes (p);
++#if defined(POWERPC_DARWIN64)
++ /* Natural alignment for all items. */
++ align = p->alignment;
++#else
++ /* Natural alignment for the first item... */
++ if (i == 0)
++ align = p->alignment;
+ else if (p->alignment == 16 || p->alignment < 4)
++ /* .. subsequent items with vector or align < 4 have natural align. */
+ align = p->alignment;
+ else
++ /* .. or align is 4. */
+ align = 4;
++#endif
++ /* Pad, if necessary, before adding the current item. */
+ s->size = ALIGN(s->size, align) + p->size;
+ }
+
+ s->size = ALIGN(s->size, s->alignment);
+
++ /* This should not be necessary on m64, but harmless. */
+ if (s->elements[0]->type == FFI_TYPE_UINT64
+ || s->elements[0]->type == FFI_TYPE_SINT64
+ || s->elements[0]->type == FFI_TYPE_DOUBLE
+ || s->elements[0]->alignment == 8)
+ s->alignment = s->alignment > 8 ? s->alignment : 8;
+ /* Do not add additional tail padding. */
+ }
+
+@@ -342,17 +656,17 @@ aix_adjust_aggregate_sizes (ffi_type *s)
+ /* Perform machine dependent cif processing. */
+ ffi_status
+ ffi_prep_cif_machdep (ffi_cif *cif)
+ {
+ /* All this is for the DARWIN ABI. */
+ unsigned i;
+ ffi_type **ptr;
+ unsigned bytes;
+- int fparg_count = 0, intarg_count = 0;
++ unsigned fparg_count = 0, intarg_count = 0;
+ unsigned flags = 0;
+ unsigned size_al = 0;
+
+ /* All the machine-independent calculation of cif->bytes will be wrong.
+ All the calculation of structure sizes will also be wrong.
+ Redo the calculation for DARWIN. */
+
+ if (cif->abi == FFI_DARWIN)
+@@ -367,26 +681,35 @@ ffi_prep_cif_machdep (ffi_cif *cif)
+ aix_adjust_aggregate_sizes (cif->rtype);
+ for (i = 0; i < cif->nargs; i++)
+ aix_adjust_aggregate_sizes (cif->arg_types[i]);
+ }
+
+ /* Space for the frame pointer, callee's LR, CR, etc, and for
+ the asm's temp regs. */
+
+- bytes = (6 + ASM_NEEDS_REGISTERS) * sizeof(long);
++ bytes = (LINKAGE_AREA_GPRS + ASM_NEEDS_REGISTERS) * sizeof(unsigned long);
+
+- /* Return value handling. The rules are as follows:
++ /* Return value handling.
++ The m32 rules are as follows:
+ - 32-bit (or less) integer values are returned in gpr3;
+- - Structures of size <= 4 bytes also returned in gpr3;
+- - 64-bit integer values and structures between 5 and 8 bytes are returned
+- in gpr3 and gpr4;
++ - structures of size <= 4 bytes also returned in gpr3;
++ - 64-bit integer values [??? and structures between 5 and 8 bytes] are
++ returned in gpr3 and gpr4;
+ - Single/double FP values are returned in fpr1;
+ - Long double FP (if not equivalent to double) values are returned in
+ fpr1 and fpr2;
++ m64:
++ - 64-bit or smaller integral values are returned in GPR3
++ - Single/double FP values are returned in fpr1;
++ - Long double FP values are returned in fpr1 and fpr2;
++ m64 Structures:
++ - If the structure could be accommodated in registers were it to be the
++ first argument to a routine, then it is returned in those registers.
++ m32/m64 structures otherwise:
+ - Larger structures values are allocated space and a pointer is passed
+ as the first argument. */
+ switch (cif->rtype->type)
+ {
+
+ #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+ case FFI_TYPE_LONGDOUBLE:
+ flags |= FLAG_RETURNS_128BITS;
+@@ -405,124 +728,193 @@ ffi_prep_cif_machdep (ffi_cif *cif)
+ case FFI_TYPE_SINT64:
+ #ifdef POWERPC64
+ case FFI_TYPE_POINTER:
+ #endif
+ flags |= FLAG_RETURNS_64BITS;
+ break;
+
+ case FFI_TYPE_STRUCT:
++#if defined(POWERPC_DARWIN64)
++ {
++ /* Can we fit the struct into regs? */
++ if (darwin64_struct_ret_by_value_p (cif->rtype))
++ {
++ unsigned nfpr = 0;
++ flags |= FLAG_RETURNS_STRUCT;
++ if (cif->rtype->size != 16)
++ darwin64_scan_struct_for_floats (cif->rtype, &nfpr) ;
++ else
++ flags |= FLAG_RETURNS_128BITS;
++ /* Will be 0 for 16byte struct. */
++ if (nfpr)
++ flags |= FLAG_RETURNS_FP;
++ }
++ else /* By ref. */
++ {
++ flags |= FLAG_RETVAL_REFERENCE;
++ flags |= FLAG_RETURNS_NOTHING;
++ intarg_count++;
++ }
++ }
++#elif defined(DARWIN_PPC)
++ if (cif->rtype->size <= 4)
++ flags |= FLAG_RETURNS_STRUCT;
++ else /* else by reference. */
++ {
++ flags |= FLAG_RETVAL_REFERENCE;
++ flags |= FLAG_RETURNS_NOTHING;
++ intarg_count++;
++ }
++#else /* assume we pass by ref. */
+ flags |= FLAG_RETVAL_REFERENCE;
+ flags |= FLAG_RETURNS_NOTHING;
+ intarg_count++;
++#endif
+ break;
+ case FFI_TYPE_VOID:
+ flags |= FLAG_RETURNS_NOTHING;
+ break;
+
+ default:
+ /* Returns 32-bit integer, or similar. Nothing to do here. */
+ break;
+ }
+
+ /* The first NUM_GPR_ARG_REGISTERS words of integer arguments, and the
+ first NUM_FPR_ARG_REGISTERS fp arguments, go in registers; the rest
+- goes on the stack. Structures are passed as a pointer to a copy of
+- the structure. Stuff on the stack needs to keep proper alignment. */
++ goes on the stack.
++ ??? Structures are passed as a pointer to a copy of the structure.
++ Stuff on the stack needs to keep proper alignment.
++ For m64 the count is effectively of half-GPRs. */
+ for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+ {
++ unsigned align_words;
+ switch ((*ptr)->type)
+ {
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+ fparg_count++;
++#if !defined(POWERPC_DARWIN64)
+ /* If this FP arg is going on the stack, it must be
+ 8-byte-aligned. */
+ if (fparg_count > NUM_FPR_ARG_REGISTERS
+- && intarg_count%2 != 0)
++ && (intarg_count & 0x01) != 0)
+ intarg_count++;
++#endif
+ break;
+
+ #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+-
+ case FFI_TYPE_LONGDOUBLE:
+ fparg_count += 2;
+ /* If this FP arg is going on the stack, it must be
+- 8-byte-aligned. */
+- if (fparg_count > NUM_FPR_ARG_REGISTERS
+- && intarg_count%2 != 0)
+- intarg_count++;
+- intarg_count +=2;
++ 16-byte-aligned. */
++ if (fparg_count >= NUM_FPR_ARG_REGISTERS)
++#if defined (POWERPC64)
++ intarg_count = ALIGN(intarg_count, 2);
++#else
++ intarg_count = ALIGN(intarg_count, 4);
++#endif
+ break;
+ #endif
+
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
++#if defined(POWERPC64)
++ intarg_count++;
++#else
+ /* 'long long' arguments are passed as two words, but
+ either both words must fit in registers or both go
+ on the stack. If they go on the stack, they must
+ be 8-byte-aligned. */
+ if (intarg_count == NUM_GPR_ARG_REGISTERS-1
+- || (intarg_count >= NUM_GPR_ARG_REGISTERS && intarg_count%2 != 0))
++ || (intarg_count >= NUM_GPR_ARG_REGISTERS
++ && (intarg_count & 0x01) != 0))
+ intarg_count++;
+ intarg_count += 2;
++#endif
+ break;
+
+ case FFI_TYPE_STRUCT:
+ size_al = (*ptr)->size;
++#if defined(POWERPC_DARWIN64)
++ align_words = (*ptr)->alignment >> 3;
++ if (align_words)
++ intarg_count = ALIGN(intarg_count, align_words);
++ /* Base size of the struct. */
++ intarg_count += (size_al + 7) / 8;
++ /* If 16 bytes then don't worry about floats. */
++ if (size_al != 16)
++ /* Scan through for floats to be placed in regs. */
++ darwin64_scan_struct_for_floats (*ptr, &fparg_count) ;
++#else
++ align_words = (*ptr)->alignment >> 2;
++ if (align_words)
++ intarg_count = ALIGN(intarg_count, align_words);
+ /* If the first member of the struct is a double, then align
+- the struct to double-word. */
++ the struct to double-word.
+ if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE)
+- size_al = ALIGN((*ptr)->size, 8);
+-#ifdef POWERPC64
++ size_al = ALIGN((*ptr)->size, 8); */
++# ifdef POWERPC64
+ intarg_count += (size_al + 7) / 8;
+-#else
++# else
+ intarg_count += (size_al + 3) / 4;
++# endif
+ #endif
+ break;
+
+ default:
+ /* Everything else is passed as a 4-byte word in a GPR, either
+ the object itself or a pointer to it. */
+ intarg_count++;
+ break;
+ }
+ }
+
+ if (fparg_count != 0)
+ flags |= FLAG_FP_ARGUMENTS;
+
++#if defined(POWERPC_DARWIN64)
++ /* Space to image the FPR registers, if needed - which includes when they might be
++ used in a struct return. */
++ if (fparg_count != 0
++ || ((flags & FLAG_RETURNS_STRUCT)
++ && (flags & FLAG_RETURNS_FP)))
++ bytes += NUM_FPR_ARG_REGISTERS * sizeof(double);
++#else
+ /* Space for the FPR registers, if needed. */
+ if (fparg_count != 0)
+ bytes += NUM_FPR_ARG_REGISTERS * sizeof(double);
++#endif
+
+ /* Stack space. */
+ #ifdef POWERPC64
+ if ((intarg_count + fparg_count) > NUM_GPR_ARG_REGISTERS)
+ bytes += (intarg_count + fparg_count) * sizeof(long);
+ #else
+ if ((intarg_count + 2 * fparg_count) > NUM_GPR_ARG_REGISTERS)
+ bytes += (intarg_count + 2 * fparg_count) * sizeof(long);
+ #endif
+ else
+ bytes += NUM_GPR_ARG_REGISTERS * sizeof(long);
+
+ /* The stack space allocated needs to be a multiple of 16 bytes. */
+- bytes = (bytes + 15) & ~0xF;
++ bytes = ALIGN(bytes, 16) ;
+
+ cif->flags = flags;
+ cif->bytes = bytes;
+
+ return FFI_OK;
+ }
+
+ extern void ffi_call_AIX(extended_cif *, long, unsigned, unsigned *,
+ void (*fn)(void), void (*fn2)(void));
++
+ extern void ffi_call_DARWIN(extended_cif *, long, unsigned, unsigned *,
+- void (*fn)(void), void (*fn2)(void));
++ void (*fn)(void), void (*fn2)(void), ffi_type*);
+
+ void
+ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+ {
+ extended_cif ecif;
+
+ ecif.cif = cif;
+ ecif.avalue = avalue;
+@@ -541,17 +933,17 @@ ffi_call (ffi_cif *cif, void (*fn)(void)
+ switch (cif->abi)
+ {
+ case FFI_AIX:
+ ffi_call_AIX(&ecif, -(long)cif->bytes, cif->flags, ecif.rvalue, fn,
+ FFI_FN(ffi_prep_args));
+ break;
+ case FFI_DARWIN:
+ ffi_call_DARWIN(&ecif, -(long)cif->bytes, cif->flags, ecif.rvalue, fn,
+- FFI_FN(ffi_prep_args));
++ FFI_FN(ffi_prep_args), cif->rtype);
+ break;
+ default:
+ FFI_ASSERT(0);
+ break;
+ }
+ }
+
+ static void flush_icache(char *);
+@@ -561,123 +953,127 @@ static void flush_range(char *, int);
+ points to one of these. */
+
+ typedef struct aix_fd_struct {
+ void *code_pointer;
+ void *toc;
+ } aix_fd;
+
+ /* here I'd like to add the stack frame layout we use in darwin_closure.S
+- and aix_clsoure.S
++ and aix_closure.S
+
+- SP previous -> +---------------------------------------+ <--- child frame
+- | back chain to caller 4 |
+- +---------------------------------------+ 4
+- | saved CR 4 |
+- +---------------------------------------+ 8
+- | saved LR 4 |
+- +---------------------------------------+ 12
+- | reserved for compilers 4 |
+- +---------------------------------------+ 16
+- | reserved for binders 4 |
+- +---------------------------------------+ 20
+- | saved TOC pointer 4 |
+- +---------------------------------------+ 24
+- | always reserved 8*4=32 (previous GPRs)|
+- | according to the linkage convention |
+- | from AIX |
+- +---------------------------------------+ 56
+- | our FPR area 13*8=104 |
+- | f1 |
+- | . |
+- | f13 |
+- +---------------------------------------+ 160
+- | result area 8 |
+- +---------------------------------------+ 168
+- | alignement to the next multiple of 16 |
+-SP current --> +---------------------------------------+ 176 <- parent frame
+- | back chain to caller 4 |
+- +---------------------------------------+ 180
+- | saved CR 4 |
+- +---------------------------------------+ 184
+- | saved LR 4 |
+- +---------------------------------------+ 188
+- | reserved for compilers 4 |
+- +---------------------------------------+ 192
+- | reserved for binders 4 |
+- +---------------------------------------+ 196
+- | saved TOC pointer 4 |
+- +---------------------------------------+ 200
+- | always reserved 8*4=32 we store our |
+- | GPRs here |
+- | r3 |
+- | . |
+- | r10 |
+- +---------------------------------------+ 232
+- | overflow part |
+- +---------------------------------------+ xxx
+- | ???? |
+- +---------------------------------------+ xxx
++ m32/m64
++
++ The stack layout looks like this:
++
++ | Additional params... | | Higher address
++ ~ ~ ~
++ | Parameters (at least 8*4/8=32/64) | | NUM_GPR_ARG_REGISTERS
++ |--------------------------------------------| |
++ | TOC=R2 (AIX) Reserved (Darwin) 4/8 | |
++ |--------------------------------------------| |
++ | Reserved 2*4/8 | |
++ |--------------------------------------------| |
++ | Space for callee's LR 4/8 | |
++ |--------------------------------------------| |
++ | Saved CR [low word for m64] 4/8 | |
++ |--------------------------------------------| |
++ | Current backchain pointer 4/8 |-/ Parent's frame.
++ |--------------------------------------------| <+ <<< on entry to ffi_closure_ASM
++ | Result Bytes 16 | |
++ |--------------------------------------------| |
++ ~ padding to 16-byte alignment ~ ~
++ |--------------------------------------------| |
++ | NUM_FPR_ARG_REGISTERS slots | |
++ | here fp13 .. fp1 13*8 | |
++ |--------------------------------------------| |
++ | R3..R10 8*4/8=32/64 | | NUM_GPR_ARG_REGISTERS
++ |--------------------------------------------| |
++ | TOC=R2 (AIX) Reserved (Darwin) 4/8 | |
++ |--------------------------------------------| | stack |
++ | Reserved [compiler,binder] 2*4/8 | | grows |
++ |--------------------------------------------| | down V
++ | Space for callee's LR 4/8 | |
++ |--------------------------------------------| | lower addresses
++ | Saved CR [low word for m64] 4/8 | |
++ |--------------------------------------------| | stack pointer here
++ | Current backchain pointer 4/8 |-/ during
++ |--------------------------------------------| <<< ffi_closure_ASM.
+
+ */
++
+ ffi_status
+ ffi_prep_closure_loc (ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
+ void *codeloc)
+ {
+ unsigned int *tramp;
+ struct ffi_aix_trampoline_struct *tramp_aix;
+ aix_fd *fd;
+
+ switch (cif->abi)
+ {
+- case FFI_DARWIN:
++ case FFI_DARWIN:
+
+- FFI_ASSERT (cif->abi == FFI_DARWIN);
++ FFI_ASSERT (cif->abi == FFI_DARWIN);
+
+- tramp = (unsigned int *) &closure->tramp[0];
+- tramp[0] = 0x7c0802a6; /* mflr r0 */
+- tramp[1] = 0x429f000d; /* bcl- 20,4*cr7+so,0x10 */
+- tramp[4] = 0x7d6802a6; /* mflr r11 */
+- tramp[5] = 0x818b0000; /* lwz r12,0(r11) function address */
+- tramp[6] = 0x7c0803a6; /* mtlr r0 */
+- tramp[7] = 0x7d8903a6; /* mtctr r12 */
+- tramp[8] = 0x816b0004; /* lwz r11,4(r11) static chain */
+- tramp[9] = 0x4e800420; /* bctr */
+- tramp[2] = (unsigned long) ffi_closure_ASM; /* function */
+- tramp[3] = (unsigned long) codeloc; /* context */
++ tramp = (unsigned int *) &closure->tramp[0];
++#if defined(POWERPC_DARWIN64)
++ tramp[0] = 0x7c0802a6; /* mflr r0 */
++ tramp[1] = 0x429f0015; /* bcl- 20,4*cr7+so, +0x18 (L1) */
++ /* We put the addresses here. */
++ tramp[6] = 0x7d6802a6; /*L1: mflr r11 */
++ tramp[7] = 0xe98b0000; /* ld r12,0(r11) function address */
++ tramp[8] = 0x7c0803a6; /* mtlr r0 */
++ tramp[9] = 0x7d8903a6; /* mtctr r12 */
++ tramp[10] = 0xe96b0008; /* ld r11,8(r11) static chain */
++ tramp[11] = 0x4e800420; /* bctr */
+
+- closure->cif = cif;
+- closure->fun = fun;
+- closure->user_data = user_data;
++ *((unsigned long *)&tramp[2]) = (unsigned long) ffi_closure_ASM; /* function */
++ *((unsigned long *)&tramp[4]) = (unsigned long) codeloc; /* context */
++#else
++ tramp[0] = 0x7c0802a6; /* mflr r0 */
++ tramp[1] = 0x429f000d; /* bcl- 20,4*cr7+so,0x10 */
++ tramp[4] = 0x7d6802a6; /* mflr r11 */
++ tramp[5] = 0x818b0000; /* lwz r12,0(r11) function address */
++ tramp[6] = 0x7c0803a6; /* mtlr r0 */
++ tramp[7] = 0x7d8903a6; /* mtctr r12 */
++ tramp[8] = 0x816b0004; /* lwz r11,4(r11) static chain */
++ tramp[9] = 0x4e800420; /* bctr */
++ tramp[2] = (unsigned long) ffi_closure_ASM; /* function */
++ tramp[3] = (unsigned long) codeloc; /* context */
++#endif
++ closure->cif = cif;
++ closure->fun = fun;
++ closure->user_data = user_data;
+
+- /* Flush the icache. Only necessary on Darwin. */
+- flush_range(codeloc, FFI_TRAMPOLINE_SIZE);
++ /* Flush the icache. Only necessary on Darwin. */
++ flush_range(codeloc, FFI_TRAMPOLINE_SIZE);
+
+- break;
++ break;
+
+ case FFI_AIX:
+
+ tramp_aix = (struct ffi_aix_trampoline_struct *) (closure->tramp);
+ fd = (aix_fd *)(void *)ffi_closure_ASM;
+
+ FFI_ASSERT (cif->abi == FFI_AIX);
+
+ tramp_aix->code_pointer = fd->code_pointer;
+ tramp_aix->toc = fd->toc;
+ tramp_aix->static_chain = codeloc;
+ closure->cif = cif;
+ closure->fun = fun;
+ closure->user_data = user_data;
++ break;
+
+ default:
+-
+- FFI_ASSERT(0);
++ return FFI_BAD_ABI;
+ break;
+ }
+ return FFI_OK;
+ }
+
+ static void
+ flush_icache(char *addr)
+ {
+@@ -703,28 +1099,28 @@ flush_range(char * addr1, int size)
+ }
+
+ typedef union
+ {
+ float f;
+ double d;
+ } ffi_dblfl;
+
+-int
++ffi_type *
+ ffi_closure_helper_DARWIN (ffi_closure *, void *,
+ unsigned long *, ffi_dblfl *);
+
+ /* Basically the trampoline invokes ffi_closure_ASM, and on
+ entry, r11 holds the address of the closure.
+ After storing the registers that could possibly contain
+ parameters to be passed into the stack frame and setting
+ up space for a return value, ffi_closure_ASM invokes the
+ following helper function to do most of the work. */
+
+-int
++ffi_type *
+ ffi_closure_helper_DARWIN (ffi_closure *closure, void *rvalue,
+ unsigned long *pgr, ffi_dblfl *pfr)
+ {
+ /* rvalue is the pointer to space for return value in closure assembly
+ pgr is the pointer to where r3-r10 are stored in ffi_closure_ASM
+ pfr is the pointer to where f1-f13 are stored in ffi_closure_ASM. */
+
+ typedef double ldbits[2];
+@@ -736,97 +1132,132 @@ ffi_closure_helper_DARWIN (ffi_closure *
+ };
+
+ void ** avalue;
+ ffi_type ** arg_types;
+ long i, avn;
+ ffi_cif * cif;
+ ffi_dblfl * end_pfr = pfr + NUM_FPR_ARG_REGISTERS;
+ unsigned size_al;
++#if defined(POWERPC_DARWIN64)
++ unsigned fpsused = 0;
++#endif
+
+ cif = closure->cif;
+ avalue = alloca (cif->nargs * sizeof(void *));
+
+- /* Copy the caller's structure return value address so that the closure
+- returns the data directly to the caller. */
+ if (cif->rtype->type == FFI_TYPE_STRUCT)
+ {
++#if defined(POWERPC_DARWIN64)
++ if (!darwin64_struct_ret_by_value_p (cif->rtype))
++ {
++ /* Won't fit into the regs - return by ref. */
++ rvalue = (void *) *pgr;
++ pgr++;
++ }
++#elif defined(DARWIN_PPC)
++ if (cif->rtype->size > 4)
++ {
++ rvalue = (void *) *pgr;
++ pgr++;
++ }
++#else /* assume we return by ref. */
+ rvalue = (void *) *pgr;
+ pgr++;
++#endif
+ }
+
+ i = 0;
+ avn = cif->nargs;
+ arg_types = cif->arg_types;
+
+ /* Grab the addresses of the arguments from the stack frame. */
+ while (i < avn)
+ {
+ switch (arg_types[i]->type)
+ {
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT8:
+-#ifdef POWERPC64
++#if defined(POWERPC64)
+ avalue[i] = (char *) pgr + 7;
+ #else
+ avalue[i] = (char *) pgr + 3;
+ #endif
+ pgr++;
+ break;
+
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT16:
+-#ifdef POWERPC64
++#if defined(POWERPC64)
+ avalue[i] = (char *) pgr + 6;
+ #else
+ avalue[i] = (char *) pgr + 2;
+ #endif
+ pgr++;
+ break;
+
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT32:
+-#ifdef POWERPC64
++#if defined(POWERPC64)
+ avalue[i] = (char *) pgr + 4;
+ #else
+ case FFI_TYPE_POINTER:
+ avalue[i] = pgr;
+ #endif
+ pgr++;
+ break;
+
+ case FFI_TYPE_STRUCT:
+-#ifdef POWERPC64
+ size_al = arg_types[i]->size;
+- if (arg_types[i]->elements[0]->type == FFI_TYPE_DOUBLE)
+- size_al = ALIGN (arg_types[i]->size, 8);
+- if (size_al < 3 && cif->abi == FFI_DARWIN)
+- avalue[i] = (char *) pgr + 8 - size_al;
+- else
+- avalue[i] = pgr;
++#if defined(POWERPC_DARWIN64)
++ pgr = (unsigned long *)ALIGN((char *)pgr, arg_types[i]->alignment);
++ if (size_al < 3 || size_al == 4)
++ {
++ avalue[i] = ((char *)pgr)+8-size_al;
++ if (arg_types[i]->elements[0]->type == FFI_TYPE_FLOAT
++ && fpsused < NUM_FPR_ARG_REGISTERS)
++ {
++ *(float *)pgr = (float) *(double *)pfr;
++ pfr++;
++ fpsused++;
++ }
++ }
++ else
++ {
++ if (size_al != 16)
++ pfr = (ffi_dblfl *)
++ darwin64_struct_floats_to_mem (arg_types[i], (char *)pgr,
++ (double *)pfr, &fpsused);
++ avalue[i] = pgr;
++ }
+ pgr += (size_al + 7) / 8;
+ #else
+- /* Structures that match the basic modes (QI 1 byte, HI 2 bytes,
+- SI 4 bytes) are aligned as if they were those modes. */
+- size_al = arg_types[i]->size;
+ /* If the first member of the struct is a double, then align
+ the struct to double-word. */
+ if (arg_types[i]->elements[0]->type == FFI_TYPE_DOUBLE)
+ size_al = ALIGN(arg_types[i]->size, 8);
++# if defined(POWERPC64)
++ FFI_ASSERT (cif->abi != FFI_DARWIN);
++ avalue[i] = pgr;
++ pgr += (size_al + 7) / 8;
++# else
++ /* Structures that match the basic modes (QI 1 byte, HI 2 bytes,
++ SI 4 bytes) are aligned as if they were those modes. */
+ if (size_al < 3 && cif->abi == FFI_DARWIN)
+ avalue[i] = (char*) pgr + 4 - size_al;
+ else
+ avalue[i] = pgr;
+ pgr += (size_al + 3) / 4;
++# endif
+ #endif
+ break;
+
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_UINT64:
+-#ifdef POWERPC64
++#if defined(POWERPC64)
+ case FFI_TYPE_POINTER:
+ avalue[i] = pgr;
+ pgr++;
+ break;
+ #else
+ /* Long long ints are passed in two gpr's. */
+ avalue[i] = pgr;
+ pgr += 2;
+@@ -919,10 +1350,10 @@ ffi_closure_helper_DARWIN (ffi_closure *
+ FFI_ASSERT(0);
+ }
+ i++;
+ }
+
+ (closure->fun) (cif, rvalue, avalue, closure->user_data);
+
+ /* Tell ffi_closure_ASM to perform return type promotions. */
+- return cif->rtype->type;
++ return cif->rtype;
+ }
+diff --git a/js/src/ctypes/libffi/src/powerpc/ffitarget.h b/js/src/ctypes/libffi/src/powerpc/ffitarget.h
+--- a/js/src/ctypes/libffi/src/powerpc/ffitarget.h
++++ b/js/src/ctypes/libffi/src/powerpc/ffitarget.h
+@@ -1,11 +1,13 @@
+ /* -----------------------------------------------------------------*-C-*-
+- ffitarget.h - Copyright (c) 1996-2003 Red Hat, Inc.
+- Copyright (C) 2007, 2008 Free Software Foundation, Inc
++ ffitarget.h - Copyright (c) 2012 Anthony Green
++ Copyright (C) 2007, 2008, 2010 Free Software Foundation, Inc
++ Copyright (c) 1996-2003 Red Hat, Inc.
++
+ Target configuration macros for PowerPC.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+@@ -23,26 +25,33 @@
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+
+ ----------------------------------------------------------------------- */
+
+ #ifndef LIBFFI_TARGET_H
+ #define LIBFFI_TARGET_H
+
++#ifndef LIBFFI_H
++#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead."
++#endif
++
+ /* ---- System specific configurations ----------------------------------- */
+
+ #if defined (POWERPC) && defined (__powerpc64__) /* linux64 */
+ #ifndef POWERPC64
+ #define POWERPC64
+ #endif
+-#elif defined (POWERPC_DARWIN) && defined (__ppc64__) /* Darwin */
++#elif defined (POWERPC_DARWIN) && defined (__ppc64__) /* Darwin64 */
+ #ifndef POWERPC64
+ #define POWERPC64
+ #endif
++#ifndef POWERPC_DARWIN64
++#define POWERPC_DARWIN64
++#endif
+ #elif defined (POWERPC_AIX) && defined (__64BIT__) /* AIX64 */
+ #ifndef POWERPC64
+ #define POWERPC64
+ #endif
+ #endif
+
+ #ifndef LIBFFI_ASM
+ typedef unsigned long ffi_arg;
+@@ -52,28 +61,24 @@ typedef enum ffi_abi {
+ FFI_FIRST_ABI = 0,
+
+ #ifdef POWERPC
+ FFI_SYSV,
+ FFI_GCC_SYSV,
+ FFI_LINUX64,
+ FFI_LINUX,
+ FFI_LINUX_SOFT_FLOAT,
+-# ifdef POWERPC64
++# if defined(POWERPC64)
+ FFI_DEFAULT_ABI = FFI_LINUX64,
++# elif defined(__NO_FPRS__)
++ FFI_DEFAULT_ABI = FFI_LINUX_SOFT_FLOAT,
++# elif (__LDBL_MANT_DIG__ == 106)
++ FFI_DEFAULT_ABI = FFI_LINUX,
+ # else
+-# if (!defined(__NO_FPRS__) && (__LDBL_MANT_DIG__ == 106))
+- FFI_DEFAULT_ABI = FFI_LINUX,
+-# else
+-# ifdef __NO_FPRS__
+- FFI_DEFAULT_ABI = FFI_LINUX_SOFT_FLOAT,
+-# else
+ FFI_DEFAULT_ABI = FFI_GCC_SYSV,
+-# endif
+-# endif
+ # endif
+ #endif
+
+ #ifdef POWERPC_AIX
+ FFI_AIX,
+ FFI_DARWIN,
+ FFI_DEFAULT_ABI = FFI_AIX,
+ #endif
+@@ -96,32 +101,49 @@ typedef enum ffi_abi {
+ FFI_LAST_ABI
+ } ffi_abi;
+ #endif
+
+ /* ---- Definitions for closures ----------------------------------------- */
+
+ #define FFI_CLOSURES 1
+ #define FFI_NATIVE_RAW_API 0
++#if defined (POWERPC) || defined (POWERPC_FREEBSD)
++# define FFI_TARGET_SPECIFIC_VARIADIC 1
++# define FFI_EXTRA_CIF_FIELDS unsigned nfixedargs
++#endif
+
+ /* For additional types like the below, take care about the order in
+ ppc_closures.S. They must follow after the FFI_TYPE_LAST. */
+
+ /* Needed for soft-float long-double-128 support. */
+ #define FFI_TYPE_UINT128 (FFI_TYPE_LAST + 1)
+
+ /* Needed for FFI_SYSV small structure returns.
+ We use two flag bits, (FLAG_SYSV_SMST_R3, FLAG_SYSV_SMST_R4) which are
+ defined in ffi.c, to determine the exact return type and its size. */
+ #define FFI_SYSV_TYPE_SMALL_STRUCT (FFI_TYPE_LAST + 2)
+
+-#if defined(POWERPC64) || defined(POWERPC_AIX)
+-#define FFI_TRAMPOLINE_SIZE 24
+-#else /* POWERPC || POWERPC_AIX */
+-#define FFI_TRAMPOLINE_SIZE 40
++/* Used by ELFv2 for homogenous structure returns. */
++#define FFI_V2_TYPE_FLOAT_HOMOG (FFI_TYPE_LAST + 1)
++#define FFI_V2_TYPE_DOUBLE_HOMOG (FFI_TYPE_LAST + 2)
++#define FFI_V2_TYPE_SMALL_STRUCT (FFI_TYPE_LAST + 3)
++
++#if _CALL_ELF == 2
++# define FFI_TRAMPOLINE_SIZE 32
++#else
++# if defined(POWERPC64) || defined(POWERPC_AIX)
++# if defined(POWERPC_DARWIN64)
++# define FFI_TRAMPOLINE_SIZE 48
++# else
++# define FFI_TRAMPOLINE_SIZE 24
++# endif
++# else /* POWERPC || POWERPC_AIX */
++# define FFI_TRAMPOLINE_SIZE 40
++# endif
+ #endif
+
+ #ifndef LIBFFI_ASM
+ #if defined(POWERPC_DARWIN) || defined(POWERPC_AIX)
+ struct ffi_aix_trampoline_struct {
+ void * code_pointer; /* Pointer to ffi_closure_ASM */
+ void * toc; /* TOC */
+ void * static_chain; /* Pointer to closure */
+diff --git a/js/src/ctypes/libffi/src/powerpc/linux64.S b/js/src/ctypes/libffi/src/powerpc/linux64.S
+--- a/js/src/ctypes/libffi/src/powerpc/linux64.S
++++ b/js/src/ctypes/libffi/src/powerpc/linux64.S
+@@ -25,56 +25,86 @@
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+ #define LIBFFI_ASM
+ #include <fficonfig.h>
+ #include <ffi.h>
+
+ #ifdef __powerpc64__
+- .hidden ffi_call_LINUX64, .ffi_call_LINUX64
+- .globl ffi_call_LINUX64, .ffi_call_LINUX64
++ .hidden ffi_call_LINUX64
++ .globl ffi_call_LINUX64
++# if _CALL_ELF == 2
++ .text
++ffi_call_LINUX64:
++ addis %r2, %r12, .TOC.-ffi_call_LINUX64@ha
++ addi %r2, %r2, .TOC.-ffi_call_LINUX64@l
++ .localentry ffi_call_LINUX64, . - ffi_call_LINUX64
++# else
+ .section ".opd","aw"
+ .align 3
+ ffi_call_LINUX64:
++# ifdef _CALL_LINUX
++ .quad .L.ffi_call_LINUX64,.TOC.@tocbase,0
++ .type ffi_call_LINUX64,@function
++ .text
++.L.ffi_call_LINUX64:
++# else
++ .hidden .ffi_call_LINUX64
++ .globl .ffi_call_LINUX64
+ .quad .ffi_call_LINUX64,.TOC.@tocbase,0
+ .size ffi_call_LINUX64,24
+ .type .ffi_call_LINUX64,@function
+ .text
+ .ffi_call_LINUX64:
++# endif
++# endif
+ .LFB1:
+ mflr %r0
+ std %r28, -32(%r1)
+ std %r29, -24(%r1)
+ std %r30, -16(%r1)
+ std %r31, -8(%r1)
+ std %r0, 16(%r1)
+
+ mr %r28, %r1 /* our AP. */
+ .LCFI0:
+ stdux %r1, %r1, %r4
+ mr %r31, %r5 /* flags, */
+ mr %r30, %r6 /* rvalue, */
+ mr %r29, %r7 /* function address. */
++/* Save toc pointer, not for the ffi_prep_args64 call, but for the later
++ bctrl function call. */
++# if _CALL_ELF == 2
++ std %r2, 24(%r1)
++# else
+ std %r2, 40(%r1)
++# endif
+
+ /* Call ffi_prep_args64. */
+ mr %r4, %r1
++# if defined _CALL_LINUX || _CALL_ELF == 2
++ bl ffi_prep_args64
++# else
+ bl .ffi_prep_args64
++# endif
+
+- ld %r0, 0(%r29)
++# if _CALL_ELF == 2
++ mr %r12, %r29
++# else
++ ld %r12, 0(%r29)
+ ld %r2, 8(%r29)
+ ld %r11, 16(%r29)
+-
++# endif
+ /* Now do the call. */
+ /* Set up cr1 with bits 4-7 of the flags. */
+ mtcrf 0x40, %r31
+
+ /* Get the address to call into CTR. */
+- mtctr %r0
++ mtctr %r12
+ /* Load all those argument registers. */
+ ld %r3, -32-(8*8)(%r28)
+ ld %r4, -32-(7*8)(%r28)
+ ld %r5, -32-(6*8)(%r28)
+ ld %r6, -32-(5*8)(%r28)
+ bf- 5, 1f
+ ld %r7, -32-(4*8)(%r28)
+ ld %r8, -32-(3*8)(%r28)
+@@ -99,50 +129,93 @@ 1:
+ lfd %f13, -32-(9*8)(%r28)
+ 2:
+
+ /* Make the call. */
+ bctrl
+
+ /* This must follow the call immediately, the unwinder
+ uses this to find out if r2 has been saved or not. */
++# if _CALL_ELF == 2
++ ld %r2, 24(%r1)
++# else
+ ld %r2, 40(%r1)
++# endif
+
+ /* Now, deal with the return value. */
+ mtcrf 0x01, %r31
+- bt- 30, .Ldone_return_value
+- bt- 29, .Lfp_return_value
++ bt 31, .Lstruct_return_value
++ bt 30, .Ldone_return_value
++ bt 29, .Lfp_return_value
+ std %r3, 0(%r30)
+ /* Fall through... */
+
+ .Ldone_return_value:
+ /* Restore the registers we used and return. */
+ mr %r1, %r28
+ ld %r0, 16(%r28)
+- ld %r28, -32(%r1)
++ ld %r28, -32(%r28)
+ mtlr %r0
+ ld %r29, -24(%r1)
+ ld %r30, -16(%r1)
+ ld %r31, -8(%r1)
+ blr
+
+ .Lfp_return_value:
+ bf 28, .Lfloat_return_value
+ stfd %f1, 0(%r30)
+ mtcrf 0x02, %r31 /* cr6 */
+ bf 27, .Ldone_return_value
+ stfd %f2, 8(%r30)
+ b .Ldone_return_value
+ .Lfloat_return_value:
+ stfs %f1, 0(%r30)
+ b .Ldone_return_value
++
++.Lstruct_return_value:
++ bf 29, .Lsmall_struct
++ bf 28, .Lfloat_homog_return_value
++ stfd %f1, 0(%r30)
++ stfd %f2, 8(%r30)
++ stfd %f3, 16(%r30)
++ stfd %f4, 24(%r30)
++ stfd %f5, 32(%r30)
++ stfd %f6, 40(%r30)
++ stfd %f7, 48(%r30)
++ stfd %f8, 56(%r30)
++ b .Ldone_return_value
++
++.Lfloat_homog_return_value:
++ stfs %f1, 0(%r30)
++ stfs %f2, 4(%r30)
++ stfs %f3, 8(%r30)
++ stfs %f4, 12(%r30)
++ stfs %f5, 16(%r30)
++ stfs %f6, 20(%r30)
++ stfs %f7, 24(%r30)
++ stfs %f8, 28(%r30)
++ b .Ldone_return_value
++
++.Lsmall_struct:
++ std %r3, 0(%r30)
++ std %r4, 8(%r30)
++ b .Ldone_return_value
++
+ .LFE1:
+ .long 0
+ .byte 0,12,0,1,128,4,0,0
++# if _CALL_ELF == 2
++ .size ffi_call_LINUX64,.-ffi_call_LINUX64
++# else
++# ifdef _CALL_LINUX
++ .size ffi_call_LINUX64,.-.L.ffi_call_LINUX64
++# else
+ .size .ffi_call_LINUX64,.-.ffi_call_LINUX64
++# endif
++# endif
+
+ .section .eh_frame,EH_FRAME_FLAGS,@progbits
+ .Lframe1:
+ .4byte .LECIE1-.LSCIE1 # Length of Common Information Entry
+ .LSCIE1:
+ .4byte 0x0 # CIE Identifier Tag
+ .byte 0x1 # CIE Version
+ .ascii "zR\0" # CIE Augmentation
+@@ -175,13 +248,13 @@ 2:
+ .byte 0x9e # DW_CFA_offset, column 0x1e
+ .uleb128 0x2
+ .byte 0x9d # DW_CFA_offset, column 0x1d
+ .uleb128 0x3
+ .byte 0x9c # DW_CFA_offset, column 0x1c
+ .uleb128 0x4
+ .align 3
+ .LEFDE1:
++
++# if (defined __ELF__ && defined __linux__) || _CALL_ELF == 2
++ .section .note.GNU-stack,"",@progbits
++# endif
+ #endif
+-
+-#if defined __ELF__ && defined __linux__
+- .section .note.GNU-stack,"",@progbits
+-#endif
+diff --git a/js/src/ctypes/libffi/src/powerpc/linux64_closure.S b/js/src/ctypes/libffi/src/powerpc/linux64_closure.S
+--- a/js/src/ctypes/libffi/src/powerpc/linux64_closure.S
++++ b/js/src/ctypes/libffi/src/powerpc/linux64_closure.S
+@@ -27,179 +27,332 @@
+ #define LIBFFI_ASM
+ #include <fficonfig.h>
+ #include <ffi.h>
+
+ .file "linux64_closure.S"
+
+ #ifdef __powerpc64__
+ FFI_HIDDEN (ffi_closure_LINUX64)
+- FFI_HIDDEN (.ffi_closure_LINUX64)
+- .globl ffi_closure_LINUX64, .ffi_closure_LINUX64
++ .globl ffi_closure_LINUX64
++# if _CALL_ELF == 2
++ .text
++ffi_closure_LINUX64:
++ addis %r2, %r12, .TOC.-ffi_closure_LINUX64@ha
++ addi %r2, %r2, .TOC.-ffi_closure_LINUX64@l
++ .localentry ffi_closure_LINUX64, . - ffi_closure_LINUX64
++# else
+ .section ".opd","aw"
+ .align 3
+ ffi_closure_LINUX64:
++# ifdef _CALL_LINUX
++ .quad .L.ffi_closure_LINUX64,.TOC.@tocbase,0
++ .type ffi_closure_LINUX64,@function
++ .text
++.L.ffi_closure_LINUX64:
++# else
++ FFI_HIDDEN (.ffi_closure_LINUX64)
++ .globl .ffi_closure_LINUX64
+ .quad .ffi_closure_LINUX64,.TOC.@tocbase,0
+ .size ffi_closure_LINUX64,24
+ .type .ffi_closure_LINUX64,@function
+ .text
+ .ffi_closure_LINUX64:
++# endif
++# endif
++
++# if _CALL_ELF == 2
++# 32 byte special reg save area + 64 byte parm save area and retval
++# + 13*8 fpr save area + round to 16
++# define STACKFRAME 208
++# define PARMSAVE 32
++# No parameter save area is needed for the call to ffi_closure_helper_LINUX64,
++# so return value can start there.
++# define RETVAL PARMSAVE
++# else
++# 48 bytes special reg save area + 64 bytes parm save area
++# + 16 bytes retval area + 13*8 bytes fpr save area + round to 16
++# define STACKFRAME 240
++# define PARMSAVE 48
++# define RETVAL PARMSAVE+64
++# endif
++
+ .LFB1:
+- # save general regs into parm save area
+- std %r3, 48(%r1)
+- std %r4, 56(%r1)
+- std %r5, 64(%r1)
+- std %r6, 72(%r1)
++# if _CALL_ELF == 2
++ ld %r12, FFI_TRAMPOLINE_SIZE(%r11) # closure->cif
+ mflr %r0
++ lwz %r12, 28(%r12) # cif->flags
++ mtcrf 0x40, %r12
++ addi %r12, %r1, PARMSAVE
++ bt 7, .Lparmsave
++ # Our caller has not allocated a parameter save area.
++ # We need to allocate one here and use it to pass gprs to
++ # ffi_closure_helper_LINUX64. The return value area will do.
++ addi %r12, %r1, -STACKFRAME+RETVAL
++.Lparmsave:
++ std %r0, 16(%r1)
++ # Save general regs into parm save area
++ std %r3, 0(%r12)
++ std %r4, 8(%r12)
++ std %r5, 16(%r12)
++ std %r6, 24(%r12)
++ std %r7, 32(%r12)
++ std %r8, 40(%r12)
++ std %r9, 48(%r12)
++ std %r10, 56(%r12)
+
+- std %r7, 80(%r1)
+- std %r8, 88(%r1)
+- std %r9, 96(%r1)
+- std %r10, 104(%r1)
++ # load up the pointer to the parm save area
++ mr %r5, %r12
++# else
++ mflr %r0
++ # Save general regs into parm save area
++ # This is the parameter save area set up by our caller.
++ std %r3, PARMSAVE+0(%r1)
++ std %r4, PARMSAVE+8(%r1)
++ std %r5, PARMSAVE+16(%r1)
++ std %r6, PARMSAVE+24(%r1)
++ std %r7, PARMSAVE+32(%r1)
++ std %r8, PARMSAVE+40(%r1)
++ std %r9, PARMSAVE+48(%r1)
++ std %r10, PARMSAVE+56(%r1)
++
+ std %r0, 16(%r1)
+
+- # mandatory 48 bytes special reg save area + 64 bytes parm save area
+- # + 16 bytes retval area + 13*8 bytes fpr save area + round to 16
+- stdu %r1, -240(%r1)
++ # load up the pointer to the parm save area
++ addi %r5, %r1, PARMSAVE
++# endif
++
++ # next save fpr 1 to fpr 13
++ stfd %f1, -104+(0*8)(%r1)
++ stfd %f2, -104+(1*8)(%r1)
++ stfd %f3, -104+(2*8)(%r1)
++ stfd %f4, -104+(3*8)(%r1)
++ stfd %f5, -104+(4*8)(%r1)
++ stfd %f6, -104+(5*8)(%r1)
++ stfd %f7, -104+(6*8)(%r1)
++ stfd %f8, -104+(7*8)(%r1)
++ stfd %f9, -104+(8*8)(%r1)
++ stfd %f10, -104+(9*8)(%r1)
++ stfd %f11, -104+(10*8)(%r1)
++ stfd %f12, -104+(11*8)(%r1)
++ stfd %f13, -104+(12*8)(%r1)
++
++	# load up the pointer to the saved fpr registers
++ addi %r6, %r1, -104
++
++ # load up the pointer to the result storage
++ addi %r4, %r1, -STACKFRAME+RETVAL
++
++ stdu %r1, -STACKFRAME(%r1)
+ .LCFI0:
+
+- # next save fpr 1 to fpr 13
+- stfd %f1, 128+(0*8)(%r1)
+- stfd %f2, 128+(1*8)(%r1)
+- stfd %f3, 128+(2*8)(%r1)
+- stfd %f4, 128+(3*8)(%r1)
+- stfd %f5, 128+(4*8)(%r1)
+- stfd %f6, 128+(5*8)(%r1)
+- stfd %f7, 128+(6*8)(%r1)
+- stfd %f8, 128+(7*8)(%r1)
+- stfd %f9, 128+(8*8)(%r1)
+- stfd %f10, 128+(9*8)(%r1)
+- stfd %f11, 128+(10*8)(%r1)
+- stfd %f12, 128+(11*8)(%r1)
+- stfd %f13, 128+(12*8)(%r1)
+-
+- # set up registers for the routine that actually does the work
+ # get the context pointer from the trampoline
+- mr %r3, %r11
+-
+- # now load up the pointer to the result storage
+- addi %r4, %r1, 112
+-
+- # now load up the pointer to the parameter save area
+- # in the previous frame
+- addi %r5, %r1, 240 + 48
+-
+- # now load up the pointer to the saved fpr registers */
+- addi %r6, %r1, 128
++ mr %r3, %r11
+
+ # make the call
++# if defined _CALL_LINUX || _CALL_ELF == 2
++ bl ffi_closure_helper_LINUX64
++# else
+ bl .ffi_closure_helper_LINUX64
++# endif
+ .Lret:
+
+ # now r3 contains the return type
+ # so use it to look up in a table
+ # so we know how to deal with each type
+
+ # look up the proper starting point in table
+ # by using return type as offset
++ ld %r0, STACKFRAME+16(%r1)
++ cmpldi %r3, FFI_V2_TYPE_SMALL_STRUCT
++ bge .Lsmall
+ mflr %r4 # move address of .Lret to r4
+ sldi %r3, %r3, 4 # now multiply return type by 16
+ addi %r4, %r4, .Lret_type0 - .Lret
+- ld %r0, 240+16(%r1)
+ add %r3, %r3, %r4 # add contents of table to table address
+ mtctr %r3
+ bctr # jump to it
+
+ # Each of the ret_typeX code fragments has to be exactly 16 bytes long
+ # (4 instructions). For cache effectiveness we align to a 16 byte boundary
+ # first.
+ .align 4
+
+ .Lret_type0:
+ # case FFI_TYPE_VOID
+ mtlr %r0
+- addi %r1, %r1, 240
++ addi %r1, %r1, STACKFRAME
+ blr
+ nop
+ # case FFI_TYPE_INT
+- lwa %r3, 112+4(%r1)
++# ifdef __LITTLE_ENDIAN__
++ lwa %r3, RETVAL+0(%r1)
++# else
++ lwa %r3, RETVAL+4(%r1)
++# endif
+ mtlr %r0
+- addi %r1, %r1, 240
++ addi %r1, %r1, STACKFRAME
+ blr
+ # case FFI_TYPE_FLOAT
+- lfs %f1, 112+0(%r1)
++ lfs %f1, RETVAL+0(%r1)
+ mtlr %r0
+- addi %r1, %r1, 240
++ addi %r1, %r1, STACKFRAME
+ blr
+ # case FFI_TYPE_DOUBLE
+- lfd %f1, 112+0(%r1)
++ lfd %f1, RETVAL+0(%r1)
+ mtlr %r0
+- addi %r1, %r1, 240
++ addi %r1, %r1, STACKFRAME
+ blr
+ # case FFI_TYPE_LONGDOUBLE
+- lfd %f1, 112+0(%r1)
++ lfd %f1, RETVAL+0(%r1)
+ mtlr %r0
+- lfd %f2, 112+8(%r1)
++ lfd %f2, RETVAL+8(%r1)
+ b .Lfinish
+ # case FFI_TYPE_UINT8
+- lbz %r3, 112+7(%r1)
++# ifdef __LITTLE_ENDIAN__
++ lbz %r3, RETVAL+0(%r1)
++# else
++ lbz %r3, RETVAL+7(%r1)
++# endif
+ mtlr %r0
+- addi %r1, %r1, 240
++ addi %r1, %r1, STACKFRAME
+ blr
+ # case FFI_TYPE_SINT8
+- lbz %r3, 112+7(%r1)
++# ifdef __LITTLE_ENDIAN__
++ lbz %r3, RETVAL+0(%r1)
++# else
++ lbz %r3, RETVAL+7(%r1)
++# endif
+ extsb %r3,%r3
+ mtlr %r0
+ b .Lfinish
+ # case FFI_TYPE_UINT16
+- lhz %r3, 112+6(%r1)
++# ifdef __LITTLE_ENDIAN__
++ lhz %r3, RETVAL+0(%r1)
++# else
++ lhz %r3, RETVAL+6(%r1)
++# endif
+ mtlr %r0
+ .Lfinish:
+- addi %r1, %r1, 240
++ addi %r1, %r1, STACKFRAME
+ blr
+ # case FFI_TYPE_SINT16
+- lha %r3, 112+6(%r1)
++# ifdef __LITTLE_ENDIAN__
++ lha %r3, RETVAL+0(%r1)
++# else
++ lha %r3, RETVAL+6(%r1)
++# endif
+ mtlr %r0
+- addi %r1, %r1, 240
++ addi %r1, %r1, STACKFRAME
+ blr
+ # case FFI_TYPE_UINT32
+- lwz %r3, 112+4(%r1)
++# ifdef __LITTLE_ENDIAN__
++ lwz %r3, RETVAL+0(%r1)
++# else
++ lwz %r3, RETVAL+4(%r1)
++# endif
+ mtlr %r0
+- addi %r1, %r1, 240
++ addi %r1, %r1, STACKFRAME
+ blr
+ # case FFI_TYPE_SINT32
+- lwa %r3, 112+4(%r1)
++# ifdef __LITTLE_ENDIAN__
++ lwa %r3, RETVAL+0(%r1)
++# else
++ lwa %r3, RETVAL+4(%r1)
++# endif
+ mtlr %r0
+- addi %r1, %r1, 240
++ addi %r1, %r1, STACKFRAME
+ blr
+ # case FFI_TYPE_UINT64
+- ld %r3, 112+0(%r1)
++ ld %r3, RETVAL+0(%r1)
+ mtlr %r0
+- addi %r1, %r1, 240
++ addi %r1, %r1, STACKFRAME
+ blr
+ # case FFI_TYPE_SINT64
+- ld %r3, 112+0(%r1)
++ ld %r3, RETVAL+0(%r1)
+ mtlr %r0
+- addi %r1, %r1, 240
++ addi %r1, %r1, STACKFRAME
+ blr
+ # case FFI_TYPE_STRUCT
+ mtlr %r0
+- addi %r1, %r1, 240
++ addi %r1, %r1, STACKFRAME
+ blr
+ nop
+ # case FFI_TYPE_POINTER
+- ld %r3, 112+0(%r1)
++ ld %r3, RETVAL+0(%r1)
+ mtlr %r0
+- addi %r1, %r1, 240
++ addi %r1, %r1, STACKFRAME
+ blr
+-# esac
++# case FFI_V2_TYPE_FLOAT_HOMOG
++ lfs %f1, RETVAL+0(%r1)
++ lfs %f2, RETVAL+4(%r1)
++ lfs %f3, RETVAL+8(%r1)
++ b .Lmorefloat
++# case FFI_V2_TYPE_DOUBLE_HOMOG
++ lfd %f1, RETVAL+0(%r1)
++ lfd %f2, RETVAL+8(%r1)
++ lfd %f3, RETVAL+16(%r1)
++ lfd %f4, RETVAL+24(%r1)
++ mtlr %r0
++ lfd %f5, RETVAL+32(%r1)
++ lfd %f6, RETVAL+40(%r1)
++ lfd %f7, RETVAL+48(%r1)
++ lfd %f8, RETVAL+56(%r1)
++ addi %r1, %r1, STACKFRAME
++ blr
++.Lmorefloat:
++ lfs %f4, RETVAL+12(%r1)
++ mtlr %r0
++ lfs %f5, RETVAL+16(%r1)
++ lfs %f6, RETVAL+20(%r1)
++ lfs %f7, RETVAL+24(%r1)
++ lfs %f8, RETVAL+28(%r1)
++ addi %r1, %r1, STACKFRAME
++ blr
++.Lsmall:
++# ifdef __LITTLE_ENDIAN__
++ ld %r3,RETVAL+0(%r1)
++ mtlr %r0
++ ld %r4,RETVAL+8(%r1)
++ addi %r1, %r1, STACKFRAME
++ blr
++# else
++ # A struct smaller than a dword is returned in the low bits of r3
++ # ie. right justified. Larger structs are passed left justified
++ # in r3 and r4. The return value area on the stack will have
++ # the structs as they are usually stored in memory.
++ cmpldi %r3, FFI_V2_TYPE_SMALL_STRUCT + 7 # size 8 bytes?
++ neg %r5, %r3
++ ld %r3,RETVAL+0(%r1)
++ blt .Lsmalldown
++ mtlr %r0
++ ld %r4,RETVAL+8(%r1)
++ addi %r1, %r1, STACKFRAME
++ blr
++.Lsmalldown:
++ addi %r5, %r5, FFI_V2_TYPE_SMALL_STRUCT + 7
++ mtlr %r0
++ sldi %r5, %r5, 3
++ addi %r1, %r1, STACKFRAME
++ srd %r3, %r3, %r5
++ blr
++# endif
++
+ .LFE1:
+ .long 0
+ .byte 0,12,0,1,128,0,0,0
++# if _CALL_ELF == 2
++ .size ffi_closure_LINUX64,.-ffi_closure_LINUX64
++# else
++# ifdef _CALL_LINUX
++ .size ffi_closure_LINUX64,.-.L.ffi_closure_LINUX64
++# else
+ .size .ffi_closure_LINUX64,.-.ffi_closure_LINUX64
++# endif
++# endif
+
+ .section .eh_frame,EH_FRAME_FLAGS,@progbits
+ .Lframe1:
+ .4byte .LECIE1-.LSCIE1 # Length of Common Information Entry
+ .LSCIE1:
+ .4byte 0x0 # CIE Identifier Tag
+ .byte 0x1 # CIE Version
+ .ascii "zR\0" # CIE Augmentation
+@@ -218,19 +371,19 @@ ffi_closure_LINUX64:
+ .LASFDE1:
+ .4byte .LASFDE1-.Lframe1 # FDE CIE offset
+ .8byte .LFB1-. # FDE initial location
+ .8byte .LFE1-.LFB1 # FDE address range
+ .uleb128 0x0 # Augmentation size
+ .byte 0x2 # DW_CFA_advance_loc1
+ .byte .LCFI0-.LFB1
+ .byte 0xe # DW_CFA_def_cfa_offset
+- .uleb128 240
++ .uleb128 STACKFRAME
+ .byte 0x11 # DW_CFA_offset_extended_sf
+ .uleb128 0x41
+ .sleb128 -2
+ .align 3
+ .LEFDE1:
++
++# if defined __ELF__ && defined __linux__
++ .section .note.GNU-stack,"",@progbits
++# endif
+ #endif
+-
+-#if defined __ELF__ && defined __linux__
+- .section .note.GNU-stack,"",@progbits
+-#endif
+diff --git a/js/src/ctypes/libffi/src/powerpc/ppc_closure.S b/js/src/ctypes/libffi/src/powerpc/ppc_closure.S
+--- a/js/src/ctypes/libffi/src/powerpc/ppc_closure.S
++++ b/js/src/ctypes/libffi/src/powerpc/ppc_closure.S
+@@ -117,53 +117,88 @@ ENTRY(ffi_closure_SYSV)
+ # case FFI_TYPE_INT
+ lwz %r3,112+0(%r1)
+ mtlr %r0
+ .Lfinish:
+ addi %r1,%r1,144
+ blr
+
+ # case FFI_TYPE_FLOAT
++#ifndef __NO_FPRS__
+ lfs %f1,112+0(%r1)
+ mtlr %r0
+ addi %r1,%r1,144
++#else
++ nop
++ nop
++ nop
++#endif
+ blr
+
+ # case FFI_TYPE_DOUBLE
++#ifndef __NO_FPRS__
+ lfd %f1,112+0(%r1)
+ mtlr %r0
+ addi %r1,%r1,144
++#else
++ nop
++ nop
++ nop
++#endif
+ blr
+
+ # case FFI_TYPE_LONGDOUBLE
++#ifndef __NO_FPRS__
+ lfd %f1,112+0(%r1)
+ lfd %f2,112+8(%r1)
+ mtlr %r0
+ b .Lfinish
++#else
++ nop
++ nop
++ nop
++ blr
++#endif
+
+ # case FFI_TYPE_UINT8
++#ifdef __LITTLE_ENDIAN__
++ lbz %r3,112+0(%r1)
++#else
+ lbz %r3,112+3(%r1)
++#endif
+ mtlr %r0
+ addi %r1,%r1,144
+ blr
+
+ # case FFI_TYPE_SINT8
++#ifdef __LITTLE_ENDIAN__
++ lbz %r3,112+0(%r1)
++#else
+ lbz %r3,112+3(%r1)
++#endif
+ extsb %r3,%r3
+ mtlr %r0
+ b .Lfinish
+
+ # case FFI_TYPE_UINT16
++#ifdef __LITTLE_ENDIAN__
++ lhz %r3,112+0(%r1)
++#else
+ lhz %r3,112+2(%r1)
++#endif
+ mtlr %r0
+ addi %r1,%r1,144
+ blr
+
+ # case FFI_TYPE_SINT16
++#ifdef __LITTLE_ENDIAN__
++ lha %r3,112+0(%r1)
++#else
+ lha %r3,112+2(%r1)
++#endif
+ mtlr %r0
+ addi %r1,%r1,144
+ blr
+
+ # case FFI_TYPE_UINT32
+ lwz %r3,112+0(%r1)
+ mtlr %r0
+ addi %r1,%r1,144
+@@ -198,76 +233,99 @@ ENTRY(ffi_closure_SYSV)
+ mtlr %r0
+ addi %r1,%r1,144
+ blr
+
+ # case FFI_TYPE_UINT128
+ lwz %r3,112+0(%r1)
+ lwz %r4,112+4(%r1)
+ lwz %r5,112+8(%r1)
+- bl .Luint128
++ b .Luint128
+
+ # The return types below are only used when the ABI type is FFI_SYSV.
+ # case FFI_SYSV_TYPE_SMALL_STRUCT + 1. One byte struct.
+ lbz %r3,112+0(%r1)
+ mtlr %r0
+ addi %r1,%r1,144
+ blr
+
+ # case FFI_SYSV_TYPE_SMALL_STRUCT + 2. Two byte struct.
+ lhz %r3,112+0(%r1)
+ mtlr %r0
+ addi %r1,%r1,144
+ blr
+
+ # case FFI_SYSV_TYPE_SMALL_STRUCT + 3. Three byte struct.
+ lwz %r3,112+0(%r1)
++#ifdef __LITTLE_ENDIAN__
++ mtlr %r0
++ addi %r1,%r1,144
++ blr
++#else
+ srwi %r3,%r3,8
+ mtlr %r0
+ b .Lfinish
++#endif
+
+ # case FFI_SYSV_TYPE_SMALL_STRUCT + 4. Four byte struct.
+ lwz %r3,112+0(%r1)
+ mtlr %r0
+ addi %r1,%r1,144
+ blr
+
+ # case FFI_SYSV_TYPE_SMALL_STRUCT + 5. Five byte struct.
+ lwz %r3,112+0(%r1)
+ lwz %r4,112+4(%r1)
++#ifdef __LITTLE_ENDIAN__
++ mtlr %r0
++ b .Lfinish
++#else
+ li %r5,24
+ b .Lstruct567
++#endif
+
+ # case FFI_SYSV_TYPE_SMALL_STRUCT + 6. Six byte struct.
+ lwz %r3,112+0(%r1)
+ lwz %r4,112+4(%r1)
++#ifdef __LITTLE_ENDIAN__
++ mtlr %r0
++ b .Lfinish
++#else
+ li %r5,16
+ b .Lstruct567
++#endif
+
+ # case FFI_SYSV_TYPE_SMALL_STRUCT + 7. Seven byte struct.
+ lwz %r3,112+0(%r1)
+ lwz %r4,112+4(%r1)
++#ifdef __LITTLE_ENDIAN__
++ mtlr %r0
++ b .Lfinish
++#else
+ li %r5,8
+ b .Lstruct567
++#endif
+
+ # case FFI_SYSV_TYPE_SMALL_STRUCT + 8. Eight byte struct.
+ lwz %r3,112+0(%r1)
+ lwz %r4,112+4(%r1)
+ mtlr %r0
+ b .Lfinish
+
++#ifndef __LITTLE_ENDIAN__
+ .Lstruct567:
+ subfic %r6,%r5,32
+ srw %r4,%r4,%r5
+ slw %r6,%r3,%r6
+ srw %r3,%r3,%r5
+ or %r4,%r6,%r4
+ mtlr %r0
+ addi %r1,%r1,144
+ blr
++#endif
+
+ .Luint128:
+ lwz %r6,112+12(%r1)
+ mtlr %r0
+ addi %r1,%r1,144
+ blr
+
+ END(ffi_closure_SYSV)
+diff --git a/js/src/ctypes/libffi/src/powerpc/sysv.S b/js/src/ctypes/libffi/src/powerpc/sysv.S
+--- a/js/src/ctypes/libffi/src/powerpc/sysv.S
++++ b/js/src/ctypes/libffi/src/powerpc/sysv.S
+@@ -78,37 +78,41 @@ ENTRY(ffi_call_SYSV)
+ nop
+ lwz %r7,-16-(4*4)(%r28)
+ lwz %r8,-16-(3*4)(%r28)
+ lwz %r9,-16-(2*4)(%r28)
+ lwz %r10,-16-(1*4)(%r28)
+ nop
+ 1:
+
++#ifndef __NO_FPRS__
+ /* Load all the FP registers. */
+ bf- 6,2f
+ lfd %f1,-16-(8*4)-(8*8)(%r28)
+ lfd %f2,-16-(8*4)-(7*8)(%r28)
+ lfd %f3,-16-(8*4)-(6*8)(%r28)
+ lfd %f4,-16-(8*4)-(5*8)(%r28)
+ nop
+ lfd %f5,-16-(8*4)-(4*8)(%r28)
+ lfd %f6,-16-(8*4)-(3*8)(%r28)
+ lfd %f7,-16-(8*4)-(2*8)(%r28)
+ lfd %f8,-16-(8*4)-(1*8)(%r28)
++#endif
+ 2:
+
+ /* Make the call. */
+ bctrl
+
+ /* Now, deal with the return value. */
+ mtcrf 0x01,%r31 /* cr7 */
+ bt- 31,L(small_struct_return_value)
+ bt- 30,L(done_return_value)
++#ifndef __NO_FPRS__
+ bt- 29,L(fp_return_value)
++#endif
+ stw %r3,0(%r30)
+ bf+ 28,L(done_return_value)
+ stw %r4,4(%r30)
+ mtcrf 0x02,%r31 /* cr6 */
+ bf 27,L(done_return_value)
+ stw %r5,8(%r30)
+ stw %r6,12(%r30)
+ /* Fall through... */
+@@ -119,41 +123,38 @@ L(done_return_value):
+ lwz %r31, -4(%r28)
+ mtlr %r9
+ lwz %r30, -8(%r28)
+ lwz %r29,-12(%r28)
+ lwz %r28,-16(%r28)
+ lwz %r1,0(%r1)
+ blr
+
++#ifndef __NO_FPRS__
+ L(fp_return_value):
+ bf 28,L(float_return_value)
+ stfd %f1,0(%r30)
+ mtcrf 0x02,%r31 /* cr6 */
+ bf 27,L(done_return_value)
+ stfd %f2,8(%r30)
+ b L(done_return_value)
+ L(float_return_value):
+ stfs %f1,0(%r30)
+ b L(done_return_value)
++#endif
+
+ L(small_struct_return_value):
+- extrwi %r6,%r31,2,19 /* number of bytes padding = shift/8 */
+- mtcrf 0x02,%r31 /* copy flags to cr[24:27] (cr6) */
+- extrwi %r5,%r31,5,19 /* r5 <- number of bits of padding */
+- subfic %r6,%r6,4 /* r6 <- number of useful bytes in r3 */
+- bf- 25,L(done_return_value) /* struct in r3 ? if not, done. */
+-/* smst_one_register: */
+- slw %r3,%r3,%r5 /* Left-justify value in r3 */
+- mtxer %r6 /* move byte count to XER ... */
+- stswx %r3,0,%r30 /* ... and store that many bytes */
+- bf+ 26,L(done_return_value) /* struct in r3:r4 ? */
+- add %r6,%r6,%r30 /* adjust pointer */
+- stswi %r4,%r6,4 /* store last four bytes */
+- b L(done_return_value)
++ /*
++ * The C code always allocates a properly-aligned 8-byte bounce
++ * buffer to make this assembly code very simple. Just write out
++ * r3 and r4 to the buffer to allow the C code to handle the rest.
++ */
++ stw %r3, 0(%r30)
++ stw %r4, 4(%r30)
++ b L(done_return_value)
+
+ .LFE1:
+ END(ffi_call_SYSV)
+
+ .section ".eh_frame",EH_FRAME_FLAGS,@progbits
+ .Lframe1:
+ .4byte .LECIE1-.LSCIE1 /* Length of Common Information Entry */
+ .LSCIE1:
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mozilla-xpcom-ppc64le.patch Thu Jan 02 21:56:34 2014 +0100
@@ -0,0 +1,407 @@
+# HG changeset patch
+# Parent 5f1384375f10a5f0ee7c8288adef9593822e4e68
+# User Ulrich Weigand <uweigand@de.ibm.com>
+PPC64 LE support for XPCOM
+
+diff --git a/xpcom/reflect/xptcall/src/md/unix/Makefile.in b/xpcom/reflect/xptcall/src/md/unix/Makefile.in
+--- a/xpcom/reflect/xptcall/src/md/unix/Makefile.in
++++ b/xpcom/reflect/xptcall/src/md/unix/Makefile.in
+@@ -161,17 +161,17 @@ endif
+ ifneq (,$(filter Linuxpowerpc FreeBSDpowerpc,$(OS_ARCH)$(OS_TEST)))
+ ASFILES := xptcinvoke_asm_ppc_linux.s xptcstubs_asm_ppc_linux.s
+ AS := $(CC) -c -x assembler-with-cpp
+ endif
+
+ #
+ # Linux/PPC64
+ #
+-ifneq (,$(filter Linuxpowerpc64 FreeBSDpowerpc64,$(OS_ARCH)$(OS_TEST)))
++ifneq (,$(filter Linuxpowerpc64 Linuxpowerpc64le FreeBSDpowerpc64,$(OS_ARCH)$(OS_TEST)))
+ ASFILES := xptcinvoke_asm_ppc64_linux.s xptcstubs_asm_ppc64_linux.s
+ AS := $(CC) -c -x assembler-with-cpp
+ endif
+
+ #
+ # NetBSD/PPC
+ #
+ ifneq (,$(filter NetBSDmacppc NetBSDbebox NetBSDofppc NetBSDprep NetBSDamigappc,$(OS_ARCH)$(OS_TEST)))
+diff --git a/xpcom/reflect/xptcall/src/md/unix/moz.build b/xpcom/reflect/xptcall/src/md/unix/moz.build
+--- a/xpcom/reflect/xptcall/src/md/unix/moz.build
++++ b/xpcom/reflect/xptcall/src/md/unix/moz.build
+@@ -186,16 +186,23 @@ if CONFIG['OS_TEST'] == 'powerpc':
+
+ if CONFIG['OS_TEST'] == 'powerpc64':
+ if CONFIG['OS_ARCH'] in ('Linux', 'FreeBSD'):
+ SOURCES += [
+ 'xptcinvoke_ppc64_linux.cpp',
+ 'xptcstubs_ppc64_linux.cpp',
+ ]
+
++if CONFIG['OS_TEST'] == 'powerpc64le':  # same two sources as the powerpc64 block
++    if CONFIG['OS_ARCH'] == 'Linux':
++        SOURCES += [
++            'xptcinvoke_ppc64_linux.cpp',
++            'xptcstubs_ppc64_linux.cpp',
++        ]
++
+ if CONFIG['OS_TEST'] in ('macppc', 'bebox', 'ofppc', 'prep', 'amigappc'):
+ if CONFIG['OS_ARCH'] == 'NetBSD':
+ SOURCES += [
+ 'xptcinvoke_ppc_netbsd.cpp',
+ 'xptcstubs_ppc_netbsd.cpp',
+ ]
+
+ if CONFIG['OS_ARCH'] == 'OpenBSD' and CONFIG['OS_TEST'] == 'powerpc':
+diff --git a/xpcom/reflect/xptcall/src/md/unix/xptcinvoke_asm_ppc64_linux.s b/xpcom/reflect/xptcall/src/md/unix/xptcinvoke_asm_ppc64_linux.s
+--- a/xpcom/reflect/xptcall/src/md/unix/xptcinvoke_asm_ppc64_linux.s
++++ b/xpcom/reflect/xptcall/src/md/unix/xptcinvoke_asm_ppc64_linux.s
+@@ -12,33 +12,51 @@
+ .set f0,0; .set f1,1; .set f2,2; .set f3,3; .set f4,4
+ .set f5,5; .set f6,6; .set f7,7; .set f8,8; .set f9,9
+ .set f10,10; .set f11,11; .set f12,12; .set f13,13; .set f14,14
+ .set f15,15; .set f16,16; .set f17,17; .set f18,18; .set f19,19
+ .set f20,20; .set f21,21; .set f22,22; .set f23,23; .set f24,24
+ .set f25,25; .set f26,26; .set f27,27; .set f28,28; .set f29,29
+ .set f30,30; .set f31,31
+
++#if _CALL_ELF == 2
++#define STACK_TOC 24  /* ELFv2: TOC save doubleword is at SP+24 */
++#define STACK_PARAMS 96
++#else
++#define STACK_TOC 40  /* ELFv1: TOC save doubleword is at SP+40 */
++#define STACK_PARAMS 112
++#endif
+
+ #
+ # NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex,
+ # uint32_t paramCount, nsXPTCVariant* params)
+ #
+
++#if _CALL_ELF == 2
++ .section ".text"
++ .type NS_InvokeByIndex,@function
++ .globl NS_InvokeByIndex
++ .align 2
++NS_InvokeByIndex:
++0: addis 2,12,(.TOC.-0b)@ha
++ addi 2,2,(.TOC.-0b)@l
++ .localentry NS_InvokeByIndex,.-NS_InvokeByIndex
++#else
+ .section ".toc","aw"
+ .section ".text"
+ .align 2
+ .globl NS_InvokeByIndex
+ .section ".opd","aw"
+ .align 3
+ NS_InvokeByIndex:
+ .quad .NS_InvokeByIndex,.TOC.@tocbase
+ .previous
+ .type NS_InvokeByIndex,@function
+ .NS_InvokeByIndex:
++#endif
+ mflr 0
+ std 0,16(r1)
+
+ std r29,-24(r1)
+ std r30,-16(r1)
+ std r31,-8(r1)
+
+ mr r29,r3 # Save 'that' in r29
+@@ -51,51 +69,55 @@ NS_InvokeByIndex:
+ # that the stack remains 16-byte aligned.
+ #
+ # | ..128-byte stack frame.. | | 7 GP | 13 FP | 3 NV |
+ # | |(params)........| regs | regs | regs |
+ # (r1)...........(+112)....(+128)
+ # (-23*8).(-16*8).(-3*8)..(r31)
+
+ # +stack frame, -unused stack params, +regs storage, +1 for alignment
+- addi r7,r5,((112/8)-7+7+13+3+1)
++ addi r7,r5,((STACK_PARAMS/8)-7+7+13+3+1)
+ rldicr r7,r7,3,59 # multiply by 8 and mask with ~15
+ neg r7,r7
+ stdux r1,r1,r7
+
+
+ # Call invoke_copy_to_stack(uint64_t* gpregs, double* fpregs,
+ # uint32_t paramCount, nsXPTCVariant* s,
+ # uint64_t* d))
+
+ # r5, r6 are passed through intact (paramCount, params)
+ # r7 (d) has to be r1+112 -- where parameters are passed on the stack.
+ # r3, r4 are above that, easier to address from r31 than from r1
+
+ subi r3,r31,(23*8) # r3 --> GPRS
+ subi r4,r31,(16*8) # r4 --> FPRS
+- addi r7,r1,112 # r7 --> params
++ addi r7,r1,STACK_PARAMS # r7 --> params
+ bl invoke_copy_to_stack
+ nop
+
+ # Set up to invoke function
+
+ ld r9,0(r29) # vtable (r29 is 'that')
+ mr r3,r29 # self is first arg, obviously
+
+ sldi r30,r30,3 # Find function descriptor
+ add r9,r9,r30
+- ld r9,0(r9)
++ ld r12,0(r9)
+
+- ld r0,0(r9) # Actual address from fd.
+- std r2,40(r1) # Save r2 (TOC pointer)
++ std r2,STACK_TOC(r1) # Save r2 (TOC pointer)
+
++#if _CALL_ELF == 2
++ mtctr r12
++#else
++ ld r0,0(r12) # Actual address from fd.
+ mtctr 0
+- ld r11,16(r9) # Environment pointer from fd.
+- ld r2,8(r9) # TOC pointer from fd.
++ ld r11,16(r12) # Environment pointer from fd.
++ ld r2,8(r12) # TOC pointer from fd.
++#endif
+
+ # Load FP and GP registers as required
+ ld r4, -(23*8)(r31)
+ ld r5, -(22*8)(r31)
+ ld r6, -(21*8)(r31)
+ ld r7, -(20*8)(r31)
+ ld r8, -(19*8)(r31)
+ ld r9, -(18*8)(r31)
+@@ -112,21 +134,25 @@ NS_InvokeByIndex:
+ lfd f9, -(8*8)(r31)
+ lfd f10, -(7*8)(r31)
+ lfd f11, -(6*8)(r31)
+ lfd f12, -(5*8)(r31)
+ lfd f13, -(4*8)(r31)
+
+ bctrl # Do it
+
+- ld r2,40(r1) # Load our own TOC pointer
++ ld r2,STACK_TOC(r1) # Load our own TOC pointer
+ ld r1,0(r1) # Revert stack frame
+ ld 0,16(r1) # Reload lr
+ ld 29,-24(r1) # Restore NVGPRS
+ ld 30,-16(r1)
+ ld 31,-8(r1)
+ mtlr 0
+ blr
+
++#if _CALL_ELF == 2
++ .size NS_InvokeByIndex,.-NS_InvokeByIndex
++#else
+ .size NS_InvokeByIndex,.-.NS_InvokeByIndex
++#endif
+
+ # Magic indicating no need for an executable stack
+ .section .note.GNU-stack, "", @progbits ; .previous
+diff --git a/xpcom/reflect/xptcall/src/md/unix/xptcinvoke_ppc64_linux.cpp b/xpcom/reflect/xptcall/src/md/unix/xptcinvoke_ppc64_linux.cpp
+--- a/xpcom/reflect/xptcall/src/md/unix/xptcinvoke_ppc64_linux.cpp
++++ b/xpcom/reflect/xptcall/src/md/unix/xptcinvoke_ppc64_linux.cpp
+@@ -69,17 +69,19 @@ invoke_copy_to_stack(uint64_t* gpregs,
+ else
+ *(double *)d = s->val.d;
+ }
+ else if (!s->IsPtrData() && s->type == nsXPTType::T_FLOAT) {
+ if (i < FPR_COUNT) {
+ fpregs[i] = s->val.f; // if passed in registers, floats are promoted to doubles
+ } else {
+ float *p = (float *)d;
++#ifndef __LITTLE_ENDIAN__
+ p++;
++#endif
+ *p = s->val.f;
+ }
+ }
+ else {
+ if (i < GPR_COUNT)
+ gpregs[i] = tempu64;
+ else
+ *d = tempu64;
+diff --git a/xpcom/reflect/xptcall/src/md/unix/xptcstubs_asm_ppc64_linux.s b/xpcom/reflect/xptcall/src/md/unix/xptcstubs_asm_ppc64_linux.s
+--- a/xpcom/reflect/xptcall/src/md/unix/xptcstubs_asm_ppc64_linux.s
++++ b/xpcom/reflect/xptcall/src/md/unix/xptcstubs_asm_ppc64_linux.s
+@@ -12,28 +12,50 @@
+ .set f0,0; .set f1,1; .set f2,2; .set f3,3; .set f4,4
+ .set f5,5; .set f6,6; .set f7,7; .set f8,8; .set f9,9
+ .set f10,10; .set f11,11; .set f12,12; .set f13,13; .set f14,14
+ .set f15,15; .set f16,16; .set f17,17; .set f18,18; .set f19,19
+ .set f20,20; .set f21,21; .set f22,22; .set f23,23; .set f24,24
+ .set f25,25; .set f26,26; .set f27,27; .set f28,28; .set f29,29
+ .set f30,30; .set f31,31
+
++#if _CALL_ELF == 2
++#define STACK_PARAMS 96
++#else
++#define STACK_PARAMS 112
++#endif
++
++#if _CALL_ELF == 2
++ .section ".text"
++ .type SharedStub,@function
++ .globl SharedStub
++ # Make the symbol hidden so that the branch from the stub does
++ # not go via a PLT. This is not only better for performance,
++ # but may be necessary to avoid linker errors since there is
++ # no place to restore the TOC register in a sibling call.
++ .hidden SharedStub
++ .align 2
++SharedStub:
++0: addis 2,12,(.TOC.-0b)@ha
++ addi 2,2,(.TOC.-0b)@l
++ .localentry SharedStub,.-SharedStub
++#else
+ .section ".text"
+ .align 2
+ .globl SharedStub
+ .section ".opd","aw"
+ .align 3
+
+ SharedStub:
+ .quad .SharedStub,.TOC.@tocbase
+ .previous
+ .type SharedStub,@function
+
+ .SharedStub:
++#endif
+ mflr r0
+
+ std r4, -56(r1) # Save all GPRS
+ std r5, -48(r1)
+ std r6, -40(r1)
+ std r7, -32(r1)
+ std r8, -24(r1)
+ std r9, -16(r1)
+@@ -50,17 +72,17 @@ SharedStub:
+ stfd f5, -128(r1)
+ stfd f4, -136(r1)
+ stfd f3, -144(r1)
+ stfd f2, -152(r1)
+ stfd f1, -160(r1)
+
+ subi r6,r1,56 # r6 --> gprData
+ subi r7,r1,160 # r7 --> fprData
+- addi r5,r1,112 # r5 --> extra stack args
++ addi r5,r1,STACK_PARAMS # r5 --> extra stack args
+
+ std r0, 16(r1)
+
+ stdu r1,-288(r1)
+ # r3 has the 'self' pointer
+ # already
+
+ mr r4,r11 # r4 is methodIndex selector,
+@@ -70,12 +92,16 @@ SharedStub:
+ bl PrepareAndDispatch
+ nop
+
+ ld 1,0(r1) # restore stack
+ ld r0,16(r1) # restore LR
+ mtlr r0
+ blr
+
++#if _CALL_ELF == 2
++ .size SharedStub,.-SharedStub
++#else
+ .size SharedStub,.-.SharedStub
++#endif
+
+ # Magic indicating no need for an executable stack
+ .section .note.GNU-stack, "", @progbits ; .previous
+diff --git a/xpcom/reflect/xptcall/src/md/unix/xptcstubs_ppc64_linux.cpp b/xpcom/reflect/xptcall/src/md/unix/xptcstubs_ppc64_linux.cpp
+--- a/xpcom/reflect/xptcall/src/md/unix/xptcstubs_ppc64_linux.cpp
++++ b/xpcom/reflect/xptcall/src/md/unix/xptcstubs_ppc64_linux.cpp
+@@ -78,17 +78,19 @@ PrepareAndDispatch(nsXPTCStubBase* self,
+ dp->val.d = fprData[i];
+ else
+ dp->val.d = *(double*) ap;
+ } else if (!param.IsOut() && type == nsXPTType::T_FLOAT) {
+ if (i < FPR_COUNT)
+ dp->val.f = (float) fprData[i]; // in registers floats are passed as doubles
+ else {
+ float *p = (float *)ap;
++#ifndef __LITTLE_ENDIAN__
+ p++;
++#endif
+ dp->val.f = *p;
+ }
+ } else { /* integer type or pointer */
+ if (i < GPR_COUNT)
+ tempu64 = gprData[i];
+ else
+ tempu64 = *ap;
+
+@@ -148,16 +150,53 @@ PrepareAndDispatch(nsXPTCStubBase* self,
+ // Create names would be like:
+ // _ZN14nsXPTCStubBase5Stub1Ev
+ // _ZN14nsXPTCStubBase6Stub12Ev
+ // _ZN14nsXPTCStubBase7Stub123Ev
+ // _ZN14nsXPTCStubBase8Stub1234Ev
+ // etc.
+ // Use assembler directives to get the names right...
+
++#if _CALL_ELF == 2
++# define STUB_ENTRY(n) \
++__asm__ ( \
++ ".section \".text\" \n\t" \
++ ".align 2 \n\t" \
++ ".if "#n" < 10 \n\t" \
++ ".globl _ZN14nsXPTCStubBase5Stub"#n"Ev \n\t" \
++ ".type _ZN14nsXPTCStubBase5Stub"#n"Ev,@function \n\n" \
++"_ZN14nsXPTCStubBase5Stub"#n"Ev: \n\t" \
++ "0: addis 2,12,.TOC.-0b@ha \n\t" \
++ "addi 2,2,.TOC.-0b@l \n\t" \
++ ".localentry _ZN14nsXPTCStubBase5Stub"#n"Ev,.-_ZN14nsXPTCStubBase5Stub"#n"Ev \n\t" \
++ \
++ ".elseif "#n" < 100 \n\t" \
++ ".globl _ZN14nsXPTCStubBase6Stub"#n"Ev \n\t" \
++ ".type _ZN14nsXPTCStubBase6Stub"#n"Ev,@function \n\n" \
++"_ZN14nsXPTCStubBase6Stub"#n"Ev: \n\t" \
++ "0: addis 2,12,.TOC.-0b@ha \n\t" \
++ "addi 2,2,.TOC.-0b@l \n\t" \
++ ".localentry _ZN14nsXPTCStubBase6Stub"#n"Ev,.-_ZN14nsXPTCStubBase6Stub"#n"Ev \n\t" \
++ \
++ ".elseif "#n" < 1000 \n\t" \
++ ".globl _ZN14nsXPTCStubBase7Stub"#n"Ev \n\t" \
++ ".type _ZN14nsXPTCStubBase7Stub"#n"Ev,@function \n\n" \
++"_ZN14nsXPTCStubBase7Stub"#n"Ev: \n\t" \
++ "0: addis 2,12,.TOC.-0b@ha \n\t" \
++ "addi 2,2,.TOC.-0b@l \n\t" \
++ ".localentry _ZN14nsXPTCStubBase7Stub"#n"Ev,.-_ZN14nsXPTCStubBase7Stub"#n"Ev \n\t" \
++ \
++ ".else \n\t" \
++ ".err \"stub number "#n" >= 1000 not yet supported\"\n" \
++ ".endif \n\t" \
++ \
++ "li 11,"#n" \n\t" \
++ "b SharedStub \n" \
++);
++#else
+ # define STUB_ENTRY(n) \
+ __asm__ ( \
+ ".section \".toc\",\"aw\" \n\t" \
+ ".section \".text\" \n\t" \
+ ".align 2 \n\t" \
+ ".if "#n" < 10 \n\t" \
+ ".globl _ZN14nsXPTCStubBase5Stub"#n"Ev \n\t" \
+ ".section \".opd\",\"aw\" \n\t" \
+@@ -190,16 +229,17 @@ PrepareAndDispatch(nsXPTCStubBase* self,
+ \
+ ".else \n\t" \
+ ".err \"stub number "#n" >= 1000 not yet supported\"\n" \
+ ".endif \n\t" \
+ \
+ "li 11,"#n" \n\t" \
+ "b SharedStub \n" \
+ );
++#endif
+
+ #define SENTINEL_ENTRY(n) \
+ nsresult nsXPTCStubBase::Sentinel##n() \
+ { \
+ NS_ERROR("nsXPTCStubBase::Sentinel called"); \
+ return NS_ERROR_NOT_IMPLEMENTED; \
+ }
+