From e67697c370e8875f28b2bf62fac25edad7558eca Mon Sep 17 00:00:00 2001
From: YunQiang Su <syq@debian.org>
Date: Thu, 31 Mar 2022 20:44:49 +0800
Subject: MIPS: add Complex support (#698)

---
 src/mips/ffi.c       | 285 ++++++++++++++++++++++++++++++++++++++++-----------
 src/mips/ffitarget.h |   6 ++
 src/mips/n32.S       | 104 ++++++++++++++++---
 src/mips/o32.S       |  65 +++++++++++-
 4 files changed, 380 insertions(+), 80 deletions(-)

(limited to 'src')

diff --git a/src/mips/ffi.c b/src/mips/ffi.c
index 979ca49..77bf3db 100644
--- a/src/mips/ffi.c
+++ b/src/mips/ffi.c
@@ -31,6 +31,7 @@
 
 #include <stdint.h>
 #include <stdlib.h>
+#include <stdio.h>
 
 #ifdef __GNUC__
 #  if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 3))
@@ -77,22 +78,37 @@ static void ffi_prep_args(char *stack,
 {
   int i;
   void **p_argv;
-  char *argp;
+  char *argp, *argp_f;
   ffi_type **p_arg;
 
+  memset(stack, 0, bytes);
+
 #ifdef FFI_MIPS_N32
   /* If more than 8 double words are used, the remainder go
      on the stack. We reorder stuff on the stack here to 
      support this easily. */
-  if (bytes > 8 * sizeof(ffi_arg))
-    argp = &stack[bytes - (8 * sizeof(ffi_arg))];
+  /* If the return type is _Complex long double, the arg registers shift by 2 and a0 holds the pointer to rvalue.  */
+  if (ecif->cif->rtype->type == FFI_TYPE_COMPLEX && ecif->cif->rtype->elements[0]->type == FFI_TYPE_LONGDOUBLE)
+    {
+      if (bytes + 16 > 8 * sizeof(ffi_arg))
+        argp = &stack[bytes - (8 * sizeof(ffi_arg))];
+      else
+        argp = stack;
+      * (unsigned long *) argp = (unsigned long) ecif->rvalue;
+      argp += 16;
+    }
   else
-    argp = stack;
+    {
+      if (bytes > 8 * sizeof(ffi_arg))
+        argp = &stack[bytes - (8 * sizeof(ffi_arg))];
+      else
+        argp = stack;
+    }
 #else
   argp = stack;
 #endif
 
-  memset(stack, 0, bytes);
+  argp_f = argp;
 
 #ifdef FFI_MIPS_N32
   if ( ecif->cif->rstruct_flag != 0 )
@@ -183,6 +199,24 @@ static void ffi_prep_args(char *stack,
 #endif
 		break;
 
+#ifdef FFI_MIPS_N32
+	      case FFI_TYPE_COMPLEX:
+		/* Expand from 4+4 to 8+8 bytes when passed in FPRs.  */
+		/* argp winds back to the stack once all register args have been processed.  */
+		/* Variadic args are passed in GPRs; they take the memcpy path below, unexpanded.  */
+	        if((*p_arg)->elements[0]->type == FFI_TYPE_FLOAT
+				&& argp>=argp_f
+				&& i < ecif->cif->mips_nfixedargs)
+		  {
+		    *(float *) argp = *(float *)(* p_argv);
+		    argp += z;
+		    char *tmp = (void *) (*p_argv);
+		    *(float *) argp = *(float *)(tmp+4);
+		  }
+		else
+		  memcpy(argp, *p_argv, (*p_arg)->size);
+		break;
+#endif
 	      /* This can only happen with 64bit slots.  */
 	      case FFI_TYPE_FLOAT:
 		*(float *) argp = *(float *)(* p_argv);
@@ -235,6 +269,24 @@ static void ffi_prep_args(char *stack,
    passed in an integer register". This code traverses structure
    definitions and generates the appropriate flags. */
 
+static int
+calc_n32_struct_flags_element(unsigned *flags, ffi_type *e,
+			      unsigned *loc, unsigned *arg_reg)
+{
+  /* Lay out element E at *loc: align first; a double also tags its slot in *flags.  */
+  *loc = FFI_ALIGN(*loc, e->alignment);
+  if (e->type == FFI_TYPE_DOUBLE)
+    {
+      /* Already aligned to FFI_SIZEOF_ARG, so the division yields the slot index.  */
+      *arg_reg = *loc / FFI_SIZEOF_ARG;
+      if (*arg_reg > 7)
+	return 1;  /* slot index is past 7; callers break out of their loop */
+      *flags += (FFI_TYPE_DOUBLE << (*arg_reg * FFI_FLAG_BITS));
+    }
+  *loc += e->size;  /* advance past this element */
+  return 0;  /* element placed */
+}
+
 static unsigned
 calc_n32_struct_flags(int soft_float, ffi_type *arg,
 		      unsigned *loc, unsigned *arg_reg)
@@ -249,19 +301,16 @@ calc_n32_struct_flags(int soft_float, ffi_type *arg,
 
   while ((e = arg->elements[index]))
     {
-      /* Align this object.  */
-      *loc = FFI_ALIGN(*loc, e->alignment);
-      if (e->type == FFI_TYPE_DOUBLE)
+      if (e->type == FFI_TYPE_COMPLEX)
 	{
-          /* Already aligned to FFI_SIZEOF_ARG.  */
-          *arg_reg = *loc / FFI_SIZEOF_ARG;
-          if (*arg_reg > 7)
-            break;
-	  flags += (FFI_TYPE_DOUBLE << (*arg_reg * FFI_FLAG_BITS));
-          *loc += e->size;
+	  if (calc_n32_struct_flags_element(&flags, e->elements[0], loc, arg_reg))
+	    break;
+	  if (calc_n32_struct_flags_element(&flags, e->elements[0], loc, arg_reg))
+	    break;
 	}
       else
-        *loc += e->size;
+	if (calc_n32_struct_flags_element(&flags, e, loc, arg_reg))
+	  break;
       index++;
     }
   /* Next Argument register at alignment of FFI_SIZEOF_ARG.  */
@@ -273,7 +322,7 @@ calc_n32_struct_flags(int soft_float, ffi_type *arg,
 static unsigned
 calc_n32_return_struct_flags(int soft_float, ffi_type *arg)
 {
-  unsigned flags = 0;
+  unsigned flags;
   unsigned small = FFI_TYPE_SMALLSTRUCT;
   ffi_type *e;
 
@@ -292,33 +341,48 @@ calc_n32_return_struct_flags(int soft_float, ffi_type *arg)
 
   e = arg->elements[0];
 
-  if (e->type == FFI_TYPE_DOUBLE)
-    flags = FFI_TYPE_DOUBLE;
-  else if (e->type == FFI_TYPE_FLOAT)
-    flags = FFI_TYPE_FLOAT;
-
-  if (flags && (e = arg->elements[1]))
+  if (e->type == FFI_TYPE_COMPLEX)
     {
-      if (e->type == FFI_TYPE_DOUBLE)
-	flags += FFI_TYPE_DOUBLE << FFI_FLAG_BITS;
-      else if (e->type == FFI_TYPE_FLOAT)
-	flags += FFI_TYPE_FLOAT << FFI_FLAG_BITS;
-      else 
+      int type = e->elements[0]->type;
+
+      if (type != FFI_TYPE_DOUBLE && type != FFI_TYPE_FLOAT)
 	return small;
 
-      if (flags && (arg->elements[2]))
+      if (arg->elements[1])
 	{
-	  /* There are three arguments and the first two are 
-	     floats! This must be passed the old way. */
+	  /* Two floating point fields with more fields!
+	     This must be passed the old way. */
 	  return small;
 	}
-      if (soft_float)
-	flags += FFI_TYPE_STRUCT_SOFT;
+
+      flags = (type << FFI_FLAG_BITS) + type;
     }
   else
-    if (!flags)
-      return small;
+    {
+      if (e->type != FFI_TYPE_DOUBLE && e->type != FFI_TYPE_FLOAT)
+	return small;
+
+      flags = e->type;
+
+      if (arg->elements[1])
+	{
+	  e = arg->elements[1];
+	  if (e->type != FFI_TYPE_DOUBLE && e->type != FFI_TYPE_FLOAT)
+	    return small;
+
+	  if (arg->elements[2])
+	    {
+	      /* There are three arguments and the first two are
+		 floats! This must be passed the old way. */
+	      return small;
+	    }
 
+	  flags += e->type << FFI_FLAG_BITS;
+	}
+    }
+
+  if (soft_float)
+    flags += FFI_TYPE_STRUCT_SOFT;
   return flags;
 }
 
@@ -335,7 +399,7 @@ static ffi_status ffi_prep_cif_machdep_int(ffi_cif *cif, unsigned nfixedargs)
    * does not have special handling for floating point args.
    */
 
-  if (cif->rtype->type != FFI_TYPE_STRUCT && cif->abi == FFI_O32)
+  if (cif->rtype->type != FFI_TYPE_STRUCT && cif->rtype->type != FFI_TYPE_COMPLEX && cif->abi == FFI_O32)
     {
       if (cif->nargs > 0 && cif->nargs == nfixedargs)
 	{
@@ -403,7 +467,10 @@ static ffi_status ffi_prep_cif_machdep_int(ffi_cif *cif, unsigned nfixedargs)
         case FFI_TYPE_STRUCT:
         case FFI_TYPE_FLOAT:
         case FFI_TYPE_DOUBLE:
+        case FFI_TYPE_COMPLEX:
           cif->flags += cif->rtype->type << (FFI_FLAG_BITS * 2);
+	  if (cif->rtype->type == FFI_TYPE_COMPLEX)
+            cif->flags +=  ((*cif->rtype->elements[0]).type) << (FFI_FLAG_BITS * 4);
           break;
 
         case FFI_TYPE_SINT64:
@@ -421,7 +488,6 @@ static ffi_status ffi_prep_cif_machdep_int(ffi_cif *cif, unsigned nfixedargs)
 #ifdef FFI_MIPS_N32
   /* Set the flags necessary for N32 processing */
   {
-    int type;
     unsigned arg_reg = 0;
     unsigned loc = 0;
     unsigned count = (cif->nargs < 8) ? cif->nargs : 8;
@@ -453,29 +519,14 @@ static ffi_status ffi_prep_cif_machdep_int(ffi_cif *cif, unsigned nfixedargs)
 
     while (count-- > 0 && arg_reg < 8)
       {
-	type = (cif->arg_types)[index]->type;
+	ffi_type *t = cif->arg_types[index];
 
-	// Pass variadic arguments in integer registers even if they're floats
-	if (soft_float || index >= nfixedargs)
-	  {
-	    switch (type)
-	      {
-	      case FFI_TYPE_FLOAT:
-		type = FFI_TYPE_UINT32;
-		break;
-	      case FFI_TYPE_DOUBLE:
-		type = FFI_TYPE_UINT64;
-		break;
-	      default:
-		break;
-	      }
-	  }
-	switch (type)
+	switch (t->type)
 	  {
 	  case FFI_TYPE_FLOAT:
 	  case FFI_TYPE_DOUBLE:
-	    cif->flags +=
-              ((cif->arg_types)[index]->type << (arg_reg * FFI_FLAG_BITS));
+	    if (!soft_float && index < nfixedargs)
+              cif->flags += t->type << (arg_reg * FFI_FLAG_BITS);
 	    arg_reg++;
 	    break;
           case FFI_TYPE_LONGDOUBLE:
@@ -491,17 +542,71 @@ static ffi_status ffi_prep_cif_machdep_int(ffi_cif *cif, unsigned nfixedargs)
 		cif->flags +=
 		  (FFI_TYPE_DOUBLE << (arg_reg * FFI_FLAG_BITS));
 		arg_reg++;
+		if (arg_reg >= 8)
+		  continue;
 		cif->flags +=
 		  (FFI_TYPE_DOUBLE << (arg_reg * FFI_FLAG_BITS));
 		arg_reg++;
 	      }
             break;
 
+	  case FFI_TYPE_COMPLEX:
+	    switch (t->elements[0]->type)
+	      {
+	      case FFI_TYPE_LONGDOUBLE:
+		arg_reg = FFI_ALIGN(arg_reg, 2);
+		if (soft_float || index >= nfixedargs)
+		  {
+		    arg_reg += 2;
+		  }
+		else
+		  {
+		    cif->flags +=
+		      (FFI_TYPE_DOUBLE << (arg_reg * FFI_FLAG_BITS));
+		    arg_reg++;
+		    if (arg_reg >= 8)
+		        continue;
+		    cif->flags +=
+		      (FFI_TYPE_DOUBLE << (arg_reg * FFI_FLAG_BITS));
+		    arg_reg++;
+		    if (arg_reg >= 8)
+		        continue;
+		  }
+		/* fall through */
+	      case FFI_TYPE_FLOAT:
+		// one FPR can hold only one arg, even if it is single precision
+		cif->bytes += 16;
+		/* fall through */
+	      case FFI_TYPE_SINT32:
+	      case FFI_TYPE_UINT32:
+	      case FFI_TYPE_DOUBLE:
+		if (soft_float || index >= nfixedargs)
+		  {
+		    arg_reg += 2;
+		  }
+		else
+		  {
+		    uint32_t type = t->elements[0]->type != FFI_TYPE_LONGDOUBLE? t->elements[0]->type: FFI_TYPE_DOUBLE;
+		    cif->flags +=
+		      (type << (arg_reg * FFI_FLAG_BITS));
+		    arg_reg++;
+		    if (arg_reg >= 8)
+		        continue;
+		    cif->flags +=
+		      (type << (arg_reg * FFI_FLAG_BITS));
+		    arg_reg++;
+		  }
+		break;
+	      default:
+		arg_reg += 2;
+		break;
+	      }
+	    break;
+
 	  case FFI_TYPE_STRUCT:
             loc = arg_reg * FFI_SIZEOF_ARG;
 	    cif->flags += calc_n32_struct_flags(soft_float || index >= nfixedargs,
-						(cif->arg_types)[index],
-						&loc, &arg_reg);
+						t, &loc, &arg_reg);
 	    break;
 
 	  default:
@@ -574,13 +679,40 @@ static ffi_status ffi_prep_cif_machdep_int(ffi_cif *cif, unsigned nfixedargs)
 					      << (4 + (FFI_FLAG_BITS * 8));
 	  }
 	break;
+      case FFI_TYPE_COMPLEX:
+	{
+	  int type = cif->rtype->elements[0]->type;
+
+	  cif->flags += (FFI_TYPE_COMPLEX << (FFI_FLAG_BITS * 8));
+	  if (soft_float || (type != FFI_TYPE_FLOAT && type != FFI_TYPE_DOUBLE && type != FFI_TYPE_LONGDOUBLE))
+	    {
+	      switch (type)
+		{
+		case FFI_TYPE_DOUBLE:
+		case FFI_TYPE_SINT64:
+		case FFI_TYPE_UINT64:
+		case FFI_TYPE_INT:
+		  type = FFI_TYPE_SMALLSTRUCT2;
+		  break;
+		default:
+		  type = FFI_TYPE_SMALLSTRUCT;
+		}
+	      cif->flags += type << (4 + (FFI_FLAG_BITS * 8));
+	    }
+	  else
+	    {
+	      //cif->flags += (type + (type << FFI_FLAG_BITS))
+		//	    << (4 + (FFI_FLAG_BITS * 8));
+	      cif->flags += type << (4 + (FFI_FLAG_BITS * 8));
+	    }
+	  break;
+	}
       default:
 	cif->flags += FFI_TYPE_INT << (FFI_FLAG_BITS * 8);
 	break;
       }
   }
 #endif
-  
   return FFI_OK;
 }
 
@@ -618,7 +750,7 @@ void ffi_call_int(ffi_cif *cif, void (*fn)(void), void *rvalue,
   /* value address then we need to make one		        */
   
   if ((rvalue == NULL) && 
-      (cif->rtype->type == FFI_TYPE_STRUCT))
+      (cif->rtype->type == FFI_TYPE_STRUCT || cif->rtype->type == FFI_TYPE_COMPLEX))
     ecif.rvalue = alloca(cif->rtype->size);
   else
     ecif.rvalue = rvalue;
@@ -830,6 +962,11 @@ ffi_closure_mips_inner_O32 (ffi_cif *cif,
       argn = 1;
       seen_int = 1;
     }
+  if ((cif->flags >> (FFI_FLAG_BITS * 2)) == FFI_TYPE_COMPLEX)
+    {
+      rvalue = fpr;
+      argn = 1;
+    }
 
   i = 0;
   avn = cif->nargs;
@@ -902,6 +1039,9 @@ ffi_closure_mips_inner_O32 (ffi_cif *cif,
     }
   else
     {
+      if (cif->rtype->type == FFI_TYPE_COMPLEX) {
+          __asm__ volatile ("move $v1, %0" : : "r"(cif->rtype->size));
+      }
       return cif->rtype->type;
     }
 }
@@ -991,6 +1131,8 @@ ffi_closure_mips_inner_N32 (ffi_cif *cif,
 #endif
       argn = 1;
     }
+  if (cif->rtype->type == FFI_TYPE_COMPLEX && cif->rtype->elements[0]->type == FFI_TYPE_LONGDOUBLE)
+    argn = 2;
 
   i = 0;
   avn = cif->nargs;
@@ -1015,6 +1157,31 @@ ffi_closure_mips_inner_N32 (ffi_cif *cif,
 #endif
             avaluep[i] = (char *) argp;
         }
+      else if (arg_types[i]->type == FFI_TYPE_COMPLEX && arg_types[i]->elements[0]->type == FFI_TYPE_DOUBLE)
+        {
+          argp = (argn >= 8 || i >= cif->mips_nfixedargs || soft_float) ? ar + argn : fpr + argn;
+          avaluep[i] = (char *) argp;
+        }
+      else if (arg_types[i]->type == FFI_TYPE_COMPLEX && arg_types[i]->elements[0]->type == FFI_TYPE_LONGDOUBLE)
+        {
+	  /* align long double */
+	  argn += ((argn & 0x1)? 1 : 0);
+          argp = (argn >= 8 || i >= cif->mips_nfixedargs || soft_float) ? ar + argn : fpr + argn;
+          avaluep[i] = (char *) argp;
+        }
+      else if (arg_types[i]->type == FFI_TYPE_COMPLEX && arg_types[i]->elements[0]->type == FFI_TYPE_FLOAT)
+        {
+          if (argn >= 8 || i >= cif->mips_nfixedargs || soft_float)
+	     argp = ar + argn;
+	  else
+	    {
+	      argp = fpr + argn;
+	      /* Each arg normally occupies an 8-byte slot; move the second float next to the first because avalue gets a pointer to them.  */
+	      uint32_t *tmp = (uint32_t *)argp;
+	      tmp[1] = tmp[2];
+	    }
+          avaluep[i] = (char *) argp;
+        }
       else
         {
           unsigned type = arg_types[i]->type;
diff --git a/src/mips/ffitarget.h b/src/mips/ffitarget.h
index fdd5ca9..61d04f9 100644
--- a/src/mips/ffitarget.h
+++ b/src/mips/ffitarget.h
@@ -80,6 +80,7 @@
 #  endif
 #endif
 
+#define FFI_TARGET_HAS_COMPLEX_TYPE 1
 #define FFI_FLAG_BITS 2
 
 /* SGI's strange assembler requires that we multiply by 4 rather 
@@ -111,6 +112,11 @@
 #define FFI_TYPE_STRUCT_SMALL  93
 #define FFI_TYPE_STRUCT_SMALL2 109
 
+#define FFI_TYPE_COMPLEX_II    95
+#define FFI_TYPE_COMPLEX_FF    47
+#define FFI_TYPE_COMPLEX_DD    63
+#define FFI_TYPE_COMPLEX_LDLD  79
+
 /* and for n32 soft float, add 16 * 2^4 */
 #define FFI_TYPE_STRUCT_D_SOFT      317
 #define FFI_TYPE_STRUCT_F_SOFT      301
diff --git a/src/mips/n32.S b/src/mips/n32.S
index 23b77fd..f9bfa5a 100644
--- a/src/mips/n32.S
+++ b/src/mips/n32.S
@@ -114,6 +114,16 @@ loadregs:
 
 	REG_L	t6, 3*FFI_SIZEOF_ARG($fp)  # load the flags word into t6.
 
+	# when retval is _Complex long double, $f12/$a0, $f13/$a1 will be skipped
+	# no idea why, but gcc does it.
+	SRL	t4, t6, 8*FFI_FLAG_BITS
+	move	t8, t6
+	bne	t4, FFI_TYPE_COMPLEX_LDLD, loadregs1
+
+	SLL	t8, t6, 2*FFI_FLAG_BITS
+
+
+loadregs1:
 #ifdef __mips_soft_float
 	REG_L	a0, 0*FFI_SIZEOF_ARG(t9)
 	REG_L	a1, 1*FFI_SIZEOF_ARG(t9)
@@ -124,7 +134,7 @@ loadregs:
 	REG_L	a6, 6*FFI_SIZEOF_ARG(t9)
 	REG_L	a7, 7*FFI_SIZEOF_ARG(t9)
 #else
-	and	t4, t6, ((1<<FFI_FLAG_BITS)-1)
+	and	t4, t8, ((1<<FFI_FLAG_BITS)-1)
 	REG_L	a0, 0*FFI_SIZEOF_ARG(t9)
 	beqz	t4, arg1_next
 	bne	t4, FFI_TYPE_FLOAT, arg1_doublep
@@ -134,7 +144,7 @@ arg1_doublep:
 	l.d	$f12, 0*FFI_SIZEOF_ARG(t9)
 arg1_next:	
 	
-	SRL	t4, t6, 1*FFI_FLAG_BITS
+	SRL	t4, t8, 1*FFI_FLAG_BITS
 	and	t4, ((1<<FFI_FLAG_BITS)-1)
 	REG_L	a1, 1*FFI_SIZEOF_ARG(t9)
 	beqz	t4, arg2_next
@@ -145,7 +155,7 @@ arg2_doublep:
 	l.d	$f13, 1*FFI_SIZEOF_ARG(t9)	
 arg2_next:	
 	
-	SRL	t4, t6, 2*FFI_FLAG_BITS
+	SRL	t4, t8, 2*FFI_FLAG_BITS
 	and	t4, ((1<<FFI_FLAG_BITS)-1)
 	REG_L	a2, 2*FFI_SIZEOF_ARG(t9)
 	beqz	t4, arg3_next
@@ -156,7 +166,7 @@ arg3_doublep:
 	l.d	$f14, 2*FFI_SIZEOF_ARG(t9)	
 arg3_next:	
 	
-	SRL	t4, t6, 3*FFI_FLAG_BITS
+	SRL	t4, t8, 3*FFI_FLAG_BITS
 	and	t4, ((1<<FFI_FLAG_BITS)-1)
 	REG_L	a3, 3*FFI_SIZEOF_ARG(t9)
 	beqz	t4, arg4_next
@@ -167,7 +177,7 @@ arg4_doublep:
 	l.d	$f15, 3*FFI_SIZEOF_ARG(t9)	
 arg4_next:	
 	
-	SRL	t4, t6, 4*FFI_FLAG_BITS
+	SRL	t4, t8, 4*FFI_FLAG_BITS
 	and	t4, ((1<<FFI_FLAG_BITS)-1)
 	REG_L	a4, 4*FFI_SIZEOF_ARG(t9)
 	beqz	t4, arg5_next
@@ -178,7 +188,7 @@ arg5_doublep:
 	l.d	$f16, 4*FFI_SIZEOF_ARG(t9)	
 arg5_next:	
 	
-	SRL	t4, t6, 5*FFI_FLAG_BITS
+	SRL	t4, t8, 5*FFI_FLAG_BITS
 	and	t4, ((1<<FFI_FLAG_BITS)-1)
 	REG_L	a5, 5*FFI_SIZEOF_ARG(t9)
 	beqz	t4, arg6_next
@@ -189,7 +199,7 @@ arg6_doublep:
 	l.d	$f17, 5*FFI_SIZEOF_ARG(t9)	
 arg6_next:	
 	
-	SRL	t4, t6, 6*FFI_FLAG_BITS
+	SRL	t4, t8, 6*FFI_FLAG_BITS
 	and	t4, ((1<<FFI_FLAG_BITS)-1)
 	REG_L	a6, 6*FFI_SIZEOF_ARG(t9)
 	beqz	t4, arg7_next
@@ -200,7 +210,7 @@ arg7_doublep:
 	l.d	$f18, 6*FFI_SIZEOF_ARG(t9)	
 arg7_next:	
 	
-	SRL	t4, t6, 7*FFI_FLAG_BITS
+	SRL	t4, t8, 7*FFI_FLAG_BITS
 	and	t4, ((1<<FFI_FLAG_BITS)-1)
 	REG_L	a7, 7*FFI_SIZEOF_ARG(t9)
 	beqz	t4, arg8_next
@@ -212,7 +222,7 @@ arg8_doublep:
 arg8_next:	
 #endif
 
-callit:		
+callit:
 	# Load the function pointer
 	REG_L	t9, 5*FFI_SIZEOF_ARG($fp)
 
@@ -263,16 +273,37 @@ retstruct_f:
 	s.s	$f0, 0(t4)
 	b	epilogue
 	
-retstruct_d_d:	
-	bne	t6, FFI_TYPE_STRUCT_DD, retstruct_f_f
+retstruct_d_d:
+	bne	t6, FFI_TYPE_STRUCT_DD, retcomplex_d_d
+	jal	t9
+	REG_L	t4, 4*FFI_SIZEOF_ARG($fp)
+	s.d	$f0, 0(t4)
+	s.d	$f2, 8(t4)
+	b	epilogue
+
+retcomplex_d_d:
+	bne	t6, FFI_TYPE_COMPLEX_DD, retcomplex_ld_ld
 	jal	t9
 	REG_L	t4, 4*FFI_SIZEOF_ARG($fp)
 	s.d	$f0, 0(t4)
 	s.d	$f2, 8(t4)
 	b	epilogue
+
+retcomplex_ld_ld:
+	bne	t6, FFI_TYPE_COMPLEX_LDLD, retstruct_f_f
+	jal	t9
+	b	epilogue
 	
-retstruct_f_f:	
-	bne	t6, FFI_TYPE_STRUCT_FF, retstruct_d_f
+retstruct_f_f:
+	bne	t6, FFI_TYPE_STRUCT_FF, retcomplex_f_f
+	jal	t9
+	REG_L	t4, 4*FFI_SIZEOF_ARG($fp)
+	s.s	$f0, 0(t4)
+	s.s	$f2, 4(t4)
+	b	epilogue
+
+retcomplex_f_f:
+	bne	t6, FFI_TYPE_COMPLEX_FF, retstruct_d_f
 	jal	t9
 	REG_L	t4, 4*FFI_SIZEOF_ARG($fp)
 	s.s	$f0, 0(t4)
@@ -350,13 +381,20 @@ retstruct_small:
 	b	epilogue
 	
 retstruct_small2:	
-	bne	t6, FFI_TYPE_STRUCT_SMALL2, retstruct
+	bne	t6, FFI_TYPE_STRUCT_SMALL2, retcomplex_i_i
 	jal	t9
 	REG_L	t4, 4*FFI_SIZEOF_ARG($fp)
 	REG_S	v0, 0(t4)
 	REG_S	v1, 8(t4)
 	b	epilogue
 	
+retcomplex_i_i:
+	bne	t6, FFI_TYPE_COMPLEX_II, retstruct
+	jal	t9
+	REG_L	t4, 4*FFI_SIZEOF_ARG($fp)
+	REG_S	v0, 0(t4)
+	b	epilogue
+
 retstruct:	
 noretval:	
 	jal	t9
@@ -403,7 +441,7 @@ epilogue:
 	 */
 
 #define SIZEOF_FRAME2	(20 * FFI_SIZEOF_ARG)
-	
+
 #define A7_OFF2		(19 * FFI_SIZEOF_ARG)
 #define A6_OFF2		(18 * FFI_SIZEOF_ARG)
 #define A5_OFF2		(17 * FFI_SIZEOF_ARG)
@@ -485,10 +523,17 @@ ffi_closure_N32:
 
 	# Call ffi_closure_mips_inner_N32 to do the real work.
 	LA	t9, ffi_closure_mips_inner_N32
+#if _MIPS_SIM==_ABIN32
+	lw	a0, 20($12)   # cif
+	lw	a1, 24($12)   # fun
+	lw	a2, 28($12) # user_data
+#else
 	REG_L	a0, 56($12)   # cif
 	REG_L	a1, 64($12)   # fun
 	REG_L	a2, 72($12) # user_data
+#endif
 	ADDU	a3, $sp, V0_OFF2
+	# FIXME: a4 does work, while if ret is _Complex long double, it will overwrite Fn_OFF2
 	ADDU	a4, $sp, A0_OFF2
 	ADDU	a5, $sp, F12_OFF2
 
@@ -544,17 +589,42 @@ cls_retstruct_f:
 	b	cls_epilogue
 	
 cls_retstruct_d_d:	
-	bne	v0, FFI_TYPE_STRUCT_DD, cls_retstruct_f_f
+	bne	v0, FFI_TYPE_STRUCT_DD, cls_retcomplex_d_d
+	l.d	$f0, V0_OFF2($sp)
+	l.d	$f2, V1_OFF2($sp)
+	b	cls_epilogue
+
+cls_retcomplex_d_d:
+	bne	v0, FFI_TYPE_COMPLEX_DD, cls_retcomplex_ld_ld
 	l.d	$f0, V0_OFF2($sp)
 	l.d	$f2, V1_OFF2($sp)
 	b	cls_epilogue
 	
+cls_retcomplex_ld_ld:
+	bne	v0, FFI_TYPE_COMPLEX_LDLD, cls_retstruct_f_f
+	REG_L	t8, A0_OFF2($sp)
+	REG_L	t9, 16($sp)
+	REG_S	t9, 0(t8)
+	REG_L	t9, 24($sp)
+	REG_S	t9, 8(t8)
+	REG_L	t9, 32($sp)
+	REG_S	t9, 16(t8)
+	REG_L	t9, 40($sp)
+	REG_S	t9, 24(t8)
+	b	cls_epilogue
+
 cls_retstruct_f_f:	
-	bne	v0, FFI_TYPE_STRUCT_FF, cls_retstruct_d_f
+	bne	v0, FFI_TYPE_STRUCT_FF, cls_retcomplex_f_f
 	l.s	$f0, V0_OFF2($sp)
 	l.s	$f2, V1_OFF2($sp)
 	b	cls_epilogue
 	
+cls_retcomplex_f_f:
+	bne	v0, FFI_TYPE_COMPLEX_FF, cls_retstruct_d_f
+	l.s	$f0, V0_OFF2($sp)
+	l.s	$f2, (V0_OFF2+4)($sp)
+	b	cls_epilogue
+
 cls_retstruct_d_f:	
 	bne	v0, FFI_TYPE_STRUCT_DF, cls_retstruct_f_d
 	l.d	$f0, V0_OFF2($sp)
diff --git a/src/mips/o32.S b/src/mips/o32.S
index 799139b..f1a39c5 100644
--- a/src/mips/o32.S
+++ b/src/mips/o32.S
@@ -133,6 +133,7 @@ pass_f_d:
  #	bne	t0, FFI_ARGS_F_D, call_it
 	l.s	$f12, 0*FFI_SIZEOF_ARG($sp)	# load $fp regs from args
 	l.d	$f14, 2*FFI_SIZEOF_ARG($sp)	# passing double and float
+
 #endif
 
 call_it:	
@@ -146,7 +147,8 @@ call_it:
 	REG_L	t1, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp)
 	beqz	t1, noretval
 
-	bne     t2, FFI_TYPE_INT, retlonglong
+	and     t1, t2, ((1<<4)-1)
+	bne     t1, FFI_TYPE_INT, retlonglong
 	jalr	t9
 	REG_L	t0, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp)
 	REG_S	v0, 0(t0)
@@ -154,7 +156,7 @@ call_it:
 
 retlonglong:
 	# Really any 64-bit int, signed or not.
-	bne	t2, FFI_TYPE_UINT64, retfloat
+	bne	t1, FFI_TYPE_UINT64, retfloat
 	jalr	t9
 	REG_L	t0, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp)
 	REG_S	v1, 4(t0)
@@ -162,7 +164,7 @@ retlonglong:
 	b	epilogue
 
 retfloat:
-	bne     t2, FFI_TYPE_FLOAT, retdouble
+	bne     t1, FFI_TYPE_FLOAT, retdouble
 	jalr	t9
 	REG_L	t0, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp)
 #ifndef __mips_soft_float
@@ -173,7 +175,7 @@ retfloat:
 	b	epilogue
 
 retdouble:	
-	bne	t2, FFI_TYPE_DOUBLE, noretval
+	bne	t1, FFI_TYPE_DOUBLE, retcomplex
 	jalr	t9
 	REG_L	t0, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp)
 #ifndef __mips_soft_float
@@ -184,6 +186,48 @@ retdouble:
 #endif
 	b	epilogue
 	
+retcomplex:
+	# mask out the complex elements type.
+	# the struct of flags (bits):
+	# 0-1: arg0
+	# 2-3: arg1
+	# 4-7: return type
+	# 8-11: rtype elements type: for complex
+	# Note here: t2 is flags>>4
+	bne	t1, FFI_TYPE_COMPLEX, noretval
+	jalr	t9
+	REG_L	t0, SIZEOF_FRAME + 4*FFI_SIZEOF_ARG($fp)
+	REG_L	t1, A3_OFF($fp)		# load the flags word
+	SRL	t1, t1, 8
+	li	t3, 3
+	beq	t1, t3, 3f # double
+	li	t3, 2
+	beq	t1, t3, 2f # float
+	# FIXME: long double
+	slti	t3, t1, 5
+	beqz	t3, 5f     # (u)int8/16/32/64
+2:
+#ifndef __mips_soft_float
+	s.s	$f0, 0(t0)
+	s.s	$f2, 4(t0)
+#else
+	FIXME
+#endif
+	b	epilogue
+3:
+#ifndef __mips_soft_float
+	s.d	$f0, 0(t0)
+	s.d	$f2, 8(t0)
+#else
+	FIXME
+#endif
+	b	epilogue
+
+5:
+	REG_S	v1, 4(t0)
+	REG_S	v0, 0(t0)
+	b	epilogue
+
 noretval:	
 	jalr	t9
 	
@@ -378,6 +422,19 @@ $do_closure:
 	li	$9, FFI_TYPE_DOUBLE
 	l.d	$f0, V0_OFF2($fp)
 	beq	$8, $9, closure_done
+
+	li      $9, FFI_TYPE_COMPLEX
+	bne     $8, $9, 1f
+
+	li      $9, 8
+	l.s	$f0, V0_OFF2($fp)
+	l.s	$f2, V1_OFF2($fp)
+	beq     $3, $9, closure_done
+
+	li      $9, 16
+	l.d	$f0, V0_OFF2($fp)
+	l.d	$f2, (V0_OFF2+8)($fp)
+	beq     $3, $9, closure_done
 #endif
 1:	
 	REG_L	$3, V1_OFF2($fp)
-- 
cgit v1.2.1