/*
 * RSQ.C
 *
 * calculate r2+eps2
 *
 * function rsq
 *
 * Copyright Jun Makino 1997, 1998
 *
 * Version 1.0 Nov 17 1997
 * Version 1.1 Mar 2 1998
 *
 * Major changes
 * -- test mode added
 *
 */
#include "grape6sim.h"

/*
 * rsq -- sum of the squares of x, y and z plus eps2.
 *
 * Each component is multiplied by itself with mult(); the partial sums
 * (x*x + y*y) and (z*z + eps2) are formed with add() and then added
 * together.  nbits is handed to mult()/add() for all three of their
 * size arguments.
 */
ULONG rsq(ULONG x,
          ULONG y,
          ULONG z,
          ULONG eps2,
          ULONG nbits)
{
  const ULONG xsq = mult(x, x, nbits, nbits, nbits);
  const ULONG ysq = mult(y, y, nbits, nbits, nbits);
  const ULONG zsq = mult(z, z, nbits, nbits, nbits);

  const ULONG xy_part = add(xsq, ysq, nbits, nbits, nbits);
  const ULONG ze_part = add(zsq, eps2, nbits, nbits, nbits);

  return add(xy_part, ze_part, nbits, nbits, nbits);
}
/*
 * rsq_with_test -- rsq() with an optional bypass of the squaring stage.
 *
 * testmode == 0 : x, y and z are squared with mult() before summation,
 *                 exactly as in rsq().
 * testmode != 0 : x, y and z are fed to the adders unchanged.
 *
 * In both cases the result is add(add(a, b), add(c, eps2)) where a, b, c
 * are the (possibly squared) inputs.  nbits is handed to mult()/add() for
 * all three of their size arguments.
 */
ULONG rsq_with_test(ULONG x,
                    ULONG y,
                    ULONG z,
                    ULONG eps2,
                    ULONG nbits,
                    ULONG testmode)
{
  /* Start from the raw inputs; they are replaced by squares below
     unless test mode is active. */
  ULONG xterm = x;
  ULONG yterm = y;
  ULONG zterm = z;
  ULONG xy_part, ze_part;

  if (testmode == 0) {
    xterm = mult(x, x, nbits, nbits, nbits);
    yterm = mult(y, y, nbits, nbits, nbits);
    zterm = mult(z, z, nbits, nbits, nbits);
  }

  xy_part = add(xterm, yterm, nbits, nbits, nbits);
  ze_part = add(zterm, eps2, nbits, nbits, nbits);
  return add(xy_part, ze_part, nbits, nbits, nbits);
}
/*
 * inner_product -- accumulate the three element-wise products of v1 and v2.
 *
 * v1, v2       : three-element operand arrays
 * v1len, v2len : passed to mult() as the size arguments for v1[k] and
 *                v2[k], and to convert_grape_float_to_double() when
 *                tracing the operands
 * outlen       : passed to mult() as the result size and to add() for
 *                all three of its size arguments
 *
 * Returns (v1[0]*v2[0] + v1[1]*v2[1]) + v1[2]*v2[2] as evaluated by
 * mult() and add().  Intermediate values are reported through dprintf()
 * with first argument 5 (presumably a debug level -- confirm against
 * grape6sim.h).
 */
ULONG inner_product( ULONG v1[3],
	    ULONG v2[3],
	    ULONG v1len,
	    ULONG v2len,
	   ULONG outlen)

{
  ULONG product;
  ULONG sum = 0;  /* always overwritten at k == 0; initialised so that
                     compilers do not flag a possibly-uninitialised read */
  int k;

  for (k = 0; k < 3; k++) {
    product = mult(v1[k], v2[k], v1len, v2len, outlen);

    /* k is an int, so it is printed with %d; the previous %ld did not
       match the argument type. */
    dprintf(5, "(inner product) k, v1, v2 = %d %lx %lx\n", k, v1[k], v2[k]);
    dprintf(5, "(inner product) v1, v2, product = %le %le %le\n",
            convert_grape_float_to_double(v1[k], v1len),
            convert_grape_float_to_double(v2[k], v2len),
            convert_grape_float_to_double(product, outlen));

    if (k == 0) {
      /* First term: nothing to add to yet. */
      sum = product;
    } else {
      dprintf(5, "(inner product) product, sum before add =  %lx %lx %le\n",
              product, sum,
              convert_grape_float_to_double(sum, outlen));
      sum = add(sum, product, outlen, outlen, outlen);
      dprintf(5, "(inner product) sum after add =  %lx %le\n", sum,
              convert_grape_float_to_double(sum, outlen));
    }

    dprintf(5, "(inner product) sum =  %lx %le\n", sum,
            convert_grape_float_to_double(sum, outlen));
  }
  return sum;
}
#ifdef TEST
main()
{
  ULONG  ix, iy, iz, ieps2,  ir2, nbits, err ;
  double x, y, z, eps2, r2, r2exact;
  set_debug_level(4);
  printf("enter x, y, z, eps2, nbits: ");
  scanf("%le%le%le%le%ld",&x, &y, &z, &eps2, &nbits);
  ix = convert_double_to_grape_float(x, nbits);
  iy = convert_double_to_grape_float(y, nbits);
  iz = convert_double_to_grape_float(z, nbits);
  ieps2 = convert_double_to_grape_float(eps2, nbits);
  ir2 =  rsq(ix, iy, iz, ieps2, nbits);
  r2exact   = x*x + y*y + z*z + eps2;
  r2 = convert_grape_float_to_double(ir2, nbits);
  if(r2exact != 0.0){
    err = (r2exact - r2)/r2;
  }else{
    err = (r2exact - r2);
  }
  printf("r2  =  0x%lx %le %le %le\n",
	   ir2, r2, r2exact, err);
}
#endif
#ifdef SYSTEST
main()
{
  ULONG  in1, in2, inb1, inb2, outb, sum, maxbits, ibits ;
  int ntest, i, itest, iscale;
  double in1f, in2f, realsum, gsum, err;
  double errsum, err2sum;
  printf("enter nbits, ntest: ");
  scanf("%ld%d", &maxbits, &ntest);
  inb1 = inb2 = outb;

  for(ibits = 6; ibits <= maxbits; ibits++){
    itest = 0;
    iscale = 2;
    errsum = err2sum = 0.0;
    srand48((long)12345);
      inb1 = inb2 = outb = ibits;
    for(i=0;i<ntest;i++){
      in1f = drand48();
      in2f = drand48();
/*      if(ibits == 10 && (i > 8192))set_debug_level(4);*/
      in1 = convert_double_to_grape_float(in1f, inb1);
      in2 = convert_double_to_grape_float(in2f, inb2);
      dprintf(5,"in1 = %22.16le %lx %le\n",	 in1f,
	      in1, convert_grape_float_to_double(in1, inb1));
      dprintf(5,"in2 = %22.16le %lx %le\n",	 in2f,
	      in2, convert_grape_float_to_double(in2, inb2));
      sum =  add(in1, in2,  inb1, inb2,  outb);
      realsum = in1f+in2f;
      gsum = convert_grape_float_to_double(sum, outb);
      if(realsum != 0.0){
	err = (gsum - realsum)/realsum;
	itest ++;
	errsum += err;
	err2sum += err*err;
	if (fabs(err) > 0.1){
	  printf("in1 = %22.16le %lx %le\n",	 in1f,
		 in1, convert_grape_float_to_double(in1, inb1));
	  printf("in2 = %22.16le %lx %le\n",	 in2f,
		 in2, convert_grape_float_to_double(in2, inb2));
	  printf("outb, sum  = %ld 0x%lx %le %le %le\n",
		 outb, sum, realsum, gsum, err);
	  fflush(stdout);
	}
	
	
      }
      if (itest == iscale){
	printf("nbits, ntest, err, rms error = %d %d %le %le %le %le\n",
	       outb, itest, errsum, err2sum, errsum/itest, sqrt(err2sum/itest));
	  fflush(stdout);
	iscale *= 2;
      }
    }
  }
}

#endif

