/*
 * G6CHIP.C
 *
 * full simulator of a single chip with external memory unit
 *
 * Copyright Jun Makino 1997
 *
 * Version 1.0 98/02/05
 * Version 1.1 98/04/26
 *   routines to generate test vectors added 
 *
 *

 Note: Guide to setting xunit, tunit, xscale etc...
       xunit, tunit: the location of the binary point
                     for the fixed point format, counted
		     from LSB. 2^(63-xunit) gives the max value
		     for position. For systems with the standard
		     unit, xunit=tunit=54 should work fine
       fscale: -fscale+xunit*2-512 is used as the argument
               for ldexp. If the force is around 2^k, the argument
	       for ldexp should be k-(48-56), which means
	       fscale = (48-56)-512+xunit*2-k ??

       jscale: similarly,
	       jscale = (48-56)-512+xunit*3-k ??
       pscale: similarly,
	       pscale = (48-56)-512+xunit-k ??
       
 */
#include "grape6sim.h"

#define G6CHIP_BODY

#include "g6chip.h"
#include "g6control.h"


/* Number of entries in each of the static particle arrays below */
#define NMAX 100000

/* Binary-point position (counted from the LSB) of the fixed-point time
   format; written by set_tunit() */
static LONG tunit;
/* Binary-point position (counted from the LSB) of the fixed-point
   position format; written by set_xunit() */
static LONG xunit;
/* Current time in fixed-point form: ldexp(ti, tunit), see set_ti() */
static ULONG iti;
/* Number of j-particles looped over by predict_in_chip()/run_chip();
   written by set_njp() */
static int njp;
/* Number of i-particles currently loaded into the chip; written by
   move_i_particles_to_chip() */
static int nip;
/* Set by set_xunit(): xscale2 = (2^xunit)^2, xscaleinv = 1/2^xunit */
double xscale2, xscaleinv;

/* j-particle memory, indexed by the address given to
   set_j_particle_on_emulator() */
static struct jparticle jpmem[NMAX];
/* Predicted j-particles, filled by predict_in_chip() */
static struct predicted_particle predmem[NMAX];
/* i-particle memory, indexed by the address given to the set_i_* routines */
static struct iparticle ipmem[NMAX];

/* Chip control state, initialised by init_g6sim() */
static struct chip_control_struct g6cu;

/* Output stream for test-vector dumps, opened in init_g6sim() */
FILE * ftestout;
/* Dump verbosity: non-zero enables dump_ports(), > 1 adds the print_*
   listings; written by set_dump_mode() */
static int dump_mode = 0;

/* rscale of the most recently configured i-particle, see
   set_i_particle_parms_on_emulator() */
static ULONG global_rscale;





/*
 * getnpipe: return the compile-time constant NPIPEPERCHIP.
 */
int getnpipe()
{
  int npipe = NPIPEPERCHIP;

  return npipe;
}
/*
 * g6_getnpipe_: trailing-underscore variant of getnpipe() that always
 * returns 16 instead of NPIPEPERCHIP.
 */
int g6_getnpipe_()
{
  /* THIS IS ONLY FOR TEST BY NAMURA */
  const int test_npipe = 16;

  return test_npipe;
}

void set_tunit(int newtunit)
{
  tunit = newtunit;
}

/*
 * g6_set_tunit_: by-pointer wrapper around set_tunit()
 * (presumably the Fortran binding).
 */
void g6_set_tunit_(int * newtunit)
{
  int value = *newtunit;

  set_tunit(value);
}

/*
 * set_njp: store n in the file-scope j-particle count njp.
 */
void set_njp(int n)
{
  int count = n;

  njp = count;
}

/*
 * g6_set_njp_: by-pointer wrapper around set_njp()
 * (presumably the Fortran binding).
 */
void g6_set_njp_(int * n)
{
  int count = *n;

  set_njp(count);
}

/*
 * g6_set_debug_level_: by-pointer wrapper around set_debug_level()
 * (presumably the Fortran binding).
 */
void  g6_set_debug_level_(int * i)
{
  int level = *i;

  set_debug_level(level);
}

void set_xunit(int newxunit)
{
  dprintf(3,"(set_xunit) arg = %ld\n", newxunit);
  xunit = newxunit;
  xscale2 = (ULONG_ONE)<<((int)xunit);
  xscaleinv = 1.0/xscale2;
  xscale2 *= xscale2;
}
/*
 * g6_set_xunit_: by-pointer wrapper around set_xunit()
 * (presumably the Fortran binding).
 */
void g6_set_xunit_(int * pxunit)
{
  int value = *pxunit;

  set_xunit(value);
}


void  adjust_scaling_parameters(struct iparticle * inreg)
{
  inreg->fscale += xunit*2 - 512;
  inreg->jscale += xunit*2+tunit - 512;
  inreg->phiscale += xunit - 512;
}
  


/*
 * g6_set_i_xv_: by-pointer wrapper around
 * set_i_particle_xv_on_emulator() (presumably the Fortran binding).
 */
void g6_set_i_xv_(int *address,
				 double x[3], /* position */
				 double v[3] /* velocity */)
{
  int slot = *address;

  set_i_particle_xv_on_emulator(slot, x, v);
}

/*
 * set_i_particle_xv_on_emulator: store position and velocity of the
 * i-particle at ipmem[address] in the chip's number formats.
 *
 *   x : converted with CONVERT_DOUBLE_TO_GRAPE_INT_POS using xunit
 *   v : multiplied by 2^(xunit-tunit), then converted to a grape float
 */
void set_i_particle_xv_on_emulator(int address,
				 double x[3], /* position */
				 double v[3] /* velocity */)
{
  struct iparticle * target = &ipmem[address];
  int vshift = (int)(xunit-tunit);
  int axis;
  double vscaled;

  for (axis=0; axis<3; axis++){
    target->xi[axis] = CONVERT_DOUBLE_TO_GRAPE_INT_POS(x[axis],xunit);
    vscaled = ldexp(v[axis], vshift);
    target->vi[axis] = convert_double_to_grape_float(vscaled,
						     INTERACTION_F_LEN_U);
  }
}

/*
 * g6_set_i_scales_: by-pointer wrapper around
 * set_i_particle_scales_on_emulator() (presumably the Fortran binding).
 */
void g6_set_i_scales_(int *  address,
				       int *fscale,
				       int  *jscale,
				       int  *phiscale)
{
  int slot = *address;
  int f = *fscale;
  int j = *jscale;
  int p = *phiscale;

  set_i_particle_scales_on_emulator(slot, f, j, p);
}




/*
 * g6_set_i_parms_: by-pointer wrapper around
 * set_i_particle_parms_on_emulator() (presumably the Fortran binding).
 */
void g6_set_i_parms_(int *  address,
				       double * eps2,
				       double * h2,
				       double *rscale,
				       int *index)
{
  int slot = *address;
  double eps2_value = *eps2;
  double h2_value = *h2;
  double rscale_value = *rscale;
  int id = *index;

  set_i_particle_parms_on_emulator(slot, eps2_value, h2_value,
				   rscale_value, id);
}

void set_i_particle_parms_on_emulator(int address,
				       double eps2,
				       double h2,
				       double rscale,
				       int index)
{
  struct iparticle * ip;
  ip = ipmem + address;

  ip->eps2 = convert_double_to_grape_float(eps2*xscale2, INTERACTION_F_LEN_U);
  ip->h2 = convert_double_to_grape_float(h2*xscale2, INTERACTION_F_LEN_U);
  ip->rscale = convert_double_to_grape_float(xscaleinv/rscale,CUTOFF_MANTISSA_LEN);
  global_rscale = ip->rscale ;
  ip->index = index;
  
}
/*
 * set_i_particle_scales_on_emulator: store the three scale values
 * unchanged in the i-particle at ipmem[address].
 */
void set_i_particle_scales_on_emulator(int address,
				       int fscale,
				       int  jscale,
				       int  phiscale)
{
  struct iparticle * target = &ipmem[address];

  target->phiscale = phiscale;
  target->jscale = jscale;
  target->fscale = fscale;
}

/* Reference exponents for the *_from_real_value routines below: a scale
   is derived as (acc_point or jerk_point) minus the frexp() exponent of
   the value.  acc_point is used for both fscale and phiscale. */
static int acc_point  = 50;
static int jerk_point = 27;

/*
 * g6_adjust_i_scales_from_real_: by-pointer wrapper around
 * adjust_i_particle_scales_on_emulator_from_real_value()
 * (presumably the Fortran binding).
 */
void g6_adjust_i_scales_from_real_(int *address,
				       double acc[3],
				       double jerk[3],
				       double *phi)
{
  int slot = *address;
  double potential = *phi;

  adjust_i_particle_scales_on_emulator_from_real_value(slot, acc, jerk,
						       potential);
}

/*
 * g6_set_i_scales_from_real_value_: by-pointer wrapper around
 * set_i_particle_scales_on_emulator_from_real_value()
 * (presumably the Fortran binding).
 */
void g6_set_i_scales_from_real_value_(int *address,
				       double acc[3],
				       double jerk[3],
				       double *phi)
{
  int slot = *address;
  double potential = *phi;

  set_i_particle_scales_on_emulator_from_real_value(slot, acc, jerk,
						    potential);
}

/*
 * set_i_particle_scales_on_emulator_from_real_value: derive the three
 * scales of the i-particle at ipmem[address] from real-valued estimates.
 *
 * Each scale is a reference point minus the binary exponent (as reported
 * by frexp) of the relevant magnitude:
 *   fscale   = acc_point  - exponent of max |acc[k]|
 *   jscale   = jerk_point - exponent of max |jerk[k]|
 *   phiscale = acc_point  - exponent of phi
 */
void set_i_particle_scales_on_emulator_from_real_value(int address,
				       double acc[3],
				       double jerk[3],
				       double phi)
{
  struct iparticle * target = &ipmem[address];
  double amax = fabs(acc[0]);
  double jmax = fabs(jerk[0]);
  int axis;
  int expo;

  for(axis=1; axis<3; axis++){
    double a = fabs(acc[axis]);
    double j = fabs(jerk[axis]);
    if (a > amax) amax = a;
    if (j > jmax) jmax = j;
  }

  frexp(amax, &expo);
  target->fscale = acc_point - expo;

  frexp(jmax, &expo);
  target->jscale = jerk_point - expo;

  frexp(phi, &expo);
  target->phiscale = acc_point - expo;
}
/*
 * adjust_i_particle_scales_on_emulator_from_real_value: nudge the stored
 * scales of the i-particle at ipmem[address] towards the values implied
 * by the real-valued acc, jerk and phi.
 *
 * For each scale a goal is computed as (acc_point or jerk_point) minus
 * the frexp() exponent of the magnitude.  The same rule is then applied
 * to fscale, jscale and phiscale:
 *   stored == goal : stored scale is lowered by 10
 *   stored >  goal : stored scale is set to the goal at once
 *   stored <  goal : stored scale is raised by one
 */
void adjust_i_particle_scales_on_emulator_from_real_value(int address,
				       double acc[3],
				       double jerk[3],
				       double phi)
{
  struct iparticle * target = &ipmem[address];
  double amax = fabs(acc[0]);
  double jmax = fabs(jerk[0]);
  int axis;
  int expo;
  int goal;

  for(axis=1; axis<3; axis++){
    double a = fabs(acc[axis]);
    double j = fabs(jerk[axis]);
    if (a > amax) amax = a;
    if (j > jmax) jmax = j;
  }

  /* force scale */
  frexp(amax, &expo);
  goal = acc_point - expo;
  if (target->fscale == goal){
      /* exactly same, not clear what should be done */
      target->fscale -= 10;
  }else if (target->fscale > goal){
      /* preset fscale is too large, immediate adjust */
      target->fscale = goal;
  }else{
      /* preset fscale is too small. Incremental change */
      target->fscale ++;
  }

  /* jerk scale */
  frexp(jmax, &expo);
  goal = jerk_point - expo;
  if (target->jscale == goal){
      target->jscale -= 10;
  }else if (target->jscale > goal){
      target->jscale = goal;
  }else{
      target->jscale ++;
  }

  /* potential scale */
  frexp(phi, &expo);
  goal = acc_point - expo;
  if (target->phiscale == goal){
      target->phiscale -= 10;
  }else if (target->phiscale > goal){
      target->phiscale = goal;
  }else{
      target->phiscale ++;
  }
}

  

void get_i_particle_parms_on_emulator(int address,
				       int * fscale,
				       int * jscale,
				       int * phiscale)
{
  struct iparticle * ip;
  ip = ipmem + address;
  *fscale = ip->fscale;
  *jscale = ip->jscale;
  *phiscale = ip->phiscale;
}

/* The single chip instance used by all g6_* wrappers that need a chip */
static struct gchip fortran_callable_gchip;

/*
 * g6_move_i_particles_: by-pointer wrapper around
 * move_i_particles_to_chip(), operating on fortran_callable_gchip.
 * Returns that function's result.
 */
int g6_move_i_particles_(int *nparticles,
			     int * index)
{
  int count = *nparticles;
  struct gchip * chip = &fortran_callable_gchip;

  return move_i_particles_to_chip(count, index, chip);
}

int move_i_particles_to_chip(int nparticles,
			     int * index,
			     struct gchip *chip)
{
  int k;
  if (nparticles > NPIPEPERCHIP){
    fprintf(stderr,"(move_i_particles_to_chip) nchip = %d too large\n",
	    nparticles);
    return 1;
  }
  for(k=0;k<nparticles; k++){
    chip->inreg[k] = ipmem[*(index+k)];
    adjust_scaling_parameters(&(chip->inreg[k]));
    dprintf(2,"(move_i_particles) index %ld\n", chip->inreg[k].index);
    dprintf(2,"(move_i_particles) xi %lx %lx %lx\n", chip->inreg[k].xi[0],
	    chip->inreg[k].xi[1], chip->inreg[k].xi[2]);
  }
  nip = nparticles;
  return 0;
}

/*
 * g6_set_j_particle_: by-pointer wrapper around
 * set_j_particle_on_emulator() (presumably the Fortran binding).
 * After storing the particle it also passes the stored entry to
 * put_jpport_data().  Returns the emulator routine's result as int.
 */
int g6_set_j_particle_(int *address,
				 int *index,
				 double *tj, /* particle time */
				 double *dtj, /* presumably the particle time step - TODO confirm */
				 double *mass,
				 double a2by18[3], /* a2dot divided by 18 */
				 double a1by6[3], /* a1dot divided by 6 */
				 double aby2[3], /* a divided by 2 */
				 double v[3], /* velocity */
				 double x[3] /* position */)
{
    int status;

    status = (int) set_j_particle_on_emulator(*address, *index,
					      *tj, *dtj, *mass,
					      a2by18, a1by6, aby2, v, x);
    put_jpport_data(*address, &jpmem[*address], &g6cu);
    return status;
}

/*
 * set_j_particle_on_emulator: store one j-particle at jpmem[address].
 *
 * index is stored unchanged, mass is converted to a grape float, and
 * the time/predictor data (tjlsb, dtjmsb, ix) are filled in by
 * convert_predictor_vector() using the current xunit and tunit.
 * Returns the value returned by convert_predictor_vector().
 */
ULONG set_j_particle_on_emulator(int address,
				 int index,
				 double tj, /* particle time */
				 double dtj, /* presumably the particle time step - TODO confirm */
				 double mass,
				 double a2by18[3], /* a2dot divided by 18 */
				 double a1by6[3], /* a1dot divided by 6 */
				 double aby2[3], /* a divided by 2 */
				 double v[3], /* velocity */
				 double x[3] /* position */)
{
  struct jparticle * target = &jpmem[address];
  ULONG status;

  target->index = index;
  target->mass = convert_double_to_grape_float(mass, INTERACTION_F_LEN_U);

  status = convert_predictor_vector(&(target->tjlsb), &(target->dtjmsb),
				    target->ix,
				    tj, dtj, x, v, aby2, a1by6, a2by18,
				    xunit, tunit);
  return status;
}




void set_ti(double ti)
{
  iti = (ULONG) ldexp(ti, (int)tunit);
}

/*
 * g6_set_ti_: by-pointer wrapper around set_ti()
 * (presumably the Fortran binding).
 */
void g6_set_ti_(double *ti)
{
  double now = *ti;

  set_ti(now);
}

void get_predictor(int address,
		   LONG ixp[3],
		   ULONG ivp[3],
		   ULONG *mass);


/*
 * get_predictor: run the predictor for the j-particle at jpmem[address]
 * at the current time iti.
 *
 *   ixp  : receives the three results of predict() called with
 *          ULONG_ZERO as its second argument
 *   ivp  : receives the three results of predict() called with
 *          ULONG_ONE as its second argument
 *   mass : receives the stored mass of the particle, unchanged
 */
void get_predictor(int address,
		   LONG ixp[3],
		   ULONG ivp[3],
		   ULONG *mass)
{
  struct jparticle * src = &jpmem[address];
  ULONG * xout = (ULONG*)ixp;
  int axis;

  *mass = src->mass;
  for(axis=0; axis<3; axis++){
    predict(&xout[axis], ULONG_ZERO, iti, src->tjlsb, src->dtjmsb,
	    src->ix[axis][4], src->ix[axis][3], src->ix[axis][2],
	    src->ix[axis][1], src->ix[axis][0]);
    predict(&ivp[axis], ULONG_ONE, iti, src->tjlsb, src->dtjmsb,
	    src->ix[axis][4], src->ix[axis][3], src->ix[axis][2],
	    src->ix[axis][1], src->ix[axis][0]);
  }
}


/*
 * predict_in_chip: fill predmem[0..njp-1] from jpmem[0..njp-1].
 *
 * For every j-particle the index is copied and get_predictor() supplies
 * the predicted xj, vj and the mass.  The predicted values are also
 * converted with convert_predicted_result() and reported through
 * dprintf at level 1; the converted doubles are not stored.
 */
void predict_in_chip()
{
  int i;
  int axis;
  double xp, vp;
  struct predicted_particle * pp;

  for(i=0; i<njp; i++){
    pp = &predmem[i];
    pp->index = jpmem[i].index;
    get_predictor(i, pp->xj,  pp->vj,  &(pp->mass));
    dprintf(1,"(predict_in_chip), i, x %d %lx %lx %lx\n",
	    i, pp->xj[0], pp->xj[1], pp->xj[2]);
    for(axis=0; axis<3; axis++){
      convert_predicted_result(&xp, &vp, pp->xj[axis], pp->vj[axis],
			       xunit, tunit);
      dprintf(1,"(predict_in_chip), i,k,  x, v %d %d %le %le\n",
	      i, axis, xp, vp);
    }
  }
}

void force_pipeline_step(int j,
			 struct gchip *chip,
			 int ipipe,
			 int * nbflag,
			 int clear)
{
    struct predicted_particle * pjp;
    struct iparticle * pip;
    struct pipe_output_register_set * pop;
    ULONG r2, unbflag;
    ULONG flags;
    ULONG accflags[7];
    int k;
    pjp = predmem+j;
    pip = &(chip->inreg[ipipe]);
    pop = &(chip->outreg[ipipe]);

	/*    if(pjp->index != pip->index){*/

    
    flags = force(pjp->xj, pjp->vj, pjp->mass,
		  pip->xi, pip->vi, pip->eps2, pip->h2,
		  pip->rscale, pip->fscale, pip->jscale, pip->phiscale,
		  pjp->index,pip->index,
		  clear,
		  pop->acc,pop->jerk, &(pop->phi), &unbflag, &r2,accflags);
    *nbflag = unbflag;
    
    /* set flags... */
    
    /* first, update nearest particle register */
    if(pjp->index != pip->index){
	if (compare_grape_floats(pop->rnnb, r2, INTERACTION_F_LEN_U)){
	    pop->rnnb = r2;
	    pop->innb = pjp->index;
	}
    }else {
	/* force NB flag to null */
    }
    /* set overflow flags etc... */
    for(k=0;k<3;k++){
	if(k == 0){
	    pop->facc_sum_flag |= accflags[k];
	    pop->jacc_sum_flag |= accflags[k+3];
	}else{
	    pop->facc_sum_flag |= accflags[k]<<(k*3);
	    pop->jacc_sum_flag |= accflags[k+3]<<(k*3);
	}
    }
    pop->pacc_sum_flag = accflags[6];
    for(k=0;k<7;k++) pop->sum_raw_flags[k] |= accflags[k];
    print_forcepipe_test_pattern(pjp->xj, pjp->vj, pjp->mass,
				 pip->xi, pip->vi, pip->eps2, pip->h2,
				 pip->rscale, pip->fscale, pip->jscale, pip->phiscale,
				 pjp->index, pip->index,
				 clear,
				 pop->acc, pop->jerk,pop->phi,
				 unbflag, pop->rnnb, pop->innb, pop->sum_raw_flags);
    
}

/*
 * g6_check_overflow_: OR together the FADD_OVERFLOW_MASK bits of all
 * status fields.
 *
 * *aflag and *jflag each hold three fields at bit offsets 0, 3 and 6;
 * *pflag holds a single field at offset 0.  The result is non-zero when
 * any of the seven fields has a bit of FADD_OVERFLOW_MASK set.
 */
int g6_check_overflow_(int * aflag, int * jflag, int * pflag)
{
    int result = 0;
    int field;

    for(field=0; field<3; field++){
	int shift = field*3;
	result |= (((*aflag)>>shift) & FADD_OVERFLOW_MASK);
	result |= (((*jflag)>>shift) & FADD_OVERFLOW_MASK);
    }
    result |= (*pflag & FADD_OVERFLOW_MASK);
    return result;
}

/*
 * force_chip_pipeline_step: apply predicted j-particle predmem[jindex]
 * to all nip loaded pipelines and record neighbour hits.
 *
 * The neighbour flags are handled in three groups of 16 pipelines, one
 * per entry of chip->nbmem.  When any pipeline of a group reports a
 * neighbour, one entry is appended to that group's memory: a 16-bit
 * word whose bit b is the flag of pipeline group*16+b, plus the
 * j-particle's index.  When the memory already holds NNBMAX entries
 * the overflown flag is set instead.
 */
void force_chip_pipeline_step(int jindex,
			      struct gchip *chip,
			      int clear)
{
    int nbflags[NPIPEPERCHIP];
    int pipe, unit, lane;

    for(pipe=0; pipe<NPIPEPERCHIP; pipe++){
	nbflags[pipe] = 0;
    }
    for(pipe=0; pipe<nip; pipe++){
	force_pipeline_step(jindex, chip, pipe, &nbflags[pipe], clear);
    }

    for(unit=0; unit<3; unit++){
	struct neighbour_memory * mem = &(chip->nbmem[unit]);
	int base = unit*16;
	int anynb = 0;
	int inb;
	ULONG flagword;

	/* stop at the first pipeline of this group that saw a neighbour */
	for(lane=0; lane<16; lane++){
	    if(nbflags[base+lane] != 0){
		anynb = 1;
		break;
	    }
	}
	if(!anynb) continue;

	inb = mem->nnb;
	if (inb >= NNBMAX){
	    mem->overflown = ULONG_ONE;
	    continue;
	}

	/* highest lane is shifted in first so lane 0 ends up in bit 0 */
	flagword = ULONG_ZERO;
	for(lane=15; lane>=0; lane--){
	    flagword <<= 1;
	    flagword |= (nbflags[base+lane] & ULONG_ONE);
	}
	mem->nbflags[inb] = flagword;
	mem->index[inb] = predmem[jindex].index;
	mem->nnb = inb + 1;
    }
}

/*
 * reset_nbmem: mark a neighbour memory as empty and not overflown.
 */
void reset_nbmem(struct neighbour_memory * nbmem)
{
    nbmem->overflown = 0;
    nbmem->nnb = 0;
}
/*
 * reset_outregs: put one pipeline's output register set into its
 * initial state: accumulators, neighbour index and all flag words are
 * zeroed, and rnnb is set to the value built by compose_float() below.
 */
void reset_outregs(struct pipe_output_register_set * outreg)
{
    int n;

    outreg->phi = LONG_ZERO;
    for(n=0; n<3; n++){
	outreg->acc[n] = LONG_ZERO;
	outreg->jerk[n] = LONG_ZERO;
    }

    outreg->innb = ULONG_ZERO;
    outreg->rnnb = compose_float(INTERACTION_F_LEN_U, (ULONG) 0x3ff,
				 ULONG_ZERO, ULONG_ZERO, (ULONG) 0xffffff);

    outreg->facc_sum_flag = ULONG_ZERO;
    outreg->jacc_sum_flag = ULONG_ZERO;
    outreg->pacc_sum_flag = ULONG_ZERO;
    for(n=0; n<7; n++){
	outreg->sum_raw_flags[n] = ULONG_ZERO;
    }
}

/*
 * g6_run_chip_: run the simulator on fortran_callable_gchip.
 */
void g6_run_chip_()
{
  struct gchip * chip = &fortran_callable_gchip;

  run_chip(chip);
}

/*
 * run_chip : driver routine for simulator main body
 *
 * Runs one force calculation of the nip loaded i-particles against the
 * njp stored j-particles on the given chip.  The put_* calls, and when
 * dump_mode > 1 the print_* calls, are made in a fixed sequence along
 * the way; results are left in chip->outreg[] and chip->nbmem[].
 */
  
void run_chip(struct gchip *chip )
{
    int i,j;
    int clear;
    /* start from cleared output registers and neighbour memories */
    for(i=0;i<NPIPEPERCHIP; i++){
	reset_outregs(&(chip->outreg[i]));
    }
    for(i=0;i<NNBUNITS;i++){
	reset_nbmem(&(chip->nbmem[i]));
    }
    put_ipport_particle(chip,&g6cu, nip, NVIRTUALPIPE);

    /* write the i-particles; pipeline i maps to virtual pipe
       i % NVIRTUALPIPE of physical pipe i / NVIRTUALPIPE */
    for(i=0;i<nip; i++){
	int ivp = i % NVIRTUALPIPE;
	int ipp = i/NVIRTUALPIPE;
	if (dump_mode > 1) print_iparticle(&(chip->inreg[i]));
	put_iparticle(&(chip->inreg[i]), ivp, ipp);
    }
    if (dump_mode > 1) print_global_rscale();
    put_global_rscale();
    if (dump_mode > 1) print_ti();
    write_ti_through_ipw(&g6cu,iti);
    put_ti();
    /* predict all j-particles at the current time */
    predict_in_chip();
    set_run_mode(1);
    printf("(run_chip) njp = %d\n", njp);

    write_calc_through_ipw(&g6cu,
			    CALC_N_ADR, njp*8-1);

    do_calc(&g6cu);
    
    /* j-particles are processed from the highest index down; clear is
       1 only for the first one processed */
    clear = 1;
    for(j=njp-1; j>=0; j--){
	if(dump_mode > 1)print_jparticle(jpmem+j);
	put_jparticle(jpmem+j);
	if(dump_mode > 1) print_predicted_particle( predmem+j);
	force_chip_pipeline_step(j,chip, clear );
	clear = 0;
    }
    set_run_mode(0);
    /* read back the per-pipeline results, same pipe mapping as above */
    for(i=0;i<nip; i++){
	int ivp = i % NVIRTUALPIPE;
	int ipp = i/NVIRTUALPIPE;
	if (dump_mode > 1)print_pipe_result(&(chip->outreg[i]));
	put_pipe_result(&(chip->outreg[i]), ivp, ipp);
    }
    set_fo_control(&(g6cu.fou), FO_NI_ADR, nip);

    dump_fo_pipe_port(&(g6cu.fou), chip);
    dump_fo_host_port(&(g6cu.fou), chip);

    /* finally the neighbour memories */
    if (dump_mode > 1)print_neighbour_memory(chip);
    for(i=0;i<NNBUNITS;i++){
	put_nb_result(&(chip->nbmem[i]),i);
    }
}

/*
 * g6_get_force_: by-pointer wrapper around get_force(), reading from
 * fortran_callable_gchip.
 */
void g6_get_force_(int *pipeid,
	  double acc[3],
	  double jerk[3],
	  double *phi,
	  int *aflag,
	  int *jflag,
	  int *pflag)
{
  int pipe = *pipeid;
  struct gchip * chip = &fortran_callable_gchip;

  get_force(pipe, chip, acc, jerk, phi, aflag, jflag, pflag);
}

/*
 * get_force: convert the results held in one pipeline's output
 * registers back to doubles.
 *
 *   pipeid : pipeline number, index into chip->outreg / chip->inreg
 *   chip   : chip to read from
 *   acc    : receives the three acceleration components
 *   jerk   : receives the three jerk components
 *   phi    : receives the potential
 *   aflag, jflag, pflag : receive the force, jerk and potential status
 *                         flag words
 *
 * The scale used for each conversion is the register's scale with the
 * offset added by adjust_scaling_parameters() removed again.
 */
void get_force(int pipeid,
	  struct gchip *chip,
	  double acc[3],
	  double jerk[3],
	  double *phi,
	  int *aflag,
	  int *jflag,
	  int *pflag)
{
  int k;
  struct pipe_output_register_set * outreg;
  struct iparticle * inreg;
  outreg = &(chip->outreg[pipeid]);
  inreg = &(chip->inreg[pipeid]);
  /* every value printed with %ld is cast to long so that the variadic
     argument matches the conversion (pipeid and the flags are int) */
  dprintf(2, "(get_force)pid  = %ld\n",(long)pipeid);
  for(k=0;k<3;k++){
    dprintf(2, "(get_force)k, fscale, acc  = %d, %ld, %lx\n",
	    k, (long)(inreg->fscale-xunit*2+512), outreg->acc[k]);
    acc[k] = CONVERT_GRAPE_INT_POS_TO_DOUBLE(outreg->acc[k],
					     inreg->fscale-xunit*2+512);
    dprintf(2, "(get_force)k, fscale, jerk  = %d, %ld, %lx\n",
	    k, (long)(inreg->jscale-xunit*2-tunit+512), outreg->jerk[k]);
    jerk[k] =  convert_grape_fixed_to_double(outreg->jerk[k],
					     (ULONG) J_ACC_LEN,
					     inreg->jscale-xunit*2-tunit+512);
  }
  *phi =  CONVERT_GRAPE_INT_POS_TO_DOUBLE(outreg->phi,inreg->phiscale-xunit+512);
  *aflag = outreg->facc_sum_flag;
  *jflag = outreg->jacc_sum_flag;
  *pflag = outreg->pacc_sum_flag;
  dprintf(1,"(get_force) flags = %ld %ld %ld\n",
	  (long)*aflag, (long)*jflag, (long)*pflag);
}


/*
 * init_g6sim: initialise the simulator.
 *
 * Initialises the chip control structure, opens the test output file
 * (first call only), then resets the cutoff, selects the gaussian
 * cutoff and writes the cutoff tables out.
 */
void init_g6sim()
{
    static int output_ready = 0;

    initialize_chip_control(&g6cu);

    /* the output file is set up only once, however often we are called */
    if(!output_ready){
	set_output_file(&ftestout, "G6SIM_PIPE_OUT_FILE");
	output_ready = 1;
    }

    reset_cutoff();
    set_gaussian_cutoff();
    dump_cutoff_table_in_host_port(&g6cu);
    put_cutoff_tables();
}
/*
 * g6_init_: trailing-underscore alias of init_g6sim()
 * (presumably the Fortran binding).
 */
void g6_init_()
{
    init_g6sim();
    return;
}

/* Copy of the chip's neighbour memories, filled by read_neighbor_list()
   and read by get_neighbor_list() */
static struct neighbour_memory nbmem[NNBUNITS];

/*
 * read_neighbor_list: copy all NNBUNITS neighbour memories of the chip
 * into the file-scope nbmem array.
 */
void read_neighbor_list(struct gchip * chip)
{
    int unit = 0;

    while (unit < NNBUNITS){
	nbmem[unit] = chip->nbmem[unit];
	unit++;
    }
}

/*
 * g6readnbl_: run read_neighbor_list() on fortran_callable_gchip.
 * The argument is not used.
 */
void g6readnbl_(int * ndummy)
{
    struct gchip * chip = &fortran_callable_gchip;

    (void) ndummy;
    read_neighbor_list(chip);
}

int get_neighbor_list(int index,
		      int nblist[])
{
    int ilocal = index % 16;
    int iunit = index / 16;
    int nbl = 0;
    int i;
    struct neighbour_memory * nbp;
    ULONG mask;
    /*
      printf("get_neighbor index = %d ilocal, iunit = %d %d\n", index,
	   ilocal, iunit);*/
    nbp = &nbmem[iunit];
    mask = ULONG_ONE <<ilocal;
    /*printf("nnb = %d mask = %x\n", nbp->nnb, mask);*/
    for(i= nbp->nnb - 1; i>=0 ; i --){
	/*	printf("i, flags =  %d  %lx\n", i, nbp->nbflags[i]);*/
	
	if( ((int)(nbp->nbflags[i])) & mask){
	    nblist[nbl] = nbp->index[i]; nbp->index[i];
	    nbl ++;
	    /*	printf("found, i, nbl, index =   %d  %d %d\n", i, nbl, nbp->index[i]);*/
	}
    }
    /*    printf("returning %d\n", nbl);*/
    return nbl;
	
}

/*
 * g6_getnbl_: by-pointer wrapper around get_neighbor_list()
 * (presumably the Fortran binding).  Returns its result.
 */
int g6_getnbl_(int * index, int * nblist)
{
    int pipe = *index;
    int count = get_neighbor_list(pipe, nblist);

    return count;
}

/* The following part of the code is mainly for generating test patterns */

/* Current state of the simulated ports.  dump_ports() writes all of
   them as one line of the test-vector file. */
static PREDICTOR_MEMORY_PORT pmport;
static PREDICTOR_REGISTOR_PORT prport;
static FPIPE_INPUT_PORT fiport;

static FPIPE_OUTPUT_REGISTOR_CONTROLS focontrols;
static FPIPE_OUTPUT_DATA foport;

static NBL_OUTPUT_REGISTOR_CONTROLS nbcontrols;
static NBL_OUTPUT_DATA nbport;

/* Label printed at the start of each dumped line, selected by
   cycle_index; the macros below are the indices into this table */
static char cycle_label[5][5] = {"JRED","PRWR","IPWR","FRED","NRED"};
#define JRED 0
#define PRWR 1
#define IPWR 2
#define FRED 3
#define NRED 4

/* Kind of cycle currently being dumped (one of the macros above) */
static int cycle_index;

void set_dump_mode(int mode)
{
    dump_mode = mode;
    printf("GRAPE-6 dump mode = %d\n",mode);
    fflush(stdout);
}
/*
 * g6_set_dump_mode_: by-pointer wrapper around set_dump_mode()
 * (presumably the Fortran binding).
 */
void g6_set_dump_mode_(int * mode)
{
    int new_mode = *mode;

    set_dump_mode(new_mode);
}



/*
 * put_predictor_memory_port: write the predictor memory port fields to
 * ftestout (no newline).
 *
 * port->cycle selects the row of jp.ix whose five entries are printed
 * (all but the first masked to 32 bits), followed by tjlsb, dtjmsb,
 * mass, index and the lowest bit of valid_data and of run.
 */
void put_predictor_memory_port(PREDICTOR_MEMORY_PORT_PTR port)
{
    FILE * out = ftestout;
    struct jparticle * jp = &(port->jp);
    int row = port->cycle;

    fprintf(out,"%16lx ", jp->ix[row][0]);
    fprintf(out,"%8lx ", jp->ix[row][1] & 0xffffffffL);
    fprintf(out,"%6lx ", jp->ix[row][2] & 0xffffffffL);
    fprintf(out,"%5lx ", jp->ix[row][3] & 0xffffffffL);
    fprintf(out,"%3lx ", jp->ix[row][4] & 0xffffffffL);

    fprintf(out,"%1lx %2lx %9lx %8lx %1lx %1lx",
	    jp->tjlsb, jp->dtjmsb,
	    jp->mass, jp->index,
	    1&(port->valid_data), 1&(port->run));
}

/*
 * put_predictor_register_port: write din, wetl, weth and the nine pass
 * values of the predictor register port to ftestout (no newline).
 */
void put_predictor_register_port(PREDICTOR_REGISTOR_PORT_PTR port)
{
    FILE * out = ftestout;
    int n = 0;

    fprintf(out," %8x %1x %1x ", port->din,  port->wetl,  port->weth);
    while (n < 9){
	fprintf(out,"%1x", port->pass[n]);
	n++;
    }
}
			        
/*
 * put_fpipe_register_port: write the force pipeline input port to
 * ftestout (no newline): din masked to 36 bits, the two addresses, one
 * wei value per physical pipe, wet, and the 15 test values.
 */
void put_fpipe_register_port(FPIPE_INPUT_PORT_PTR port)
{
    FILE * out = ftestout;
    int n;

    fprintf(out," %9lx %3x %3x ", port->din&0xfffffffffL,  port->adri,  port->adrt);

    n = 0;
    while (n < NPHYSPIPE){
	fprintf(out,"%1x", port->wei[n]);
	n++;
    }

    fprintf(out," %1x ", port->wet);

    n = 0;
    while (n < 15){
	fprintf(out,"%1x", port->test[n]);
	n++;
    }
}

/*
 * dump_ports: write one line of the test-vector file describing the
 * current state of every port.  Does nothing while dump_mode is zero.
 *
 * The line starts with the label of the current cycle, followed by the
 * predictor memory, predictor register, force pipe input, force output
 * and neighbour output fields.
 */
void dump_ports()
{
    if(!dump_mode) return;

    /* the label is data, so it is printed through %s and never used as
       the format string itself */
    fprintf(ftestout,"%s ", cycle_label[cycle_index]);
    put_predictor_memory_port(&pmport);
    put_predictor_register_port(&prport);
    put_fpipe_register_port(&fiport);
    put_fo_controls(&focontrols);
    put_fodata(&foport);

    put_nb_controls(&nbcontrols);
    put_nbdata(&nbport);
    fprintf(ftestout,"\n");
}

/*
 * put_jparticle: dump the eight JRED cycles of one j-particle.
 *
 * The particle is copied into the predictor memory port.  The cycle
 * field takes the values 0, 1, 2 for the first three cycles and stays
 * at 2 afterwards; valid_data is 1 for the first cycle only.
 */
void put_jparticle(struct jparticle * jp)
{
    int step;

    pmport.jp = *jp;
    pmport.valid_data = 1;
    cycle_index = JRED;

    for(step=0; step<8; step++){
	if (step < 3){
	    pmport.cycle = step;
	}
	dump_ports();
	pmport.valid_data = 0;
    }
}

/*
 * set_run_mode: store mode in the run field of the predictor memory
 * port.
 */
void set_run_mode(int mode)
{
    int run = mode;

    pmport.run = run;
}


/*
 * print_jparticle: write a listing of one j-particle to ftestout: a
 * header line with index, mass, tjlsb and dtjmsb, then five lines
 * giving ix[k][i] for the three values of k.
 */
void print_jparticle(struct jparticle * jp)
{
    FILE * out = ftestout;
    int order, axis;

    fprintf(out,"Jparticle index %8lx mass %8lx tj %3lx dtjmsb %2lx\n",
	    jp->index, jp->mass, jp->tjlsb, jp->dtjmsb);

    for(order=0; order<5; order++){
	fprintf(out,"ix[%1d] ", order);
	for(axis=0; axis<3; axis++){
	    fprintf(out," %16lx",jp->ix[axis][order]);
	}
	fprintf(out,"\n");
    }
}


/*
 * print_predicted_particle: write a listing of one predicted particle
 * to ftestout: index and mass, then the three xj and the three vj
 * values.
 */
void print_predicted_particle(struct predicted_particle * pp)
{
    FILE * out = ftestout;
    int axis;

    fprintf(out, "Predicted particle index %8lx mass %8lx\n X ",
	    pp->index, pp->mass);
    for(axis=0; axis<3; axis++){
	fprintf(out," %16lx",pp->xj[axis]);
    }

    fprintf(out,"\n V ");
    for(axis=0; axis<3; axis++){
	fprintf(out," %16lx",pp->vj[axis]);
    }
    fprintf(out,"\n");
}


/* i-particle register address: virtual pipe number in the bits above
   the 4-bit local register number */
#define Ip_address(local,ivp) (((ivp)<<4)|(local))

/*
 * put_ip_data: dump one write to the force pipe input port.
 *
 *   data : value to write, masked to 36 bits
 *   addr : register address (see Ip_address)
 *   ipp  : physical pipe whose write enable is raised for this dump
 *
 * The write enable is lowered again before returning.
 */
void put_ip_data(ULONG data,
		 int addr,
		 int ipp)
{
    ULONG word = data & 0xfffffffffL;

    fiport.din = word;
    fiport.adri = addr;

    fiport.wei[ipp] = 1;
    dump_ports();
    fiport.wei[ipp] = 0;
}

/*
 * put_predictor_registor_data: dump one PRWR cycle that writes data
 * (masked to 36 bits) to the predictor register port with the given
 * write enables.  Both write enables are cleared afterwards.
 */
void put_predictor_registor_data(ULONG data, int wetl, int weth)
{
    ULONG word = data & 0xfffffffffL;

    cycle_index = PRWR;
    prport.din = word;
    prport.wetl = wetl;
    prport.weth = weth;
    dump_ports();

    prport.weth = 0;
    prport.wetl = 0;
}

void put_ti()
{
 
    put_predictor_registor_data(iti, 1, 0);
     put_predictor_registor_data(iti>>32, 0, 1);
}
/*
 * print_ti: write the current fixed-point time iti to ftestout in hex.
 */
void print_ti()
{
    FILE * out = ftestout;

    fprintf(out, "Ti = %lx\n", iti);
}

/*
 * put_iparticle: dump the IPWR cycles that load one i-particle.
 *
 * Local register layout as written here:
 *   0-5  : xi[0..2], each as upper 32 bits followed by lower 32 bits
 *   6-8  : vi[0..2]
 *   9    : eps2
 *   10   : h2
 *   11   : index
 *   12   : the three scales, 10 bits each: phiscale at bit 20,
 *          fscale at bit 10, jscale at bit 0
 */
void put_iparticle(struct iparticle * ip,
		   int ivp, /* virtual pipe index */
		   int ipp /* physical pipe index */)
{
    int axis;
    ULONG word;
    ULONG scales;

    cycle_index = IPWR;

    for(axis=0; axis<3; axis++){
	word = ((ULONG)ip->xi[axis])>>32;
	put_ip_data(word, Ip_address(axis*2,ivp), ipp);
	word = (ip->xi[axis]) &0xffffffffL;
	put_ip_data(word, Ip_address(axis*2+1,ivp), ipp);
    }

    for(axis=0; axis<3; axis++){
	put_ip_data(ip->vi[axis], Ip_address(axis+6,ivp), ipp);
    }

    put_ip_data(ip->eps2, Ip_address(9,ivp), ipp);
    put_ip_data(ip->h2, Ip_address(10,ivp), ipp);
    put_ip_data(ip->index, Ip_address(11,ivp), ipp);

    scales = ((((ULONG)ip->phiscale) & 0x3ffL)<<20)|
	((((ULONG)ip->fscale)&0x3ffL)<<10)|
	(((ULONG)ip->jscale)&0x3ffL);
    put_ip_data(scales, Ip_address(12,ivp), ipp);
}
/*
 * print_iparticle: write a listing of one i-particle register to
 * ftestout: index, xi, vi, then eps2, h2 and the three scales masked
 * to 10 bits.
 */
void print_iparticle(struct iparticle * ip)
{
    FILE * out = ftestout;
    int axis;

    fprintf(out,"Iparticle index = %lx\n XI:", ip->index);
    for(axis=0; axis<3; axis++){
	fprintf(out," %16lx", ip->xi[axis]);
    }

    fprintf(out,"\n VI:");
    for(axis=0; axis<3; axis++){
	fprintf(out," %16lx", ip->vi[axis]);
    }
    fprintf(out,"\n");

    fprintf(out,"eps, h2, scales(phi,f,j) = %9lx %9lx  %lx %lx %lx\n",
	    ip->eps2, ip->h2,
	    ip->phiscale &0x3ffL,
	    ip->fscale&0x3ffL,
	    ip->jscale&0x3ffL);
}

/*
 * put_fo_controls: write the force output address field to ftestout
 * (no newline).
 */
void put_fo_controls(FPIPE_OUTPUT_REGISTOR_CONTROLS_PTR focontrols)
{
    FILE * out = ftestout;

    fprintf(out," %3lx", focontrols->adr);
}

/*
 * put_fodata: write one field per physical pipe to ftestout: the output
 * word when care_this is set for that pipe, a row of X otherwise.
 */
void put_fodata(FPIPE_OUTPUT_DATA_PTR fodata)
{
    FILE * out = ftestout;
    int pipe;

    for (pipe=0; pipe<NPHYSPIPE; pipe++){
	if(!fodata->care_this[pipe]){
	    fprintf(out," XXXXXXXX");
	    continue;
	}
	fprintf(out," %8x", fodata->dout[pipe]);
    }
}

/*
 * put_foport_data: dump one force output read: set the output address,
 * place the low 32 bits of data on physical pipe ipp, and dump.
 */
void put_foport_data(ULONG data, int adr, int ipp)
{
    unsigned int word = (unsigned int )(data & 0xffffffffL);

    focontrols.adr = adr;
    foport.dout[ipp] = word;
    dump_ports();
}
	    
/* force output register address: virtual pipe number in the bits above
   the 4-bit local register number */
#define Fo_address(local,ivp) (((ivp)<<4)|(local))
	    
/*
 * put_pipe_result: dump the FRED cycles that read back one pipeline's
 * results.  care_this is raised for physical pipe ipp during the reads
 * and lowered again at the end.
 *
 * Local register layout as written here (64-bit values are sent as the
 * upper 32 bits followed by the lower 32 bits):
 *   0-5   : acc[0..2]
 *   6,7   : phi
 *   8-10  : jerk[0..2]
 *   11    : rnnb shifted right by 4
 *   12    : innb
 *   13    : status: pacc flags at bit 18, jacc flags at bit 9,
 *           facc flags at bit 0
 */
void put_pipe_result(PIPE_OUTOUT_REGISTER_SET * po,
		   int ivp, /* virtual pipe index */
		   int ipp /* physical pipe index */)
     
{
    int axis;
    ULONG word;
    ULONG status;

    cycle_index = FRED;
    foport.care_this[ipp] = 1;

    for(axis=0; axis<3; axis++){
	word = (ULONG)po->acc[axis];
	put_foport_data(word>>32, Fo_address(axis*2,ivp), ipp);
	put_foport_data(word, Fo_address(axis*2+1,ivp), ipp);
    }

    word = (ULONG)po->phi;
    put_foport_data(word>>32, Fo_address(6,ivp), ipp);
    put_foport_data(word, Fo_address(7,ivp), ipp);

    for(axis=0; axis<3; axis++){
	put_foport_data((ULONG)(po->jerk[axis]), Fo_address(axis+8,ivp), ipp);
    }

    put_foport_data((po->rnnb)>>4, Fo_address(11,ivp), ipp);
    put_foport_data(po->innb, Fo_address(12,ivp), ipp);

    status = ((po->pacc_sum_flag)<<18)|
	((po->jacc_sum_flag)<<9)|
	((po->facc_sum_flag));
    put_foport_data(status, Fo_address(13,ivp), ipp);

    foport.care_this[ipp] = 0;
}

/*
 * print_pipe_result: write a listing of one pipeline's output registers
 * to ftestout: the three acc and jerk values, then phi, rnnb, innb and
 * the potential, jerk and force flag words.
 */
void print_pipe_result(PIPE_OUTOUT_REGISTER_SET * po)
{
    FILE * out = ftestout;
    int axis;

    fprintf(out,"Pipeline calculated result\n");

    fprintf(out,"ACC: ");
    for(axis=0; axis<3; axis++){
	fprintf(out," %16lx",po->acc[axis]);
    }

    fprintf(out,"\nJERK:");
    for(axis=0; axis<3; axis++){
	fprintf(out," %8lx",po->jerk[axis]);
    }

    fprintf(out,"\nphi, rnnb, innb, flags = %lx %lx %lx %lx %lx %lx\n",
	    po->phi, po->rnnb, po->innb,
	    po->pacc_sum_flag, po->jacc_sum_flag, po->facc_sum_flag);
}




	    
/*
 * put_cutoff_data: dump one write through the table side of the force
 * pipe input port: data (masked to 36 bits) at address addr with the
 * wet enable raised.  The enable is lowered again before returning.
 */
void put_cutoff_data(ULONG data,int addr)
{
    ULONG word = data & 0xfffffffffL;

    fiport.din = word;
    fiport.adrt = addr;

    fiport.wet = 1;
    dump_ports();
    fiport.wet = 0;
}

/*
 * cutoff_data: return the packed cutoff table word for one address.
 *
 * addr>>6 selects the table (only 0 and 1 are accepted) and the low six
 * bits the entry.  The word combines three cutoff_table_data() values:
 * 13 bits of field 0 at bit 13, 12 bits of field 1 at bit 1, and 1 bit
 * of field 2 at bit 0.  Any other table number prints a message to
 * stderr and terminates the program with exit(-1).
 */
unsigned int cutoff_data(int addr)
{
    int table = addr>>6;
    int entry = addr & 63;

    if (!(table >= 0 && table <= 1)){
	fprintf(stderr, "(cutoff_data) impossible address %x\n", addr);
	exit(-1);
    }

    return  ((cutoff_table_data(0,table,entry)&0x1fffL)<<13)|
		((cutoff_table_data(1,table,entry)&0xfffL)<<1)|
		(cutoff_table_data(2,table,entry)&0x1L);
}


void put_cutoff_tables()
{
    int i, j, k;
    cycle_index = IPWR;
    if (dump_mode >1) print_cutoff_table(ftestout);
    for(i=0;i<CUTOFF_NUMBER_OF_TABLES; i++){
	/*fprintf(ftestout,"Dumping cutoff table #%d\n", i);*/
	for(j=0;j< CUTOFF_TABLE_SIZE; j++){
	    int adr;
	    ULONG data;
	    adr = (i<<6)|j;
	    data = ((cutoff_table_data(0,i,j)&0x1fffL)<<13)|
		((cutoff_table_data(1,i,j)&0xfffL)<<1)|
		(cutoff_table_data(2,i,j)&0x1L);
	    if (data != cutoff_data(adr)){
		fprintf(stderr, "(cutoff_data) internal error %x %x\n", data);
		exit(-1);
	    }
	    put_cutoff_data(data, adr);
	    /*	    fprintf(ftestout,"table %d %2x %4lx %4lx %4lx\n",
		    i, j, cutoff_table_data(0,i,j),
		    cutoff_table_data(1,i,j),
		    cutoff_table_data(2,i,j));*/

	}
    }
}

/*
 * put_global_rscale: dump an IPWR cycle that writes global_rscale to
 * table address 0x80.
 */
void put_global_rscale()
{
    const int rscale_address = 0x80;

    cycle_index = IPWR;
    put_cutoff_data(global_rscale, rscale_address);
}

/*
 * print_global_rscale: write global_rscale to ftestout in hex.
 */
void print_global_rscale()
{
    FILE * out = ftestout;

    fprintf(out,"Rscale = %lx\n",global_rscale);
}

/*
 * print_neighbour_memory: write a listing of all NNBUNITS neighbour
 * memories of the chip to ftestout.
 *
 * For every unit the entry count and the overflown flag are printed,
 * followed, when the unit is not empty, by one line per entry giving
 * its address, j-particle index and flag word.
 */
void print_neighbour_memory(struct gchip *chip )
{
    int i;
    struct neighbour_memory *nbp;
    for(i=0;i<NNBUNITS;i++){
	int k;
	nbp = &(chip->nbmem[i]);
	fprintf(ftestout,"Neighbor unit %d NNB= %d OVERFLOWN= %d\n", i,
	       (int)nbp->nnb,(int)nbp->overflown);
	if (nbp->nnb > 0){
	    fprintf(ftestout," addr  index   flag\n");
	    for(k=0;k<nbp->nnb;k++){
		/* k is an int; widen it so it matches the %4ld conversion */
		fprintf(ftestout," %4ld   %4ld   %4lx\n", (long)k,
			nbp->index[k],nbp->nbflags[k]);
	    }
	}
    }
}


/*
 * put_nb_controls: write the neighbour output address field to ftestout
 * (no newline).
 */
void put_nb_controls(NBL_OUTPUT_REGISTOR_CONTROLS_PTR nbcontrols)
{
    FILE * out = ftestout;

    fprintf(out," %3lx", nbcontrols->adr);
}

/*
 * put_nbdata: write one field per neighbour unit to ftestout: the
 * output word when care_this is set for that unit, a row of X
 * otherwise.
 */
void put_nbdata(NBL_OUTPUT_DATA_PTR nbdata)
{
    FILE * out = ftestout;
    int unit;

    for (unit=0; unit<NNBUNITS; unit++){
	if(!nbdata->care_this[unit]){
	    fprintf(out," XXXXXXXX");
	    continue;
	}
	fprintf(out," %8x", nbdata->dout[unit]);
    }
}



/*
 * put_nbport_data: dump one neighbour output read: set the address,
 * place the low 32 bits of data on unit iunit with care_this raised,
 * dump, and lower care_this again.
 */
void put_nbport_data(ULONG data, int adr, int iunit)
{
    unsigned int word = (unsigned int )(data & 0xffffffffL);

    nbcontrols.adr = adr;
    nbport.dout[iunit] = word;

    nbport.care_this[iunit] = 1;
    dump_ports();
    nbport.care_this[iunit] = 0;
}
	    




	    
/*
 * put_nb_result: dump the NRED cycles that read back one neighbour
 * memory.
 *
 * Addresses as written here:
 *   0x200  : number of stored entries
 *   2*i    : entry i: low 16 bits of the flag word, with the lowest
 *            bit of the overflown flag at bit 16
 *   2*i+1  : entry i: j-particle index
 */
void put_nb_result(struct neighbour_memory  * pn,
		   int iunit)
{
    int entry, count;
    ULONG word;

    cycle_index = NRED;

    word = pn->nnb;
    count = word;
    put_nbport_data(word,  0x200, iunit);

    for(entry=0; entry<count; entry++){
	int adr = entry*2;

	word = ((pn->nbflags[entry])&0xffffL) |
	    ((pn->overflown & ULONG_ONE)<<16);
	put_nbport_data(word, adr, iunit);

	word = pn->index[entry];
	put_nbport_data(word, adr+1, iunit);
    }
}


