/*
 * test_fo_unit.c
 *
 * create systematic and/or random test pattern for FO reduction unit
 *
 * Copyright Jun Makino 1999
 *
 * Version 1.0 1998/09/18

 *
 * location of overflow flag: middle!
 * flag order: x y z jx jy jz phi from lsb
 */

#include "grape6sim.h"
#include "g6chip.h"
#include "ecc64.h"
#include "g6control.h"


/* Number of FO input ports driven by this pattern generator. */
#define NPORTS 4


/*
 * Current stimulus state.  print_fo_unit_input() dumps these values as one
 * "FOIN" line; the other routines modify them between dumps.
 */
/* Reset field of the FOIN line; only print_fo_unit_reset() sets it to 0
 * (presumably an active-low reset -- confirm against the unit's spec). */
int rst = 1;
/* Per-port state: data word plus the vd, nd and active control fields. */
struct fo_host_port  fpp[NPORTS];
/* Second input port state: we and data fields
 * (written by print_ip_single_word()). */
struct ipw_host_port  ipp;
/* Only the least significant bit is printed in the FOIN line. */
int dwait;

/*
 * rand32: build a 32-bit pseudo-random value from two random() draws.
 *
 * The first draw is used as-is for the low bits and the least significant
 * bit of the second draw is placed in bit 31.
 */
unsigned int rand32()
{
    unsigned int low_bits = random();
    unsigned int top_source = random();

    return ((top_source & 1) << 31) | low_bits;
}

/*
 * rand64: build a 64-bit pseudo-random value from three random() draws.
 *
 * Layout of the result: first draw in the low bits, second draw shifted
 * left by 31, and the two low bits of the third draw in bits 62-63.
 */
ULONG rand64()
{
    unsigned long int low_part = random();
    unsigned long int mid_part = random();
    unsigned long int top_part = random();

    return ((top_part & 3L) << 62) | (mid_part << 31) | low_part;
}



/*
 * printhex: print the low `length` hexadecimal digits of `data` to stdout,
 * zero-padded to exactly `length` characters (no newline).
 *
 *   data   - value to print; bits above the requested digits are discarded
 *   length - number of hex digits; negative values are treated as 0
 *
 * The mask is built without shifting by the full width of ULONG (which
 * would be undefined), and the zero padding is produced directly by
 * printf instead of post-processing a temporary buffer.
 */
void printhex(ULONG data,
              int length)
{
    int nbits;
    ULONG mask;

    if (length < 0){
        length = 0;
    }
    nbits = length * 4;
    if (nbits >= (int)(8 * sizeof(ULONG))){
        /* all bits requested: shifting by the type width is undefined */
        mask = ~(ULONG)0;
    }else{
        mask = (((ULONG)1) << nbits) - 1;
    }
    printf("%0*lx", length, (unsigned long)(data & mask));
}
    

/*
 * print_fo_unit_input: dump the current stimulus state as one "FOIN" line.
 *
 * Fields, in order: rst, the data word of every port (8 hex digits each),
 * one hex digit each for the vd / nd / active masks (bit i is set when the
 * corresponding field of port i is non-zero), the low bit of dwait, the
 * low bit of ipp.we, and the low 16 bits of ipp.data (4 hex digits).
 */
void print_fo_unit_input()
{
    int port;
    int vd_mask = 0;
    int nd_mask = 0;
    int active_mask = 0;

    printf("FOIN %1d ",rst);
    for (port = 0; port < NPORTS; port++){
        printf(" ");
        printhex(fpp[port].data,8);
    }

    /* collect one bit per port for each control field */
    for (port = 0; port < NPORTS; port++){
        if (fpp[port].vd != 0){
            vd_mask |= (1 << port);
        }
        if (fpp[port].nd != 0){
            nd_mask |= (1 << port);
        }
        if (fpp[port].active != 0){
            active_mask |= (1 << port);
        }
    }
    printf(" %1x %1x %1x", vd_mask & 0xf, nd_mask & 0xf, active_mask & 0xf);
    printf(" %1d", dwait & 1);
    printf(" %1d ", (ipp.we & 1));
    printhex(ipp.data &0xffffL,4);
    printf("\n");
}

/*
 * reset_all_ports: put every stimulus signal into its idle state.
 *
 * Sets rst to 1; for each FO port sets data to 0xffffffff, vd and nd to 1
 * and active to 0; clears dwait; sets ipp.we to 1 and ipp.data to 0xffff.
 *
 * Returns 0 always.  The return type is spelled out as int because the
 * original definition relied on implicit int, which C99 removed.
 */
int reset_all_ports(void)
{
    int port;

    rst = 1;
    for (port = 0; port < NPORTS; port++){
        fpp[port].data = 0x0ffffffffL;
        fpp[port].vd = 1;
        fpp[port].nd = 1;
        fpp[port].active = 0;
    }
    dwait = 0;
    ipp.we = 1;
    ipp.data = 0xffffL;
    return 0;
}

/*
 * print_fo_unit_reset: emit a single FOIN line with rst = 0 and every
 * other signal idle, then put rst back to 1 for the lines that follow.
 */
void print_fo_unit_reset()
{
    reset_all_ports();      /* idle state, leaves rst == 1 */

    rst = 0;                /* the one line printed with rst low */
    print_fo_unit_input();

    rst = 1;                /* later lines are printed with rst high */
}

/*
 * print_ip_single_word: emit the FOIN lines for a single-word write on the
 * ipp port.
 *
 * After resetting all ports, three lines are printed with ipp.we = 0 and
 * ipp.data set to `address`, the constant 1, and `data` in that order
 * (the 1 is presumably a word count -- confirm against the port protocol).
 * A fourth line is printed with ipp.we = 1 and ipp.data left at `data`.
 */
void print_ip_single_word(int address,
                          int data)
{
    int sequence[3];
    int step;

    sequence[0] = address;
    sequence[1] = 1;
    sequence[2] = data;

    reset_all_ports();
    for (step = 0; step < 3; step++){
        ipp.we = 0;
        ipp.data = sequence[step];
        print_fo_unit_input();
    }
    ipp.we = 1;
    print_fo_unit_input();
}

/*
 * dummy_cycle: reset all ports and print `nwait` idle FOIN lines.
 * Nothing is printed when nwait is zero or negative.
 */
void dummy_cycle(int nwait)
{
    int remaining;

    reset_all_ports();
    for (remaining = nwait; remaining > 0; remaining--){
        print_fo_unit_input();
    }
}

/*
 * One FO data packet, as fed to a single port or produced by the
 * reduction.  The names suggest acceleration, jerk, potential and
 * nearest-neighbour data -- confirm against the unit's specification.
 */
typedef struct fo_data_pack{
    ULONG  acc[3];	/* three 64-bit values, each sent as two 32-bit words */
    ULONG  jerk[3];	/* three values, only the low 32 bits are sent */
    ULONG  phi;		/* 64-bit value, sent as two 32-bit words */
    ULONG rnnb;		/* sent as one word; compared via compare_grape_floats() */
    ULONG innb;		/* sent as one word; selected together with rnnb */
    ULONG flag;		/* flag word; port flags are OR-ed in the reduction */
}FO_DATA_PACK;

/*
 * insert_wait: optionally insert a random number of wait lines.
 *
 * Does nothing when with_delay is 0.  Otherwise nd is set to 1 on every
 * port and, for as long as the two low bits of a fresh rand32() draw are
 * both set, a FOIN line with random data on every port is printed.
 * Finally nd is set back to 0 on every port.
 */
void     insert_wait(int with_delay)
{
    int port;

    if (!with_delay){
        return;
    }

    for (port = 0; port < NPORTS; port++){
        fpp[port].nd = 1;
    }

    for (;;){
        /* one draw decides whether another wait line is emitted */
        if ((rand32() & 3) != 3){
            break;
        }
        for (port = 0; port < NPORTS; port++){
            fpp[port].data = rand32();
        }
        print_fo_unit_input();
    }

    for (port = 0; port < NPORTS; port++){
        fpp[port].nd = 0;
    }
}
    
    

send_fo_data_with_random_delay(struct fo_data_pack fo_pack[NPORTS],
			       int with_delay)
{
    int i,k;
    /* note that here we print the data in LSB-fast order!! */
    reset_all_ports();
    for(i=0;i<NPORTS;i++){
	fpp[i].vd = 0;
	fpp[i].nd = 0;
    }
    for(k=0;k<3;k++){
	for(i=0;i<NPORTS;i++) fpp[i].data = fo_pack[i].acc[k]&0xffffffffL;
	print_fo_unit_input();
	insert_wait(with_delay);
	for(i=0;i<NPORTS;i++) fpp[i].data = fo_pack[i].acc[k]>>32;
	print_fo_unit_input();
	insert_wait(with_delay);
    }
    for(i=0;i<NPORTS;i++) fpp[i].data = fo_pack[i].phi&0xffffffffL;
    print_fo_unit_input();
    insert_wait(with_delay);
    for(i=0;i<NPORTS;i++) fpp[i].data = fo_pack[i].phi>>32;
    print_fo_unit_input();
    insert_wait(with_delay);
    for(k=0;k<3;k++){
	for(i=0;i<NPORTS;i++) fpp[i].data = fo_pack[i].jerk[k]&0xffffffffL;
	print_fo_unit_input();
	insert_wait(with_delay);
    }
    for(i=0;i<NPORTS;i++) fpp[i].data = fo_pack[i].rnnb;
    print_fo_unit_input();
    insert_wait(with_delay);
    for(i=0;i<NPORTS;i++) fpp[i].data = fo_pack[i].innb;
    print_fo_unit_input();
    for(i=0;i<NPORTS;i++) fpp[i].data = fo_pack[i].flag;
    print_fo_unit_input();
}


/*
 * send_fo_data: print the FOIN lines that feed one data packet per port
 * into the unit, back to back with no wait lines.
 *
 *   fo_pack - one packet per port (NPORTS entries)
 *
 * The word sequence is identical to send_fo_data_with_random_delay() with
 * delays disabled (insert_wait(0) returns immediately without drawing any
 * random numbers), so this simply delegates instead of duplicating it.
 *
 * Returns 0 always.  The return type is spelled out as int because the
 * original definition relied on implicit int, which C99 removed.
 */
int send_fo_data(struct fo_data_pack fo_pack[NPORTS])
{
    send_fo_data_with_random_delay(fo_pack, 0);
    return 0;
}




/*
 * put_fo_onedata: print one expected output word as a "FOOUT" line.
 *
 *   data - output word; only the low 32 bits are used
 *
 * The value returned by generate_byte_parity() for those 32 bits is placed
 * above bit 31 and the combined word is printed as 9 hex digits.
 *
 * Returns 0 always.  The return type is spelled out as int because the
 * original definition relied on implicit int, which C99 removed.
 */
int put_fo_onedata(ULONG data)
{
    ULONG parity;
    ULONG word;

    data &= 0xffffffffL;
    parity = generate_byte_parity((unsigned int) data);
    word = data | (parity << 32);

    printf("FOOUT ");
    printhex(word,9);
    printf("\n");
    return 0;
}
    
/*
 * put_fo: print a complete expected result packet as FOOUT lines.
 *
 *   fo_pack - packet to print
 *
 * Word order matches the input side: each acc component low word then
 * high word, phi low then high, the three jerk words, rnnb, innb, flag.
 *
 * Returns 0 always.  The return type is spelled out as int because the
 * original definition relied on implicit int, which C99 removed.
 */
int put_fo(struct fo_data_pack *  fo_pack)
{
    int k;

    for (k = 0; k < 3; k++){
        put_fo_onedata(fo_pack->acc[k]);
        put_fo_onedata(fo_pack->acc[k]>>32);
    }
    put_fo_onedata(fo_pack->phi);
    put_fo_onedata(fo_pack->phi>>32);

    for (k = 0; k < 3; k++){
        put_fo_onedata(fo_pack->jerk[k]);
    }
    put_fo_onedata(fo_pack->rnnb);
    put_fo_onedata(fo_pack->innb);
    put_fo_onedata(fo_pack->flag);
    return 0;
}

/*
 * accumulate64_with_extended: add a signed 64-bit input to an accumulator
 * that is extended by a second word.
 *
 *   accum      - low 64 bits of the running sum (updated in place)
 *   accum_high - extension word (updated in place); its least significant
 *                bit duplicates bit 63 of *accum
 *   in         - value to add, interpreted as two's complement
 *   clear      - when non-zero both words are zeroed before adding
 *
 * The low 63 bits are added separately so that the carry out of bit 62 can
 * be propagated into the extension word together with the sign extension
 * of `in`.  As a self-check the result is compared with a plain 64-bit
 * addition and a diagnostic is printed if they differ.
 */
void accumulate64_with_extended(ULONG * accum,
                                ULONG * accum_high,
                                ULONG in,
                                int clear)
{
    const ULONG low63_mask = 0x7fffffffffffffffL;
    ULONG sign_extension;
    ULONG expected;
    ULONG low_sum;

    if (clear){
        *accum_high = 0;
        *accum = 0;
    }

    /* extension of the input: all ones when negative, zero otherwise */
    sign_extension = (in & 0x8000000000000000L) ? 0xffffffffffffffffL : 0;

    /* reference result of an ordinary wrapping 64-bit addition */
    expected = *accum + in;

    /* add without the top bits; bit 63 of the sum is the carry */
    low_sum = (*accum & low63_mask) + (in & low63_mask);
    *accum_high += sign_extension + (low_sum >> 63);

    /* bit 63 of the result is the LSB of the extension word */
    *accum = (low_sum & low63_mask) | (((*accum_high) & 1L) << 63);

    if (*accum != expected){
        printf( "accum error... \n");
        printf("accum, chk, xor = %lx %lx %lx\n", *accum, expected, *accum ^ expected);
    }
}

/*
 * overflown32: report whether a sign-extended sum no longer fits in 32 bits.
 *
 * Returns 0 when bits 31 and above of `data` are all zero or exactly
 * 0x1ffffffff (i.e. bits 31..63 all ones), and 1 otherwise.
 */
int overflown32(ULONG data)
{
    ULONG upper = data >> 31;

    if (upper == 0L){
        return 0;
    }
    if (upper == 0x1ffffffffL){
        return 0;
    }
    return 1;
}


/*
 * accumulate32_with_extended: add a signed 32-bit input to a 64-bit
 * accumulator.
 *
 *   accum - running sum (updated in place)
 *   in    - 32-bit two's-complement value held in the low half of a
 *           ULONG; the upper 32 bits must be zero
 *   clear - when non-zero the accumulator is zeroed before adding
 *
 * If any of the upper 32 bits of `in` are set, a message is printed and
 * the program terminates with exit status 1.
 */
void accumulate32_with_extended(ULONG * accum,
                                ULONG in,
                                int clear)
{
    ULONG widened;

    if (clear){
        *accum = 0;
    }

    if ((in & 0xffffffff00000000L) != 0){
        printf("(32) invalid input: %lx\n", in);
        exit(1);
    }

    /* sign-extend bit 31 into the upper half before adding */
    widened = in;
    if (in & 0x80000000L){
        widened |= 0xffffffff00000000L;
    }
    *accum += widened;
}

/*
 * simulate_fo_operation: compute the packet the reduction unit is expected
 * to output for the given per-port input packets.
 *
 *   fo_pack - input packets, one per port (NPORTS entries)
 *   fop     - receives the expected result; every field is overwritten
 *
 * acc[] and phi are summed over the ports with a 64-bit accumulator plus
 * extension word; a sum is flagged as overflown when the extension word
 * is neither all zeros nor all ones.  jerk[] is summed as sign-extended
 * 32-bit values and checked with overflown32().
 *
 * The overflow bits are placed at bit 1, 4, 7 (acc x, y, z), 10, 13, 16
 * (jerk x, y, z) and 19 (phi) and OR-ed with the flag words of all ports.
 *
 * rnnb/innb start as those of port 0; a later port's pair replaces the
 * current one whenever compare_grape_floats(port rnnb, current rnnb, 20L)
 * returns 0 (presumably selecting the nearest neighbour -- confirm
 * against compare_grape_floats()).
 *
 * Prints an "overflown:" line and an "RNBS:" line to stdout as a side
 * effect.
 *
 * Returns 0 always.  The return type is spelled out as int because the
 * original definition relied on implicit int, which C99 removed.
 */
int simulate_fo_operation(struct fo_data_pack fo_pack[NPORTS],
                          struct fo_data_pack* fop)
{
    int i,k;
    int acc_flag[3], phi_flag, jerk_flag[3];
    ULONG accm_high;    /* set by the first (clearing) accumulate call */
    ULONG local_flag;

    for (k = 0; k < 3; k++){
        for (i = 0; i < NPORTS; i++){
            accumulate64_with_extended(&(fop->acc[k]),&accm_high,fo_pack[i].acc[k],(i==0));
        }
        /* extension word neither 0 nor -1 implies overflow */
        acc_flag[k] = (accm_high != 0L) && (accm_high != 0xffffffffffffffffL);
    }

    for (i = 0; i < NPORTS; i++){
        accumulate64_with_extended(&(fop->phi),&accm_high,fo_pack[i].phi,(i==0));
    }
    phi_flag = (accm_high != 0L) && (accm_high != 0xffffffffffffffffL);

    for (k = 0; k < 3; k++){
        for (i = 0; i < NPORTS; i++){
            accumulate32_with_extended(&(fop->jerk[k]),fo_pack[i].jerk[k],i==0);
        }
        jerk_flag[k] = overflown32(fop->jerk[k]);
    }
    printf ("overflown: %d %d %d %d %d %d %d\n",
            acc_flag[0],acc_flag[1],acc_flag[2],phi_flag,
            jerk_flag[0],jerk_flag[1],jerk_flag[2]);

    local_flag = (ULONG)((acc_flag[0]<<1) | (acc_flag[1]<<4) | (acc_flag[2]<<7) |
                         (jerk_flag[0]<<10) | (jerk_flag[1]<<13) | (jerk_flag[2]<<16) |
                         (phi_flag<<19) );
    fop->flag = fo_pack[0].flag | local_flag;
    fop->rnnb = fo_pack[0].rnnb;
    fop->innb = fo_pack[0].innb;

    printf ("RNBS: ");
    for (i = 0; i < NPORTS; i++){
        printf(" %18.8g", convert_grape_float_to_double(fo_pack[i].rnnb, 20L));
    }
    printf ("\n");

    for (i = 1; i < NPORTS; i++){
        fop->flag |= fo_pack[i].flag;
        if (compare_grape_floats(fo_pack[i].rnnb,fop->rnnb,20L)==0) {
            fop->rnnb = fo_pack[i].rnnb;
            fop->innb = fo_pack[i].innb;
        }
    }
    return 0;
}


/*
 * fo_test_1: simple fixed-pattern test.
 *
 * Every port gets acc[k] = jerk[k] = k+1, phi with a single bit at
 * position 16*port, rnnb = port (then port 1's rnnb is overridden to 0),
 * innb = port << 4 and flag = (1 << 4*port) - 1.  The stimulus is printed
 * without wait lines, followed by the simulated expected output.
 *
 * Returns 0 always.  The return type is spelled out as int because the
 * original definition relied on implicit int, which C99 removed.
 */
int fo_test_1(void)
{
    struct fo_data_pack fo_pack[NPORTS];
    struct fo_data_pack fo_result;
    int i,k;

    for (i = 0; i < NPORTS; i++){
        for (k = 0; k < 3; k++){
            fo_pack[i].acc[k] =  k+1;
            fo_pack[i].jerk[k] = k+1;
        }
        fo_pack[i].phi = 1L<<(i*16);
        fo_pack[i].rnnb = i;
        fo_pack[i].innb = i<<4;
        fo_pack[i].flag = (1<<(i*4))-1;
    }
    fo_pack[1].rnnb = 0;

    send_fo_data(fo_pack);
    simulate_fo_operation(fo_pack,&fo_result);
    put_fo(&fo_result);
    return 0;
}

/*
 * fo_test_2: second fixed-pattern test with a distinct bit per port.
 *
 * Every port gets acc[k] = (1 << 10*port) | k, jerk[k] = (1 << 7*port) | k,
 * phi with a single bit at position 15*port, rnnb = 2*port, innb = port
 * and flag with a single bit at position 3*port.  The stimulus is printed
 * without wait lines, followed by the simulated expected output.
 *
 * Note: the acc pattern originally read "1L<(i*10)", a comparison that
 * yields only 0 or 1; it is now the shift used by the jerk and phi lines.
 *
 * Returns 0 always.  The return type is spelled out as int because the
 * original definition relied on implicit int, which C99 removed.
 */
int fo_test_2(void)
{
    struct fo_data_pack fo_pack[NPORTS];
    struct fo_data_pack fo_result;
    int i,k;

    for (i = 0; i < NPORTS; i++){
        for (k = 0; k < 3; k++){
            fo_pack[i].acc[k] = (1L<<(i*10)) | k;
            fo_pack[i].jerk[k] = (1<<(i*7)) | k;
        }
        fo_pack[i].phi = 1L<<(i*15);
        fo_pack[i].rnnb = i*2;
        fo_pack[i].innb = i;
        fo_pack[i].flag = 1<<(i*3);
    }

    send_fo_data(fo_pack);
    simulate_fo_operation(fo_pack,&fo_result);
    put_fo(&fo_result);
    return 0;
}



     

/*
 * fo_test_3: random-data test.
 *
 *   with_random_delay - passed on to send_fo_data_with_random_delay();
 *                       non-zero inserts random wait lines in the stimulus
 *
 * acc[] and phi are filled from rand64(); jerk[], rnnb and innb from
 * rand32().  The input flag word is always 0 (a random flag word was
 * tried in the original source and left disabled).  The order of the
 * random draws is part of the generated pattern and must not change.
 *
 * Returns 0 always.  The return type is spelled out as int because the
 * original definition relied on implicit int, which C99 removed.
 */
int fo_test_3(int with_random_delay)
{
    struct fo_data_pack fo_pack[NPORTS];
    struct fo_data_pack fo_result;
    int i,k;

    for (i = 0; i < NPORTS; i++){
        for (k = 0; k < 3; k++){
            fo_pack[i].acc[k] = rand64();
            fo_pack[i].jerk[k] = rand32();
        }
        fo_pack[i].phi = rand64();
        fo_pack[i].rnnb = rand32();
        fo_pack[i].innb = rand32();
        fo_pack[i].flag = 0;
    }

    send_fo_data_with_random_delay(fo_pack,with_random_delay);
    simulate_fo_operation(fo_pack,&fo_result);
    put_fo(&fo_result);
    return 0;
}

/*
 * random_test_for_multiple_data: run `n` random packets with random waits.
 *
 *   n - value written to address 0x1012 and number of fo_test_3(1) runs
 *
 * Writes n to 0x1012 and 1 to 0x1402 (calc mode), idles for 4 lines,
 * runs the random test n times and idles for 10 more lines.
 *
 * Returns 0 always.  The return type is spelled out as int because the
 * original definition relied on implicit int, which C99 removed.
 */
int random_test_for_multiple_data(int    n)
{
    int i;

    print_ip_single_word(0x1012,n); /* set n  */
    print_ip_single_word(0x1402,1); /* set calc mode */
    dummy_cycle(4);
    for (i = 0; i < n; i++){
        fo_test_3(1);
    }
    dummy_cycle(10);
    return 0;
}

/*
 * main: generate the complete test pattern on stdout.
 *
 * Usage: test_fo_unit n_p [seed]
 *   n_p  - vector count used for the two random multi-packet runs
 *   seed - optional seed for srandom(); defaults to 12345
 *
 * Sequence: reset line, two random multi-packet runs, then with n set to
 * 1 the fixed tests fo_test_1 and fo_test_2 followed by two single random
 * packets without wait lines.  Progress messages go to stderr.
 *
 * Returns 0 on success; exits with status 1 when n_p is missing or is not
 * a decimal integer.  An unparsable seed is reported and the default seed
 * is kept.
 */
int main(int argc,
         char ** argv)
{
    int seed = 12345;
    int nvector;

    fprintf(stderr,"argc = %d\n", argc);
    if(argc < 2){
        fprintf(stderr,"usage : test_fo_unit n_p [seed]\n");
        exit(1);
    }
    /* nvector must not be used uninitialised when the argument is bad */
    if (sscanf(*(argv+1),"%d", &nvector) != 1){
        fprintf(stderr,"invalid n_p: %s\n", *(argv+1));
        fprintf(stderr,"usage : test_fo_unit n_p [seed]\n");
        exit(1);
    }
    fprintf(stderr,"N_vector = %d\n", nvector);

    if(argc>=3){
        if (sscanf(*(argv+2),"%d", &seed) != 1){
            fprintf(stderr,"invalid seed: %s (keeping default)\n", *(argv+2));
        }
        fprintf(stderr,"New seed = %d\n", seed);
    }
    srandom(seed);
    print_fo_unit_reset();

    random_test_for_multiple_data(nvector);
    random_test_for_multiple_data(nvector);

    print_ip_single_word(0x1012,1); /* set n to 1 */
    print_ip_single_word(0x1402,1); /* set calc mode */
    dummy_cycle(4);
    fo_test_1();
    dummy_cycle(10);
    print_ip_single_word(0x1402,1); /* set calc mode */
    dummy_cycle(4);
    fo_test_2();
    dummy_cycle(10);
    print_ip_single_word(0x1402,1); /* set calc mode */
    dummy_cycle(4);
    fo_test_3(0);
    dummy_cycle(10);
    print_ip_single_word(0x1402,1); /* set calc mode */
    dummy_cycle(4);
    fo_test_3(0);
    dummy_cycle(10);
    return 0;
}

