#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include "/opt/mpich/include/mpi.h"
#include "ask2.h"


/* Columns per worker process; main() sets it to (B-2)/nproc. */
int cpp;
/* nproc is the number of worker processes (communicator size minus the
   master); numt is not referenced anywhere in the visible code. */
int numt, nproc;
/* Not referenced anywhere in the visible code. */
int tids[4];
/* left/right: first and last column of the band a process works on
   (the master also uses them as scratch when handing out bands).
   i, j, k: loop counters shared by main(). */
int left,right,i,j,k;
/* Master-side receive buffer for one worker's grid; read by updateMCols(). */
float tbl[SIZE][SIZE];
/* A  : the grid itself.
   L  : per-column copy of the value a cell had before expandHeat() updated
        it; read back by getM().
   LL : column received from the left neighbour (filled by getVL()).
   LR : column received from the right neighbour (filled by getVR()).
   LS : staging buffer that sendL()/sendR() fill before sending. */
float   A[SIZE][SIZE],L[SIZE],LL[SIZE],LR[SIZE],LS[SIZE];
/* Status object shared by every MPI_Recv() call in this file. */
MPI_Status status;

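/*
Reference list of the run parameters used throughout this file. They are not
declared here, so they presumably come from "ask2.h" (TODO: confirm that the
types declared there match this list).
int	B ;
unsigned long    LOOPS;
float	C;
float	a1, a2, a3, a4;
*/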

/*
 * Forward declarations for functions that are called before their
 * definition. Implicit function declarations were removed in C99, so every
 * such function is declared here. The int-returning ones match their
 * definitions further down, which rely on the old implicit-int rule.
 */
void  cleanup(void);
void expandHeat(int leftcol, int rightcol);
float getM(int j, int k, int place, int leftcol, int rightcol);
int heatSpot(int j, int k, int leftcol, int rightcol);
int getVL(int org);
int getVR(int org);
int sendL(int dest);
int sendR(int dest);
int updateMCols(int leftcol, int rightcol);

/*
 * Evaluate the result of a scanf() call that was expected to convert exactly
 * one item.
 *
 * Returns 1 when the conversion succeeded. Otherwise the rest of the current
 * input line is discarded (so the next scanf() does not trip over the same
 * characters again) and 0 is returned. If standard input is exhausted the
 * program terminates, because no further prompt could ever be answered.
 */
static int scanOk(int matched) {
	int ch;

	if (matched == 1)
		return 1;

	do {
		ch = getchar();
	} while (ch != '\n' && ch != EOF);

	if (ch == EOF) {
		fprintf(stderr, "\ngetinput: unexpected end of input\n");
		exit(5);
	}
	return 0;
}

/*
 * Interactively ask for every run parameter that currently holds a negative
 * value: grid size B, coefficient C, iteration count LOOPS and the four
 * boundary values a1..a4. Parameters that are already non-negative are left
 * untouched.
 *
 * B is re-prompted until it is at least 2, C until it lies strictly between
 * 0 and 0.5, LOOPS until it is non-negative. An unparsable answer is thrown
 * away and the question is asked again.
 */
void getinput() {

	if (B<0)
		do {
			printf("Grid size (B) : ");
			fflush(stdout);
			/* NOTE(review): "%u" expects an unsigned int *, yet B is
			   compared against 0 here, so it looks signed -- confirm
			   against its declaration. */
			scanOk(scanf("%u", &B));
			/* After a failed read B is unchanged and therefore still
			   invalid, so the loop condition re-prompts. */
			if (B<2)
				printf("\nB >= 2\n");
		} while (B<2);

	if(C<0)
		do {
			printf("Dt/Dx^2 (C) : ");
			fflush(stdout);
			scanOk(scanf("%f", &C));
		} while ((C<=0.0) || (C>=0.5));

	if (LOOPS<0)
		do {
			printf("Loops (LOOPS) : ");
			fflush(stdout);
			scanOk(scanf("%ld", &LOOPS));
		} while (LOOPS<0);

	/* The boundary values have no range restriction; only repeat the
	   question when nothing could be parsed. */
	if (a1<0)
		do {
			printf("A(t,B,y) = ");
			fflush(stdout);
		} while (!scanOk(scanf("%f", &a1)));

	if (a2<0)
		do {
			printf("A(t,0,y) = ");
			fflush(stdout);
		} while (!scanOk(scanf("%f", &a2)));

	if (a3<0)
		do {
			printf("A(t,x,0) = ");
			fflush(stdout);
		} while (!scanOk(scanf("%f", &a3)));

	if (a4<0)
		do {
			printf("A(t,x,B) = ");
			fflush(stdout);
		} while (!scanOk(scanf("%f", &a4)));
}

void initmatrix() {
	int     i,j, B2 = B*B;
	/*
	        printf("Allocating memory...\n");
	        fflush (stdout);
	        A = (float *) malloc(sizeof(float)*B*B);
	        L = (float *) malloc(sizeof(float)*B);
	        LL = (float *) calloc(B,sizeof(float));
	        LR = (float *) calloc(B,sizeof(float));
	        LS = (float *) calloc(B,sizeof(float));
	 
	 
	 
	        if (A==NULL||L==NULL||LL==NULL||LR==NULL||LS==NULL) {
	                printf("slave:Not engough free memory.\n");
	                cleanup();
	                exit(4);
	        }
	        for (i=0; i<B2; i++)  *(A + i)= 0;
	*/

	for (i=0; i<B; i++)
		for (j=0; j<B; j++)  A[i][j] = 0;


	for (i=0; i<B; i++) {
		A[B-1][i] = a1; /*Down*/
		A[0][i] = a2;   /*Up*/
	}

	for (i=0; i<B; i++) {
		A[i][0] = a3;   /*Left*/
		A[i][B-1] = a4; /*Right*/
	}


	/*Set corner values*/
	A[0][0]=(A[0][1]+A[1][0])/2.0;
	A[B-1][B-1]=(A[B-2][B-1]+A[B-1][B-2])/2.0;
	A[0][B-1]=(A[0][B-2]+A[1][B-1])/2.0;
	A[B-1][0]=(A[B-2][0]+A[B-1][1])/2.0;
}

/*
 * End-of-run hook called by the master.
 *
 * The grid and the line buffers are statically allocated arrays, so there is
 * nothing to release. The previous implementation passed the array A to
 * free(), which is undefined behaviour for memory that did not come from
 * malloc/calloc/realloc. Only the diagnostic message remains.
 */
void cleanup(void) {
	fprintf(stderr,"\nmaster:No dynamic memory to free...\n");
}

/*
 * Write the B x B grid to /tmp/out.dat, one value per line with an empty
 * line after each row, and print the row index to stdout as a progress
 * indicator.
 *
 * Exits with status 44 when the file cannot be opened or when closing it
 * reports a write error.
 */
void printMatrix() {
	int	x,y;
	FILE *fp;

	if ((fp=fopen("/tmp/out.dat","w"))==NULL) {
		perror("ERROR:out.dat");
		exit(44);
	}
	for (x=0; x<B; x++) {
		printf("\n%02d \n",x);
		fflush(stdout);
		for (y=0; y<B; y++)
			fprintf(fp,"%2.2f\n", A[x][y]);
		fprintf(fp,"\n");
	}

	/* fclose() flushes the buffered data; a failure here means the file is
	   incomplete, so treat it like a failed open. */
	if (fclose(fp)!=0) {
		perror("ERROR:out.dat");
		exit(44);
	}
}


/*
 * Program entry point. Rank 0 is the master, every other rank is a worker.
 *
 * Master: sends B, C, the grid and LOOPS to each worker, then the first and
 * last column of the band that worker owns, collects each worker's grid into
 * tbl, merges the worker's band into A, writes the result and cleans up.
 *
 * Worker: receives the parameters and its band, runs LOOPS iterations of
 * expandHeat() followed by an exchange of edge columns with its neighbours,
 * and sends its grid back to the master.
 *
 * Returns 0 on success and 6 when the run cannot be partitioned (no worker
 * processes, or B-2 not divisible by the number of workers).
 */
int main(int argc, char **argv) {
	/*
	  cpp : columns per process
	*/
	int rank, init=99, size;
	/* Number of floats in one storage row of A (and tbl). Rows of the
	   logical B x B grid are this far apart in memory, so B rows occupy
	   B*rowlen floats; transferring only B*B floats would cut the grid
	   short whenever B is smaller than the array dimension. */
	int rowlen = (int)(sizeof A[0] / sizeof A[0][0]);

	/*getinput();*/
	initmatrix();

	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);
	nproc = size - 1;

	/* Both checks below depend only on values that are identical on every
	   rank at this point, so all ranks leave together and can finalize
	   cleanly. */
	if (nproc < 1) {
		/* Without this guard the modulo below would divide by zero. */
		fprintf(stderr,"need at least 2 MPI processes (master + workers), got %d\n",size);
		MPI_Finalize();
		return 6;
	}

	if ((B-2)%nproc) {
		fprintf(stderr,"B-2=%d not dividable by %d\n",B-2,nproc);
		MPI_Finalize();
		return 6;
	}

	cpp=(B-2)/nproc;
	fprintf(stderr, "nproc is %d\n", nproc);
	fflush(stdout);


	if (rank == 0) {
		/*Send general info + table A + size + loops*/
		for (i=1; i<= nproc; i++ ) {
			MPI_Ssend(&B, 1, MPI_INT,i,init, MPI_COMM_WORLD);
			MPI_Ssend(&C, 1, MPI_FLOAT,i, init, MPI_COMM_WORLD);
			MPI_Ssend(&A[0][0], B*rowlen, MPI_FLOAT,i,init, MPI_COMM_WORLD);
			MPI_Ssend(&LOOPS, 1, MPI_LONG, i, init, MPI_COMM_WORLD);
		}

		/*Send borders to each child*/
		for (k=1,i=1;i<=nproc;i++) {
			left=k;
			right=k+cpp-1;
			MPI_Ssend(&left, 1, MPI_INT, i, init, MPI_COMM_WORLD);
			MPI_Ssend(&right, 1, MPI_INT, i, init, MPI_COMM_WORLD);
			k=k+cpp;
		}

		/*Receive results from each child*/
		for (k=1,i=1;i<=nproc;i++) {
			fprintf(stderr, "Before received %d\n", i);
			fflush(stdout);
			MPI_Recv(&tbl[0][0], B*rowlen, MPI_FLOAT, i, 10, MPI_COMM_WORLD, &status);
			fprintf(stderr, "I received %d\n", i);
			fflush(stdout);
			/* Only the columns owned by worker i are taken from tbl. */
			updateMCols(k,k+cpp-1);
			k=k+cpp;
		}

		fflush(stdout);
		printMatrix();
		cleanup();
	}
	else {
		/* B must arrive first: the size of the grid message depends on it. */
		MPI_Recv(&B, 1, MPI_INT, 0, init, MPI_COMM_WORLD, &status);
		MPI_Recv(&C, 1, MPI_FLOAT, 0, init, MPI_COMM_WORLD, &status);
		MPI_Recv(&A[0][0], B*rowlen, MPI_FLOAT, 0, init, MPI_COMM_WORLD, &status);
		MPI_Recv(&LOOPS, 1, MPI_LONG, 0, init, MPI_COMM_WORLD, &status);


		fprintf(stderr,"0:nproc:%d,B:%d,C:%f,LOOPS:%ld\n",nproc,B,C,LOOPS);

		MPI_Recv(&left, 1, MPI_INT, 0, init, MPI_COMM_WORLD, &status);
		MPI_Recv(&right, 1, MPI_INT, 0, init, MPI_COMM_WORLD, &status);

		fprintf(stderr,"1:left:%d,right:%d\n",left,right);

		/* The outermost bands have no neighbour on one side; seed that
		   side's buffer from the fixed edge column of the grid. */
		if (left==1)
			for (j=0;j<B;j++) LL[j]=A[j][0];

		if (right==B-2)
			for (j=0;j<B;j++) LR[j]=A[j][B-1];


		for (i=0; i<LOOPS; i++) {
			expandHeat(left,right);
			/* Exchange order matters: the sends are synchronous, so each
			   send must meet the matching receive on the neighbour. */
			if (left!=1)    sendL(rank-1);
			if (right!=B-2) getVR(rank+1);
			if (right!=B-2) sendR(rank+1);
			if (left!=1)    getVL(rank-1);
		}

		MPI_Ssend(&A[0][0], B*rowlen, MPI_FLOAT, 0, 10, MPI_COMM_WORLD);
	}
	MPI_Finalize();
	return 0;
}

/*
 * Run one sweep over rows 0 .. B-2 of the column band leftcol..rightcol.
 *
 * For every visited cell the value it holds before the update is stored in
 * L[column] afterwards, so getM() can still read that older value. Row 0 is
 * only recorded in L and never updated; every other visited row is updated
 * through heatSpot().
 */
void expandHeat(int leftcol,int rightcol) {
	int row, col;
	float before;

	for (row = 0; row < B-1; row++) {
		col = leftcol;
		while (col <= rightcol) {
			/* Remember the value first: heatSpot() overwrites the cell, and
			   L[col] must keep its previous content until heatSpot() is
			   done with it. */
			before = A[row][col];
			if (row != 0)
				heatSpot(row, col, leftcol, rightcol);
			L[col] = before;
			col++;
		}
	}
}

/*
 * Update one grid cell in place:
 *
 *   A[j][k] += C * (down + up + left + right - 4 * A[j][k])
 *
 * The cell below is read straight from A; the other three neighbours are
 * fetched through getM(), which decides whether the value comes from A, from
 * the saved line L or from a neighbour's column buffer.
 *
 * j, k             : row and column of the cell.
 * leftcol, rightcol: column band of the calling process, passed on to getM().
 *
 * Always returns 1.
 */
int heatSpot(int j,int k,int leftcol,int rightcol) {
	float below = A[j+1][k];                        /*Down*/
	float above = getM(j-1,k,1,leftcol,rightcol);   /*Up*/
	float west  = getM(j,k-1,2,leftcol,rightcol);   /*Left*/
	float east  = getM(j,k+1,3,leftcol,rightcol);   /*Right*/

	A[j][k]=(A[j][k]+(float)(C)*(below+above+west+east-(float)4.0*A[j][k]));
	return 1;
}

/*
 * Return the value to use for the neighbour cell (j,k).
 *
 * j, k             : coordinates of the neighbour, not of the cell being
 *                    updated.
 * place            : which neighbour it is: 1 = up, 2 = left, 3 = right.
 * leftcol, rightcol: column band of the calling process.
 *
 * Columns 0 and B-1 are always read from A, whatever place is. Otherwise:
 *   up    : A[j][k] when j is 0, else L[k];
 *   left  : LL[j] when k is the column just left of the band, else L[k];
 *   right : LR[j] when k is the column just right of the band, else A[j][k].
 * Any other place value prints a diagnostic to stdout and yields -2.
 */
float getM(int j,int k,int place,int leftcol,int rightcol) {

	if (k==0 || k==B-1)
		return A[j][k];

	switch (place) {
	case 1: /*Up*/
		return (j==0) ? A[j][k] : L[k];
	case 2: /*Left*/
		return (k+1==leftcol) ? LL[j] : L[k];
	case 3: /*Right*/
		return (k-1==rightcol) ? LR[j] : A[j][k];
	default:
		break;
	}

	printf("\nError 1: j:%d,k:%d,leftcol:%d,rightcol:%d\n",
	    j,k,leftcol,rightcol);
	fflush(stdout);
	return -2;
}

/*
 * Receive B floats with message tag 3 from process org into LL, the buffer
 * holding the left neighbour's column. Blocks until the message arrives.
 *
 * Returns the status code of MPI_Recv().
 */
int getVL(int org) { /*3*/
	return MPI_Recv(LL, B, MPI_FLOAT, org, 3, MPI_COMM_WORLD, &status);
}

/*
 * Receive B floats with message tag 2 from process org into LR, the buffer
 * holding the right neighbour's column. Blocks until the message arrives.
 *
 * Returns the status code of MPI_Recv().
 */
int getVR(int org) { /*2*/
	return MPI_Recv(LR, B, MPI_FLOAT, org, 2, MPI_COMM_WORLD, &status);
}


/*
 * Copy column `left` of the grid into the staging buffer LS and send it to
 * process dest with message tag 2 (the tag getVR() receives on).
 *
 * All B rows are copied so that every element of the B-float message comes
 * from the grid; previously the last element was left over from whatever LS
 * held before.
 *
 * Returns the status code of MPI_Ssend().
 */
int sendL(int dest){ /*2*/
	int j;
	for(j=0;j<B;j++) {
		LS[j]=A[j][left];
	}
	return MPI_Ssend(LS, B, MPI_FLOAT, dest, 2, MPI_COMM_WORLD);
}

/*
 * Copy column `right` of the grid into the staging buffer LS and send it to
 * process dest with message tag 3 (the tag getVL() receives on).
 *
 * All B rows are copied so that every element of the B-float message comes
 * from the grid; previously the last element was left over from whatever LS
 * held before.
 *
 * Returns the status code of MPI_Ssend().
 */
int sendR(int dest){ /*3*/
	int j;
	for(j=0;j<B;j++) {
		LS[j]=A[j][right];
	}
	return MPI_Ssend(LS, B, MPI_FLOAT, dest, 3, MPI_COMM_WORLD);
}

/*
 * Copy columns leftcol..rightcol of rows 0 .. B-2 from tbl into A.
 * Row B-1 is not copied.
 *
 * Always returns 0.
 */
int updateMCols(int leftcol,int rightcol) {
	int j,k;

	for(j=0;j<B-1;j++) {
		for (k=leftcol;k<=rightcol;k++)
			A[j][k]=tbl[j][k];
	}
	return 0;
}

