c     assign9.F
c     Daniel Wedul
c     last modified 12/13/01

ccc This is a program that solves a 1 dimensional heat
c   diffusion problem using parallel processes

ccccccccccccccccccccccccccccccccccccccccccccccccc
ccccccccccc     WARNING!!     ccccccccccccccccccc
ccccccccccccccccccccccccccccccccccccccccccccccccc
c     this program must be compiled useing mpif77
c     the recomended compile expression is
c       mpif77 -o 9 assign9.F
c     here 9 is the name of this compiled program
c
ccc END OF NOTES SECTION, THE ACTUAL PROGRAM STARTS HERE ccc
      PROGRAM assign9
      IMPLICIT NONE

      INCLUDE 'mpif.h'
      
c   the variable output is for debugging.
c   OUTPUT = 0	   only runtime is outputted
c   OUTPUT = 1     data is outputted at the end of the program
c   OUTPUT = 2     data is outputted at each timestep
      INTEGER OUTPUT
      PARAMETER(OUTPUT = 0)
      INTEGER 	error_code,	!error code
     &		numprocs,	!number of procs
     &		myid,		!which proc I am
     &		mysize,		!number of nodes each proc will do
     &		status		!recv error stuff
	
      DOUBLE PRECISION  myUold(0:1000000),	!old temp values
     & 			myUnew(0:1000000),	! new temp values
     &			k,		!thermal conductivity of medium
     &			c,		!heat capacity of medium
     &			rho,		!density if medium
     &			L, 		!the length of the medium
     &			h, 		!length between two points
     &			t0,		!start time
     &			tn,		!end time
     &			dt,		!change in time
     &			initval,	!initial starting condition
     &			leftBC,		!left Boundry
     &			rightBC,	!right Boundry 
     &			r,		!conbination of variables
     &			wtstart,	!start time
     &			wtstop,		!end time
     &			actualt		!actual value of timestep
				
      DOUBLE PRECISION  tin(100),	!input variable for times (t in)
     &			tave		!average of times
				
      INTEGER 	n,	!number of nodes in whole problem
     &		t,	!timestep variable
     & 		i,	!loop variable
     &		istart, !beginning index of myU*
     &		iend	!end indes of myU*
			
C  set model parameters
      n = 800000
      L = n	!this helps keep r<.5
      leftBC = 100
      initval = 10
      rightBC = 0
      k = 1.0
      c = 2.0
      rho = 5.0
      t0 = 0.0
      tn = 25.0
      dt = 0.2

C start up MPI
      CALL MPI_INIT(error_code)
      CALL MPI_COMM_SIZE(MPI_COMM_WORLD, numprocs, error_code)      
      CALL MPI_COMM_RANK(MPI_COMM_WORLD, myid, error_code)

c initialize variables
      !set main values
      h = L/(n-1)
      r = k * dt / (c * rho * h*h)
      mysize = n/numprocs
      DO i = 0, mysize+1
	myUold(i) = initval
	myUnew(i) = initval
      ENDDO
      istart = 1
      iend = mysize 
      !take care of left and right boundry conditions
      IF (myid .EQ. 0) THEN
	myUold(1) = leftBC
	myUnew(1) = leftBC
	istart = 2	
      ENDIF
      IF (myid .EQ. (numprocs - 1)) THEN
	myUold(mysize) = rightBC
	myUnew(mysize) = rightBC
	iend = mysize - 1
      ENDIF

c     start timer
      wtstart = MPI_Wtime()
      
ccc   start calculating
      DO t=0,( (tn-t0)/dt + 1 )
	actualt = t*dt + t0
    ! maybe output results
	IF ((mysize .LE. 6) .OR. (OUTPUT .EQ. 2)) THEN
	  WRITE(6,1000) myid, actualt, (myUold(i), i=1,mysize)
	ENDIF
1000  FORMAT(I2, " --> t=", F5.2,": " <mysize>F10.6)

    ! do the processor communication
	! send myUold(mysize) to (myid+1)mod numprocs
	CALL MPI_SEND(myUold(mysize), 1, MPI_DOUBLE_PRECISION, 
     &		mod((myid+1), numprocs), 10, MPI_COMM_WORLD,
     &		error_code)
	! recieve myUold(0) from (myid+numprocs-1) mod numprocs
	CALL MPI_RECV(myUold(0), 1, MPI_DOUBLE_PRECISION,
     &		mod(myid+numprocs-1, numprocs), 10, MPI_COMM_WORLD,
     &		status, error_code)
	! send myUold(1) to (myid + numprocs - 1) mod numprocs
	CALL MPI_SEND(myUold(1), 1, MPI_DOUBLE_PRECISION, 
     &		mod(myid+numprocs-1, numprocs), 20, MPI_COMM_WORLD,
     &		error_code)
	! receive nuUold(mysize+1) from (myid+1)mod numprocs
	CALL MPI_RECV(myUold(mysize+1), 1, MPI_DOUBLE_PRECISION,
     &		mod(myid+1, numprocs), 20, MPI_COMM_WORLD,
     &		status, error_code)

	! do the actual calculations
	DO i=istart,iend
	  myUnew(i)=r*(myUold(i+1)-2*myUold(i)+myUold(i-1))+myUold(i)
	ENDDO

	! copy myUnew to myUold
	DO i=istart,iend
	  myUold(i)=myUnew(i)
	ENDDO
      ENDDO

c     stop timer
      wtstop= MPI_Wtime()
      
      IF (OUTPUT .EQ. 1) THEN
	WRITE(6,1020) (myUold(i), i=1,mysize)
      ENDIF
1020  FORMAT(<mysize>F10.6)

      tave = wtstop-wtstart
      WRITE(6,1010) myid, tave
1010  FORMAT(I2, ": time = ", F20.15, " seconds")
      
c make all processors wait here
      CALL MPI_BARRIER(MPI_COMM_WORLD, error_code)

ccc calculate and output mean time for program
      CALL MPI_GATHER(tave, 1, MPI_DOUBLE_PRECISION,
     &		tin, 1, MPI_DOUBLE_PRECISION,
     &		0, MPI_COMM_WORLD, error_code)
      IF (myid .EQ. 0) THEN
	tave = 0
	DO i=1,numprocs
	  tave = tave + tin(i)
	ENDDO
	WRITE(6,1040) tave
	WRITE(6,1050) (tave/numprocs)
      ENDIF
1040  FORMAT("  total processor time was ", F20.15, " seconds")
1050  FORMAT("average processor time was ", F20.15, " seconds")

c  close MPI
      CALL MPI_FINALIZE(error_code)

      END
