ccc   nbody.F
c     Daniel Wedul
c     last modified 12/14/01

ccc   This is a program to calculate the force due to gravity on 
c 	a group of particles with mass.

ccc   This program uses mpi and must be compiled using an mpi compiler
c     The recomended compile expression is 
c
c     mpif77 -o go_nbody nbody.F
c
c     in this case go_nbody is the name of the executable and
c	nbody.F is the name of this file

ccc   The file to send through qsub should look like this
c  #PBS -j oe
c  #PBS -lnodes=<numprocs>:compute
c
c  cd $PBS_O_WORKDIR
c
c  /usr/local/bin/mpprun -n <numprocs> go_nbody "< /workdir/input"

c  In this example <numprocs> is to be replaced by an integer which is
c	the number of processors to be used.
c	Also, go_nbody is the name of this code compiled.
c	/workdir/input should be replaced with the path and name 
c	of the input file.

c  There is a c++ program that is compiled and in the same directory as
c  this source code.  It is called makenumbers.  It outputs random numbers
c  in the format needed for this program.  The source for it will be
c  included in this file at the end of the fortan code.  To run it type
c  	./makenumbers 10 >input
c  here, 10 is the number of particles to create and input is the name of
c  the file the numbers are to be stored in.

ccc END OF NOTES SECTION, THE ACTUAL PROGRAM STARTS HERE ccc 

      PROGRAM nbody
      IMPLICIT NONE

      INCLUDE 'mpif.h'

      DOUBLE PRECISION G
      PARAMETER(G=6.67D-11)

      INTEGER MAX_PARTICLES
      PARAMETER(MAX_PARTICLES = 80000)
				
      DOUBLE PRECISION  m(MAX_PARTICLES),
     &			x(MAX_PARTICLES),
     &			y(MAX_PARTICLES),
     &			F(MAX_PARTICLES),
     &			THETA(MAX_PARTICLES),
     &			mym(MAX_PARTICLES),
     &			myx(MAX_PARTICLES),
     &			myy(MAX_PARTICLES),
     &			mytempm(MAX_PARTICLES),
     &			mytempx(MAX_PARTICLES),
     &			mytempy(MAX_PARTICLES),
     &			mySFx(MAX_PARTICLES),
     &			mySFy(MAX_PARTICLES),
     &			myF(MAX_PARTICLES),
     &			myTHETA(MAX_PARTICLES),
     &			F_ij, F_ijx, F_ijY, theta_ij, r_ij,
     &			wt1, mywt, wttot

      INTEGER n, myn
      INTEGER i, j, k

      INTEGER error_code, numprocs, myid, status
      
      CALL MPI_INIT(error_code)
      CALL MPI_COMM_SIZE(MPI_COMM_WORLD, numprocs, error_code)
      CALL MPI_COMM_RANK(MPI_COMM_WORLD, myid, error_code)

      ! get the number of particles
      IF(myid .EQ. 0) THEN
	READ(5,*), n
      ENDIF

      ! tell all processors how many particles there are
      CALL MPI_BCAST(n, 1, MPI_INTEGER, 0, MPI_COMM_WORLD,
     &			error_code)

      !stop the processors if n is too big
      IF (n .gt. MAX_PARTICLES) THEN
	WRITE(6,1000) myid
1000	FORMAT(I2, ": Fatal error: too many particles")
        CALL MPI_FINALIZE(error_code)
	STOP
      ENDIF

      !make sure number of particles is evenly divisible by procs
      IF (MOD(n,numprocs) .NE. 0) THEN
	WRITE(6,1010) myid, n, numprocs
1010	FORMAT(I2, ": Fatal error: " I5 " not divisible by ", I2)
        CALL MPI_FINALIZE(error_code)
	STOP
      ENDIF

      !initialize all arrays
      DO i=1,MAX_PARTICLES
	m(i) = 0
	x(i) = 0
	y(i) = 0
	F(i) = 0
	THETA(i) = 0
	mym(i) = 0
	myx(i) = 0
	myy(i) = 0
	mytempm(i) = 0
	mytempx(i) = 0
	mytempy(i) = 0
	mySFx(i) = 0
	mySFy(i) = 0
	myF(i) = 0
	myTHETA(i) = 0
      ENDDO

      !read in the particles
      IF (myid .EQ. 0) THEN
	DO i=1,n
	  READ(5,*) x(i), y(i), m(i)
	ENDDO
      ENDIF

c get start time
      wt1 = MPI_Wtime()

      myn = n/numprocs

      !distribute the particles
      CALL MPI_SCATTER( x,   myn, MPI_DOUBLE_PRECISION,
     &		        myx, myn, MPI_DOUBLE_PRECISION,
     &			0, MPI_COMM_WORLD, error_code  )
      CALL MPI_SCATTER( y,   myn, MPI_DOUBLE_PRECISION,
     &		        myy, myn, MPI_DOUBLE_PRECISION,
     &			0, MPI_COMM_WORLD, error_code  )
      CALL MPI_SCATTER( m,   myn, MPI_DOUBLE_PRECISION,
     &		        mym, myn, MPI_DOUBLE_PRECISION,
     &			0, MPI_COMM_WORLD, error_code  )

      !do calculations on my particles
      DO i=1,myn
	mySFx(i) = 0
	mySFy(i) = 0
	DO j=1,myn
	  IF (i .NE. j) THEN
	    r_ij = DSQRT( (myx(j)-myx(i))**2 +
     &			  (myy(j)-myy(i))**2  )
	    F_ij = G*mym(i)*mym(j) / (r_ij**2)
	    theta_ij = DATAN2( myy(j)-myy(i), myx(j)-myx(i))
	    F_ijx = F_ij*COS(theta_ij)
	    F_ijy = F_ij*SIN(theta_ij)

	    mySFx(i) = mySFx(i) + F_ijx
	    mySFy(i) = mySFy(i) + F_ijy
	  ENDIF
	ENDDO
      ENDDO

cc    calculate the total forces from other procs particles
      DO k = 1, numprocs-1
	!send myx to another proc and get another procs x's
	CALL MPI_SEND(myx, myn, MPI_DOUBLE_PRECISION, 
     &		      MOD(myid+k, numprocs), 10, MPI_COMM_WORLD,
     &		      error_code			        )
	CALL MPI_RECV(mytempx, myn, MPI_DOUBLE_PRECISION,
     &		      MOD(myid+numprocs-k, numprocs), 10, MPI_COMM_WORLD,
     &		      status, error_code				 )

	!send myy to another proc and get another procs y's
	CALL MPI_SEND(myy, myn, MPI_DOUBLE_PRECISION, 
     &		      MOD(myid+k, numprocs), 10, MPI_COMM_WORLD,
     &		      error_code			        )
	CALL MPI_RECV(mytempy, myn, MPI_DOUBLE_PRECISION,
     &		      MOD(myid+numprocs-k, numprocs), 10, MPI_COMM_WORLD,
     &		      status, error_code				 )

	!send mym to another proc and get another procs m's
	CALL MPI_SEND(mym, myn, MPI_DOUBLE_PRECISION, 
     &		      MOD(myid+k, numprocs), 10, MPI_COMM_WORLD,
     &		      error_code			        )
	CALL MPI_RECV(mytempm, myn, MPI_DOUBLE_PRECISION,
     &		      MOD(myid+numprocs-k, numprocs), 10, MPI_COMM_WORLD,
     &		      status, error_code				 )


	!do calculations with these particles
	DO i=1,myn
	  DO j=1,myn
	    r_ij = dsqrt( ( mytempx(j)-myx(i) )**2 +
     &	    		  ( mytempy(j)-myy(i) )**2  )
	    F_ij = (G * mym(i) * mytempm(j)) / (r_ij**2)
	    theta_ij = DATAN2( mytempy(j)-myy(i),mytempx(j)-myx(i) )
	    F_ijx = F_ij*cos(theta_ij)
	    F_ijy = F_ij*sin(theta_ij)

	    mySFx(i) = mySFx(i) + F_ijx
	    mySFy(i) = mySFy(i) + F_ijy
	  ENDDO
	ENDDO

      ENDDO

      !convert cartisian to polar
      DO i=1,myn
	myF(i) = DSQRT( mySFx(i)**2 + mySFy(i)**2 )
	myTHETA(i) = DATAN2( mySFy(i), mySFx(i) )
      ENDDO

      !send all particles to the main processor
      CALL MPI_GATHER(myF, myn, MPI_DOUBLE_PRECISION,
     &			F, myn, MPI_DOUBLE_PRECISION,
     &			0, MPI_COMM_WORLD, error_code )
      CALL MPI_GATHER(myTHETA, myn, MPI_DOUBLE_PRECISION,
     &			THETA, myn, MPI_DOUBLE_PRECISION,
     &			0, MPI_COMM_WORLD, error_code    )

      !this is the end my friend of the calculation stuff
      !get end time
      mywt = MPI_Wtime() - wt1

      IF ( (myid .EQ. 0) .AND. (n .LE. 24) ) THEN
	WRITE(6,2000)
	DO i=1,n
	  WRITE(6,2010) i, F(i), THETA(i),x(i),y(i),m(i)
	ENDDO
      ENDIF
c 2000  FORMAT("  PARTICLE       F           THETA")
c 2010  FORMAT("  ", I5, "   ", E15.5, "   ", F7.3) 
c   #####  #.#########E-##  ##.####### | ##########.  ##########.  ##########.
c PARTICLE  F                THETA     | x            y            m
2000  FORMAT("PARTICLE",2X,"F",16X,"THETA",5X,"| x",12X,"y",12X,"m")
2010  FORMAT(2X,I5,2X,E15.9,2X,F10.7," | ",F11.0,2X,F11.0,2X,F11.0)
c   do time stuff
      !output processor times
      WRITE(6,3000) myid, mywt
3000  FORMAT(I2, ": time = ", F20.15, " seconds")

ccc   calculate and output mean time for all procs
      CALL MPI_REDUCE(mywt, wttot, 1, MPI_DOUBLE_PRECISION,
     &			MPI_SUM, 0, MPI_COMM_WORLD, error_code)

      IF (myid .EQ. 0) THEN
	WRITE(6, 3010) wttot
	WRITE(6, 3020) (wttot/numprocs)
      ENDIF
3010  FORMAT("  total processor time was ", F20.15, " seconds.")
3020  FORMAT("average processor time was ", F20.15, " seconds.")

      CALL MPI_FINALIZE(error_code)

      END

ccc below is C++ source code for a program to create random particles
c
c  #include<iostream.h>
c  #include<stdlib.h>
c
c  int main(int argc, char *argv[]) 
c  {
c       int i;
c       int n = atoi(argv[1]);
c       cout << n << endl;
c       for(i=0; i<n; i++) 
c         { cout << rand() << " " << rand() << " " << rand() << endl; }
c       return 0; 
c  }  // END main()
