!caf Ising03.f90 -o is; echo 12 0.20 1000 1 |  cafrun -n 4 ./is
!
!If you need more processes than the cores of your CPU, use:
!cafrun -n 20 -oversubscribe ./is
!
!================== Ising03.f90 ===================
!--------------------------------------------------
!
module Ising03
 use, intrinsic :: iso_fortran_env
 implicit none
 integer , parameter      :: dp = real64
 integer                  :: L, Lloc, Limg, N, Nloc, Nimg
 integer , allocatable    :: s(:,:)[:,:]
 real(dp), dimension(0:4) :: prob
 real(dp)                 :: beta, acceptance, r
 integer                  :: nsweep, start
 integer                  :: myimg, myi, myj, fout
 character(1000)          :: fname_out
!--------------------------------------------------
contains
!--------------------------------------------------
 subroutine init
  integer :: i, iloc, jloc
  Nimg  = num_images()
  myimg = this_image()
  if(myimg == 1) then
   print '(A)', '# Enter L, beta, nsweep, start (0 cold/1 hot):'
   read *, L, beta, nsweep, start
  end if
  call co_broadcast(L     , 1); call co_broadcast(beta , 1)
  call co_broadcast(nsweep, 1); call co_broadcast(start, 1)
    
  Limg = int(sqrt(Nimg+1.0e-6_dp))
  Lloc = L/Limg
  Nloc = Lloc*Lloc
  N    = L*L
    
  ! Allocate with halo/ghost cells: 0 and Lloc+1
  allocate(s(0:Lloc+1, 0:Lloc+1)[Limg, *])
  myi = this_image(s, 1); myj = this_image(s, 2)

  if(myimg == 1) then
   write(fname_out,'(A,I0.3,A,F5.3,A)') 'L',L,'b',beta,'.out'
   open(newunit=fout, file=fname_out)
  end if
    
  call random_init(.false., .true.) ! random_init(REPEATABLE, IMAGE_DISTINCT)
  !initialize probabilities for E_\nu > E_mu
  prob = 0.0_dp
  ! Sum_<ik> s_i^\mu = -4, -2, 0, +2, +4
  ! Need:     exp(-2*beta*4), exp(- 2*beta*2) 
  ! and not:  exp( 2*beta*0), exp(  2*beta*2),exp(2*beta*4)
  do i=2, 4, 2 !i = dE/2 = (E_nu-E_mu)/2=2,4
   prob(i) = exp(-2.0_dp * beta * i)
  end do

  !initial configuration:
  select case(start)
  case(0)
   s = 1
  case(1)
   do  jloc=1, Lloc
    do iloc=1, Lloc
     call random_number(r)
     if(r < 0.5_dp) then; 
      s(iloc,jloc) = 1; 
     else; 
      s(iloc,jloc) = -1; 
     end if
    end do
   end do
  end select
  acceptance = 0.0_dp
 end subroutine init
!--------------------------------------------------
 subroutine update_halos()
  integer :: ni, nj
  ! Sync before communication to ensure all images have finished updates
  sync all
  ! Bulk communication of boundaries (East/West/North/South)
  ! East-West
  ni = myi + 1; if(ni > Limg) ni = 1
  s(Lloc+1, 1:Lloc) = s(1   , 1:Lloc)[ni, myj] 
  ni = myi - 1; if(ni < 1) ni = Limg
  s(0     , 1:Lloc) = s(Lloc, 1:Lloc)[ni, myj]
    
  ! North-South
  nj = myj + 1; if(nj > Limg) nj = 1
  s(1:Lloc, Lloc+1) = s(1:Lloc, 1   )[myi, nj]
  nj = myj - 1; if(nj < 1) nj = Limg
  s(1:Lloc, 0     ) = s(1:Lloc, Lloc)[myi, nj]
    
  ! Sync after communication to ensure halos are populated before calculation
  sync all
 end subroutine update_halos
!--------------------------------------------------
 subroutine met()
  integer :: p, iloc, jloc, iglob, jglob, snn, dE
    
  sweep: do p = 0, 1

   call update_halos()

   do  jloc = 1, Lloc
    do iloc = 1, Lloc

     iglob  = (myi - 1) * Lloc + iloc
     jglob  = (myj - 1) * Lloc + jloc
     if (mod(iglob + jglob, 2) /= p) cycle

     ! Clean stencil: No IFs or co-indexing in hot loop 
     snn = s(iloc+1, jloc  ) + s(iloc-1, jloc  ) + &
           s(iloc  , jloc+1) + s(iloc  , jloc-1)

     dE  = snn * s(iloc, jloc)

     if (dE <= 0) then
      s(iloc, jloc) = -s(iloc, jloc)
      acceptance = acceptance + 1.0_dp
     else
      call random_number(r)
      if (r < prob(dE)) then
       s(iloc, jloc) = -s(iloc, jloc)
       acceptance = acceptance + 1.0_dp
      end if
     end if

    end do ! do iloc = 1, Lloc
   end do  ! do jloc = 1, Lloc

  end do sweep
 end subroutine met
!--------------------------------------------------
 function E()
  integer :: E, en, iloc, jloc

  call update_halos()

  en = 0
  do  jloc = 1, Lloc
   do iloc = 1, Lloc

     ! Forward neighbors only to avoid double counting
     en = en + (s(iloc+1, jloc) + s(iloc, jloc+1)) * s(iloc, jloc)

    end do
   end do

   call co_sum(en)
   E = -en

  end function E
!--------------------------------------------------
  function M()
   integer :: M, mag

   mag = sum(s(1:Lloc, 1:Lloc))

   call co_sum(mag)
   M = mag

  end function M
!--------------------------------------------------
  subroutine measure()
   integer :: ene, mag

   ene = E(); mag = M()

   if(myimg == 1) write(fout, '(2I12)') ene, mag

  end subroutine measure
!--------------------------------------------------
  subroutine endsim()
   !Exercise: save configuration in a file
    call co_sum(acceptance)
    if(myimg == 1) write(fout,'(A,F15.6)') '# acceptance= ', acceptance/N/nsweep
  end subroutine endsim
!--------------------------------------------------
  subroutine co_abort(msg)
    character(*), intent(in) :: msg
    write(error_unit,'(A,I3)') 'Error: '//trim(msg)//' Image: ', myimg
    error stop 1
  end subroutine co_abort
!--------------------------------------------------
end module Ising03
!--------------------------------------------------
program ising2d
  use Ising03
  implicit none
  integer :: isweep
  call init()
  do isweep = 1, nsweep
    call met
    call measure
  end do
  call endsim
end program ising2d
!--------------------------------------------------
