! 
! Copyright (c) 2008 Kristopher L. Kuhlman (kuhlman at hwr dot arizona dot edu)
! 
! Permission is hereby granted, free of charge, to any person obtaining a copy
! of this software and associated documentation files (the "Software"), to deal
! in the Software without restriction, including without limitation the rights
! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
! copies of the Software, and to permit persons to whom the Software is
! furnished to do so, subject to the following conditions:
! 
! The above copyright notice and this permission notice shall be included in
! all copies or substantial portions of the Software.
! 
! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
! THE SOFTWARE.
! 
! Malama, B., A. Revil, and K.L. Kuhlman, 2009. A semi-analytical solution 
!  for transient streaming potentials associated with confined aquifer 
!  pumping tests, Geophysical Journal International, 176(3), 1007–1016.
! http://dx.doi.org/10.1111/j.1365-246X.2008.04014.x
!
! Malama, B., K.L. Kuhlman, and A. Revil, 2009. Theory of transient 
!  streaming potentials associated with axial-symmetric flow in unconfined 
!  aquifers, Geophysical Journal International, 179(2), 990–1003.
! http://dx.doi.org/10.1111/j.1365-246X.2009.04336.x
!

Program Driver
  ! $Id: driver4.f90,v 1.2 2009/03/22 20:06:54 kris Exp kris $

! needed for intel compiler
#ifdef INTEL
  use ifport, only : dbesj0, dbesj1     
#endif

  ! Pass data to routines using modules
  use shared_data         ! utility.f90

  ! constants and coefficients
  use constants, only : DP, SMALL, RONE, RZERO, PI
 
  use invlap_fcns, only : stehfest_coeff
  
  !! three-layer solution
  use three_layer

  implicit none

  real(DP), allocatable :: j0zero(:)
  real(DP) :: x, dx

  character(23) :: fmt
  character(75) :: outfile, infile
  integer :: i, j, w, numt, terms, minlogsplit, maxlogsplit
  integer :: splitrange, zerorange
  real(DP), allocatable :: GLx(:),GLw(:)
  
  ! vectors of results and intermediate steps
  integer, allocatable :: splitv(:)
  real(DP), allocatable  :: ts(:), td(:), dt(:) , fa(:)
  real(DP), allocatable :: finOneAqif(:,:), infOneAqif(:,:), totOneAqif(:,:)
  real(DP), allocatable :: oneAqifD(:), deriv(:)

  ! coordinates of pumping, injecting, image, and observation wells
  ! general reflection matrix
  ! distances from observation point to real & image wells
  real(DP), allocatable :: coord(:,:), r(:)
  real(DP), dimension(2) :: obs
  real(DP) :: depth, distb
  integer :: npts,nwells,BC, nn
  logical :: Lbdry

  !! k parameter in tanh-sinh quadrature (order is 2^k - 1)
  integer :: tsk, tsN, nst
  real(DP), allocatable :: tsw(:), tsa(:), sgn(:), tshh(:), tmp(:)
  integer, allocatable :: tskk(:), tsNN(:), ii(:)
  integer :: GLorder, j0split(1:2), naccel, err, m

  ! realted to dimensional solution
  Logical :: quiet, dipole
  real(DP) :: HC, PhiC, Qrate, PolErr

  ! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
  ! read input parameters from file some minimal error checking
  infile = 'input.dat'
  open(unit=19, file=infile, status='old', action='read')

  ! suppress output to screen, T/F dipole test, integer BC type,
  ! perpendicular distance to boundary from pumping well
  read(19,*) quiet, dipole, BC, distb

  ! dipole test has 2 pumping wells 
  if (dipole) then
     nwells = 2
  else
     nwells = 1
  end if

  ! not actually used if not dipole test (pumping well at origin automatically)
  allocate(coord(2,nwells))

  if (BC > 2 .or. BC < 0) then
     print *, 'BC = 0 for no boundary, 1 constant head, 2 for no-flow' 
     stop
  end if
  
  coord(1:2,1) = (/ 0.0,0.0 /) ! pumping well coordinates 
  if (nwells == 1) then
     read(19,*)
     read(19,*)
  else
     read(19,*) coord(1,2:nwells)  ! read in x-coordinates of pumping wells
     read(19,*) coord(2,2:nwells)  !  ""     y- ""               ""
  end if
  
  if (BC == 1 .or. BC == 2) then
     ! if constant head (1) or no-flow (2) BC, need image wells
     Lbdry = .true.
     allocate(r(2*nwells),sgn(2*nwells))
  else
     Lbdry = .false.
     allocate(r(nwells),sgn(nwells))
  end if

  read(19,*) Qrate  ! injection is -Qrate, pumping is +Qrate
  read(19,*) b(1:3) ! layer thicknesses
  read(19,*) Kr, kappa, Ss, Sy  
  read(19,*) sigma(1:3)
  read(19,*) gamell(1:3)  !! rho*g*ell = gamma*ell = C (only used to re-dimensionalize)
  read(19,*) numt
  read(19,*) obs(1:2),depth ! x,y,depth of observation point

  if (.not. quiet) then
     write(*,'(2(A,L1),A,I1,A,ES10.3)') 'quiet?', quiet, ' dipole?',dipole, ' BC:',BC, ' dist Q -> bdry:',distb
     write(*,111) 'layer b:',b(1:3)
     write(*,'(A,4(1X,ES10.3))') 'Kr,kappa,Ss,Sy:', Kr, kappa, Ss, Sy
     write(*,111) 'sigma:',sigma(1:3)
     write(*,111) 'gamma*ell:',gamell(1:3)
     write(*,'(A,1X,I4)') '# times',numt
     write(*,222) '(x_Q,y_Q): (',coord(1,1),', ',coord(2,1), ')'
     if (dipole) then
        write(*,222) '(x_I,y_I: (',coord(1,2),', ',coord(2,2),')'
     end if
     write(*,222) '(x_obs,y_obs): (',obs(1),', ',obs(2),')'
     write(*,'(A,ES10.3)') 'obs depth:',depth
  end if

111 format(A,3(1X,ES10.3))
222 format(2(A,ES10.3),A)  

  ! need some error-checking for electrical parameters
  if(any((/b(:),Kr,kappa,Ss,Sy,sigma(:)/) < SMALL)) then
     print *, 'input error: zero or negative aquifer parameters' 
     stop
  end if
  
  if(distb <= 0.0) then
     print *, 'distb should be positive' 
     stop
  end if
  
  if(obs(1) < -distb) then
     print *, 'observation point must be on this side of boundary' 
     stop
  end if

  if (dipole) then
     if (coord(1,2) < -distb) then
        print *, 'injection point must be on this side of boundary' 
        stop
     end if
  end if
  
  
  ! dimensionless thicknesses
  bd1 = b(1)/b(2) 
  bd3 = b(3)/b(2) 

  sgn(:) = -999.99

  ! Q -> obs
  r(1) = sqrt(obs(1)**2 + obs(2)**2)
  sgn(1) = +1.0
  
  if (.not. quiet)  write(*,*)  '---------------------------'

  if (Lbdry) then
     if (dipole) then
        ! I -> obs
        r(2) = sqrt((obs(1)-coord(1,2))**2 + (obs(2)-coord(2,2))**2)
        sgn(2) = -1.0
        ! I image -> obs
        r(4) = sqrt((obs(1)+2.0_DP*distb+coord(1,2))**2 + (obs(2)-coord(2,2))**2)
        if (BC == 2) then
           sgn(4) = -1.0
        elseif (BC == 1) then
           sgn(4) = +1.0
        end if
        
        ! Q image -> obs
        r(3) = sqrt((obs(1)+2.0_DP*distb)**2 + obs(2)**2)
        if (BC == 2) then
           sgn(3) = +1.0
        elseif (BC == 1) then 
           sgn(3) = -1.0
        end if

        npts = 4
        if (.not. quiet) then
           write(*,'(A)') '          '//'     Q     '//'     Inj   '//&
                & '    Q img  '//'    I Img  '
        end if
     else
        ! Q image -> obs 
        r(2) = sqrt((obs(1)+2.0_DP*distb)**2 + obs(2)**2)
        if (BC == 2) then
           sgn(2) = +1.0
        elseif (BC == 1) then
           sgn(2) = -1.0
        end if
        
        npts = 2
        if (.not. quiet) then
           write(*,'(A)') '          '//'     Q     '//'    Q img  '
        end if
     end if
  else
     if (dipole) then
        ! I -> obs
        r(2) = sqrt((obs(1)-coord(1,2))**2 + (obs(2)-coord(2,2))**2)
        sgn(2) = -1.0
        npts = 2
        if (.not. quiet) then
           write(*,'(A)') '          '//'     Q     '//'     I     '
        end if
     else
        npts = 1
        if (.not. quiet) then
           write(*,'(A)') '          '//'     Q     '
        end if
     end if
  end if
     
  ! non-dimensionalize all distances by aquifer thickness (second layer)
  r(:) = r(:)/b(2)

  ! zd is positive up from bottom of aquifer; depth down from surface
  zd = 1.0_DP + (b(1) - depth)/b(2)
  
  if (.not. quiet) then
     fmt = '(A, (ES10.3,1X))       '
     write(fmt(4:4),'(I1.1)') npts
     write(*,fmt) 'r_D:      ',r
     fmt(5:15) = '(1I10,001X)'
     write(*,fmt) 'sign of Q:',nint(sgn)
     write(*,*)  '---------------------------'
     write(*,'(A,ES10.3)') 'z_D:      ',zd
  end if

  ! perform some error checking on coordinates
  !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  layer = .false.

  if(zd > bd1 + 1.0 .or. zd < -bd3) then
     print *, 'input error: z coordinate out of range'
     stop  
  elseif(zd >= 1.0) then ! upper unpumped vadose zone
     layer(1) = .true.
     if(.not. quiet) then
        print *, 'observation point in vadose zone above unconfined layer'
     end if
  elseif(zd > 0.0) then  ! middle pumped unconfined aquifer
     layer(2) = .true.
     if(.not. quiet) then     
        print *, 'observation point in pumped unconfined aquifer'
     end if
  else                   ! lower unpumped aquiclude
     layer(3) = .true.
     if(.not. quiet) then
        print *, 'observation point in lower unpumped aquiclude'
     end if
  end if

  read(19,*) stehfestN
  allocate(stehfestV(stehfestN))
  stehfestV(:) = stehfest_coeff(stehfestN) 

  read(19,*) tsk, j0split(1:2), naccel, GLorder, nst 

  if(tsk - nst < 2) then
     print *, 'Tanh-Sinh k is too low (',tsk,') for given level&
          & of Richardson extrapolation (',nst,').  Increase tsk or decrease nst.'
     stop
  end if  

  if(any((/j0split(:),naccel, tsk/) < 1)) then
     print *, 'max/min split, # accelerated terms and tanh-sinh k must be >= 1' 
     stop
  end if
  
  ! solution vectors
  allocate(oneAqifD(numt), finOneAqif(numt,npts), infOneAqif(numt,npts))
  allocate(deriv(numt), totOneAqif(numt,npts))
  allocate(ts(numt), td(numt), splitv(numt), dt(numt)) !! ,ud(numt))

  terms = maxval(j0split(:))+naccel+1
  allocate(j0zero(terms))

  ! time is now read as a column rather than a long row,
  ! to accomidate the fact that PEST has a maximum row length
  do i=1,numt
     read(19,*) ts(i)
  end do
  
  read (19,*) outfile
  if(any(ts <= SMALL)) then
     print *, 'all times must be > 0' 
     stop
  end if

  alphaR = Kr/Ss
  Kz = kappa*Kr
  alphaY = b(2)*Kz/Sy
  alphaD = alphaY/alphaR
  sigmaD(1:3) = [ sigma(1)/sigma(2),1.0_DP,sigma(3)/sigma(2) ]
  td(1:numt) = alphaR*ts(1:numt)/b(2)**2

#ifdef DEBUG
  write(*,333) 'alpha_r:',alphaR
  write(*,333) 'K_z:', Kz
  write(*,333) 'alpha_y:',alphaY
  write(*,333) 'alpha_D:',alphaD
  write(*,'(A,3(1X,ES10.3))') 'sigma_D:',sigmaD
  write(*,333) 'b_{D,1}:',bd1
  write(*,333) 'b_{D,3}:',bd3
333 format(A,ES10.3)
#endif

  ! compute zeros of J0 bessel function
  !************************************
  ! asymptotic estimate of zeros - initial guess
  j0zero = (/((real(i-1,DP) + 0.75_DP)*PI,i=1,terms)/)
  do i=1,TERMS
     x = j0zero(i)
     NR: do
        ! Newton-Raphson
        dx = dbesj0(x)/dbesj1(x)
        x = x + dx
        if(abs(dx) < spacing(x)) then
           exit NR
        end if
     end do NR
     j0zero(i) = x
  end do

  ! split between finite/infinite part should be 
  ! small for large time, large for small time
  do i=1,numt
     zerorange = maxval(j0split(:)) - minval(j0split(:))
     minlogsplit = floor(minval(log10(td)))   ! min log(td) -> maximum split
     maxlogsplit = ceiling(maxval(log10(td))) ! max log(td) -> minimum split
     splitrange = maxlogsplit - minlogsplit + 1 
     splitv(i) = minval(j0split(:)) + &
          & int(zerorange*((maxlogsplit - log10(td(i)))/splitrange))
  end do

#ifdef DEBUG
  print *, 'computed splits (integer zeros): ',splitv(:)
  print *, 'location of splits (zeros of J0):'
  do i= minval(splitv(:)),maxval(splitv(:))
     print *, i,j0zero(i)
  end do
#endif
 
!!$  open(unit=222,file='debug.dbg',action='write',status='replace')

  do w = 1,npts
     rD = r(w)
     if (.not. quiet) then
        write(*,'(A,I3,A,ES10.3)') 'integrating obs -> well ',w,' rD:',rD
     end if

     !$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
     ! finite portion of Hankel integral for each time level

     if(.not. quiet) then
        write(*,*) 'integrate finite portion:'
        write(*,*) '=========================='
     end if

     if(.not. allocated(tsw)) then
        tsN = 2**tsk - 1
        allocate(tsw(tsN),tsa(tsN),fa(tsN),ii(tsN))
        allocate(tskk(nst),tsNN(nst),tshh(nst),tmp(nst))
     end if
     
     do i = 1, numt
        if(.not. quiet) then
           if (i == 1) then
              write (*,888) 'upper bound: ',j0zero(splitv(1))/rD
           elseif(splitv(i) /= splitv(i-1)) then
              write (*,888) 'upper bound: ',j0zero(splitv(i))/rD
           end if
        end if

        tsval = td(i)

        ! three-layer unconfined electrokinetic system
        !===========================
        
        tskk(1:nst) = (/ (tsk-m,m=nst-1,0,-1)/)
        tsNN(1:nst) = 2**tskk - 1
        tshh(1:nst) = 4.0_DP/(2**tskk)

        ! compute abcissa and for highest-order case (others are subsets)
        call tanh_sinh_setup(tskk(nst),j0zero(splitv(i))/rD,tsw(1:tsNN(nst)),tsa(1:tsNN(nst)))

        ! compute solution at densest set of abcissa
        do nn=1,tsNN(nst)
           fa(nn) = unconfined_aquifer_sp( tsa(nn) )
        end do

        tmp(nst) = (j0zero(splitv(i))/rD)/2.0_DP*sum(tsw(:)*fa(:))
        
        do j=nst-1,1,-1
           !  only need to re-compute weights for each subsequent step
           call tanh_sinh_setup(tskk(j),j0zero(splitv(i))/rD,tsw(1:tsNN(j)),tsa(1:tsNN(j)))
           
           ! compute index vector, to slice up solution 
           ! for nst'th turn count regular integers
           ! for (nst-1)'th turn count even integers
           ! for (nst-2)'th turn count every 4th integer, etc...
           ii(1:tsNN(j)) = (/( m* 2**(nst-j), m=1,tsNN(j) )/)
           
           ! estimate integral with subset of function evaluations and appropriate weights
           tmp(j) = (j0zero(splitv(i))/rD)/2.0_DP*sum(tsw(1:tsNN(j))*fa(ii(1:tsNN(j))))
        end do

        if (nst > 1) then
           call polint(tshh(1:nst),tmp(1:nst),0.0_DP,finOneAqif(i,w),PolErr)
        else
           finOneAqif(i,w) = tmp(1)
        end if
                
        if(.not. quiet) then
           write(*,777) 'tD=', td(i),' ',finOneAqif(i,w)
        end if

     end do
777  format(A,ES8.2,A,ES14.6E3)
888  format(A,ES9.2)  

     !$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
     ! "infinite" portion of Hankel  integral for each time level
     ! integrate between zeros of Bessel function, extrapolate 
     ! area from series of areas of alternating sign

     if(.not. quiet) then
        write(*,*)
        write (*,*) 'integrate infinite portion'
        write(*,*) '============================='
     end if

     if(.not. allocated(GLx)) then
        allocate(Glx(GLorder-2),GLw(GLorder-2))
        call gauss_lobatto_setup(GLorder,GLx,GLw)  ! get weights and abcissa
     end if
     
     do i = 1, numt

        if(.not. quiet) then
           if (i == 1) then
              write(*,888) 'lower bound:', j0zero(splitv(1))/rD
           elseif(splitv(i) /= splitv(i-1)) then
              write(*,888) 'lower bound:', j0zero(splitv(i))/rD
           end if
        end if

        tsval = td(i)

        ! new three-layer unconfined system
        !===========================
        infOneAqif(i,w) = inf_integral(unconfined_aquifer_sp,splitv,naccel,&
             &glorder,j0zero,GLx,GLw)
        if(.not. quiet) then
           write(*,777) 'tD=', td(i),' ',infOneAqif(i,w)
        end if

     end do
  end do

  ! combine solutions and re-dimensionalize
  HC = Qrate/(4.0_DP*PI*b(2)*Kr)
  PhiC = gamell(2)*HC/sigma(2)
  totOneAqif(1:numt,1:npts) = (finOneAqif(:,:) + infOneAqif(:,:))*PhiC
  oneAqifD = 0.0_DP

  ! just add all wells (real and image), sign of wells is handled above
  ! use npts, rather than nwells to handle both dipole and no dipole cases
  do j=1,npts
     oneAqifD(1:numt) = oneAqifD + sgn(j)*totOneAqif(:,j)
  end do
  
  ! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
  ! compute logarithmic deriviative ds/d(ln(t)) = t*(ds/dt)
  ! using finite-differences in time

  ! use backwards diff for all but first step
  deriv(2:) = ts(2:)*(oneAqifD(2:) - oneAqifD(1:))/(ts(2:) - ts(1:numt-1))
  deriv(1) = ts(1)*(oneAqifD(1) - oneAqifD(2))/(ts(1) - ts(2))


  ! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
  ! write results to file 

  ! open output file or die
  open (unit=20, file=outfile, status='replace', action='write', iostat=err)
  if (err /= 0) then
     print *, 'cannot open output file for writing' 
     stop
  end if

  ! echo input parameters
  write (20,'(1(A,L2))') '# suppress output?:',quiet
  write (20,'(A,1(ES10.3,1X))') '# pumping rate Q', Qrate
  write (20,'(A,3(1X,ES10.3))') '# b:',b(1:3)
  write (20,'(A,3(1X,ES10.3))') '# sigma:',sigma(1:3)
  write (20,'(A,3(1X,ES10.3))') '# gamma*ell:',gamell(1:3)
  write (20,'(2(A,ES10.3))') '# Ss:',Ss, ' Kr:',Kr
  write (20,'(2(A,ES10.3))') '# Sy:',Sy, ' Kz:',Kz
  write (20,'(3(A,ES10.3))') '# obs zd:',zd 
  fmt = '(A,I2,A,  (ES10.3,1X))'
  write(fmt(9:10),'(I2.2)') stehfestN
  write (20,fmt) '# L^{-1} parameter N:',stehfestN, ' V:', stehfestV
  write (20,'(A,I4)') '# tanh-sinh quadrature order:',tsN
  write (20,'(A,2I3,ES10.3,A,I4)') &
       &'# min/max J0(x) zero split:', minval(j0split(:)),maxval(j0split(:)), &
       & j0zero(splitv(1)), ' # 0s to accelerate:', naccel
  write (20,'(A,I2)') '# order of Gauss-Lobatto quad:', GLorder 
  write (20,'(A)') '#'
  
  write (20,'(A)') '#         t_D          &
       &      H^{-1}[ L^{-1}[ f_D ]]           &
       &   log-deriv          '
  write (20,'(A)') '#----------------------------&
       &------------------------------------------------'

  do i = 1, numt
     write (20,'(3(1x,ES24.15E3))') ts(i), oneAqifD(i), deriv(i)
  end do

  deallocate(stehfestV)

contains

  !! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$  
  !!   end of program flow
  !! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$  

  !! ###################################################
  function inf_integral(integrand,split,num,ord,zeros,GLx,GLw) result(integral)
    use constants, only : DP, HALF
    use shared_data, only : rd
    implicit none

    interface 
       function integrand(a) result(H)
         use constants, only : DP
         real(DP), intent(in) :: a
         real(DP) :: H
       end function integrand
    end interface

    integer, dimension(:), intent(in) :: split
    integer, intent(in) :: num,ord
    real(DP), dimension(:), intent(in) :: zeros

    real(DP), dimension(ord-2) :: GLy, GLz
    real(DP), dimension(:), intent(in) :: GLx,GLw
    real(DP), dimension(num) :: area
    real(DP) :: lob,hib,width,integral
    integer :: j, k

    do j = split(i)+1, split(i)+num
       lob = zeros(j-1)/rd ! lower bound
       hib = zeros(j)/rd   ! upper bound
       width = hib - lob
       
       ! transform GL abcissa to global coordinates
       GLy(:) = HALF*(width*GLx(:) + (hib + lob))
       do k=1,ord-2
          GLz(k) = integrand( GLy(k) )
       end do
       
#ifdef DEBUG       
       if (any(GLz /= GLz)) then
          print *, 'NaN in GLz'
       elseif (any(abs(GLz) >= huge(1.0D1))) then
          print *, '+/- infinity in GLz'
       end if
#endif

       area(j-split(i)) = HALF*width*sum(GLz(1:ord-2)*GLw(1:ord-2))
    end do

    integral = wynn_epsilon(area(1:num))

  end function inf_integral

  subroutine tanh_sinh_setup(k,s,w,a)
    use constants, only : PIOV2
    implicit none
    
    integer, intent(in) :: k
    real(DP), intent(in) :: s
    real(DP), intent(out), dimension(2**k-1) :: w, a

    integer :: N,r,i
    real(DP) :: h
    real(DP), allocatable :: u(:,:)

    select case(k)
    
    case(5) ! N=31
       a(1:16) = (/ -0.9999093846951440_DP, -0.9996882640283532_DP, -0.9990651964557858_DP, &
            -0.9975148564572244_DP, -0.9940555066314022_DP, -0.9870405605073769_DP, -0.9739668681956775_DP, &   
            -0.9513679640727469_DP, -0.9148792632645746_DP, -0.8595690586898966_DP, -0.7806074389832004_DP, &   
            -0.6742714922484359_DP, -0.5391467053879677_DP, -0.3772097381640342_DP, -0.1943570033249354_DP, &  
            0.000000000000000_DP /)    
       a(17:31) = -a(15:1:-1)
       w(1:16) = (/ 0.1187484378271595D-03, 0.3628302624680807D-03, 0.9678665925484080D-03, &
            0.2292994107706537D-02, 0.4897085482322761D-02, 0.9548627018272046D-02, 0.1717849940927817D-01, &
            0.2875403116335220D-01, 0.4505960770348939D-01, 0.6638762853525473D-01, 0.9218368027840027D-01, &
            0.1207522455632202_DP,  0.1491892696359569_DP, 0.1737092867706880_DP,  0.1904186225373517_DP,  &  
            0.1963579530037270_DP /)   
       w(17:31) = w(15:1:-1)
    case(6) ! N=63
       a(1:32) = (/ -0.9999538710056279_DP, &
            -0.9999093846951440_DP, -0.9998288220728749_DP, -0.9996882640283532_DP, -0.9994514344352746_DP, &    
            -0.9990651964557858_DP, -0.9984542087676977_DP, -0.9975148564572244_DP, -0.9961086654375085_DP, &    
            -0.9940555066314022_DP, -0.9911269924416988_DP, -0.9870405605073769_DP, -0.9814548266773352_DP, &    
            -0.9739668681956775_DP, -0.9641121642235473_DP, -0.9513679640727469_DP, -0.9351608575219846_DP, &    
            -0.9148792632645746_DP, -0.8898914027842602_DP, -0.8595690586898966_DP, -0.8233170055064023_DP, &    
            -0.7806074389832004_DP, -0.7310180347925614_DP, -0.6742714922484359_DP, -0.6102736575006389_DP, &    
            -0.5391467053879677_DP, -0.4612535439395857_DP, -0.3772097381640342_DP, -0.2878799327427159_DP, &    
            -0.1943570033249354_DP, -0.9792388528783233D-01, 0.000000000000000_DP /)
       a(33:63) = -a(31:1:-1)
       w(1:32) = (/ 0.3208813366945178D-04, &
            0.5937356198247694D-04, 0.1056829831739953D-03, 0.1814131240119765D-03, 0.3010281997741137D-03, &
            0.4839279419160360D-03, 0.7552454743965616D-03, 0.1146484368725149D-02, 0.1695898266447088D-02, &
            0.2448515649876432D-02, 0.3455715046209973D-02, 0.4774260684945725D-02, 0.6464715068326731D-02, &
            0.8589154671047334D-02, 0.1120813191793253D-01, 0.1437685651080538D-01, 0.1814061891180582D-01, &
            0.2252955457639992D-01, 0.2755295281779945D-01, 0.3319344700302310D-01, 0.3940157301229943D-01, &
            0.4609133016763458D-01, 0.5313749207959197D-01, 0.6037545476519407D-01, 0.6760433485730893D-01, &
            0.7459380948441433D-01, 0.8109481135285784D-01, 0.8685368240400047D-01, 0.9162881336028068D-01, &
            0.9520825784921207D-01, 0.9742643064229337D-01, 0.9817789022528611D-01 /)
       w(33:63) = w(31:1:-1)
    case(7) ! N=127
       a(1:64) = (/ -0.9999675930679435_DP, &   
            -0.9999538710056279_DP, -0.9999350199250824_DP, -0.9999093846951440_DP, -0.9998748650487803_DP, &    
            -0.9998288220728749_DP, -0.9997679715995609_DP, -0.9996882640283532_DP, -0.9995847503515176_DP, &    
            -0.9994514344352746_DP, -0.9992811119217919_DP, -0.9990651964557858_DP, -0.9987935342988059_DP, &
            -0.9984542087676977_DP, -0.9980333363154338_DP, -0.9975148564572244_DP, -0.9968803181281919_DP, &
            -0.9961086654375085_DP, -0.9951760261553273_DP, -0.9940555066314022_DP, -0.9927169971968273_DP, &    
            -0.9911269924416988_DP, -0.9892484310901339_DP, -0.9870405605073769_DP, -0.9844588311674308_DP, &    
            -0.9814548266773352_DP, -0.9779762351866650_DP, -0.9739668681956775_DP, -0.9693667328969173_DP, &
            -0.9641121642235473_DP, -0.9581360227102137_DP, -0.9513679640727469_DP, -0.9437347860527572_DP, &    
            -0.9351608575219846_DP, -0.9255686340686127_DP, -0.9148792632645746_DP, -0.9030132815135739_DP, &
            -0.8898914027842602_DP, -0.8754353976304087_DP, -0.8595690586898966_DP, -0.8422192463507568_DP, &    
            -0.8233170055064023_DP, -0.8027987413432413_DP, -0.7806074389832004_DP, -0.7566939086337300_DP, &    
            -0.7310180347925614_DP, -0.7035500051471421_DP, -0.6742714922484359_DP, -0.6431767589852047_DP, &
            -0.6102736575006389_DP, -0.5755844906351516_DP, -0.5391467053879677_DP, -0.5010133893793091_DP, &    
            -0.4612535439395857_DP, -0.4199521112784471_DP, -0.3772097381640342_DP, -0.3331422645776381_DP, &    
            -0.2878799327427159_DP, -0.2415663195388837_DP, -0.1943570033249354_DP, -0.1464179842905879_DP, &    
            -0.9792388528783233D-01, -0.4905596730507789D-01, 0.000000000000000_DP /)
       a(65:127) = -a(63:1:-1)
       w(1:64) = (/  0.1161490439541570D-04, &
            0.1604398885852894D-04, 0.2193370025503293D-04, 0.2968663670972276D-04, 0.3979256936961218D-04, &
            0.5284123477065571D-04, 0.6953570627161861D-04, 0.9070612116060810D-04, 0.1173235010440466D-03, &
            0.1505133683694674D-03, 0.1915688324651430D-03, 0.2419627949824722D-03, 0.3033589073744850D-03, &
            0.3776209019039487D-03, 0.4668199121535836D-03, 0.5732393983329520D-03, 0.6993772958933317D-03, &
            0.8479450120833562D-03, 0.1021862903213154D-02, 0.1224251874890123D-02, 0.1458420758058122D-02, &
            0.1727849125498576D-02, 0.2036165227271721D-02, 0.2387118740717046D-02, 0.2784548053021644D-02, &
            0.3232341824496342D-02, 0.3734394621555303D-02, 0.4294556463333869D-02, 0.4916576198477945D-02, &
            0.5604038722495898D-02, 0.6360296164565423D-02, 0.7188393318730888D-02, 0.8090987770112945D-02, &
            0.9070265373050116D-02, 0.1012785197447841D-01, 0.1126472253995556D-01, 0.1248110912697219D-01, &
            0.1377640945347946D-01, 0.1514909811907401D-01, 0.1659664283933321D-01, 0.1811542833488304D-01, &
            0.1970069075783202D-01, 0.2134646571905346D-01, 0.2304555307896604D-01, 0.2478950166021179D-01, &
            0.2656861691232506D-01, 0.2837199428819532D-01, 0.3018758066641841D-01, 0.3200226556544212D-01, &
            0.3380200314583941D-01, 0.3557196509688968D-01, 0.3729672347452612D-01, 0.3896046143248236D-01, &
            0.4054720861090164D-01, 0.4204109677008743D-01, 0.4342663014205159D-01, 0.4468896398360245D-01, &
            0.4581418401632107D-01, 0.4678957889054890D-01, 0.4760389756250997D-01, 0.4824758356181584D-01, &
            0.4871297856875025D-01, 0.4899448851409385D-01, 0.4908870653415225D-01 /)
       w(65:127) = w(63:1:-1)
    case(8) ! N=255
       a(1:128) = (/ -0.9999729464252323_DP, -0.9999675930679435_DP, &
            -0.9999612848078566_DP, -0.9999538710056279_DP, -0.9999451806144587_DP, -0.9999350199250824_DP, &
            -0.9999231701292893_DP, -0.9999093846951440_DP, -0.9998933865475925_DP, -0.9998748650487803_DP, &
            -0.9998534727731114_DP, -0.9998288220728749_DP, -0.9998004814311384_DP, -0.9997679715995609_DP, &
            -0.9997307615198084_DP, -0.9996882640283532_DP, -0.9996398313456004_DP, -0.9995847503515176_DP, &
            -0.9995222376512172_DP, -0.9994514344352746_DP, -0.9993714011409377_DP, -0.9992811119217919_DP, &
            -0.9991794489348860_DP, -0.9990651964557858_DP, -0.9989370348335121_DP, -0.9987935342988059_DP, &
            -0.9986331486406774_DP, -0.9984542087676977_DP, -0.9982549161719962_DP, -0.9980333363154338_DP, &
            -0.9977873919589065_DP, -0.9975148564572244_DP, -0.9972133470434686_DP, -0.9968803181281919_DP, &
            -0.9965130546402537_DP, -0.9961086654375085_DP, -0.9956640768169531_DP, -0.9951760261553273_DP, &
            -0.9946410557125112_DP, -0.9940555066314022_DP, -0.9934155131692640_DP, -0.9927169971968273_DP, &
            -0.9919556630026776_DP, -0.9911269924416988_DP, -0.9902262404675277_DP, -0.9892484310901339_DP, &
            -0.9881883538007427_DP, -0.9870405605073769_DP, -0.9857993630252835_DP, -0.9844588311674308_DP, &
            -0.9830127914811011_DP, -0.9814548266773352_DP, -0.9797782758006157_DP, -0.9779762351866650_DP, &
            -0.9760415602565767_DP, -0.9739668681956775_DP, -0.9717445415654873_DP, -0.9693667328969173_DP, &
            -0.9668253703123558_DP, -0.9641121642235473_DP, -0.9612186151511164_DP, -0.9581360227102137_DP, &
            -0.9548554958050227_DP, -0.9513679640727469_DP, -0.9476641906151531_DP, -0.9437347860527572_DP, &
            -0.9395702239332747_DP, -0.9351608575219846_DP, -0.9304969379971534_DP, -0.9255686340686127_DP, &
            -0.9203660530319528_DP, -0.9148792632645746_DP, -0.9090983181630204_DP, -0.9030132815135739_DP, &
            -0.8966142542800760_DP, -0.8898914027842602_DP, -0.8828349882446689_DP, -0.8754353976304087_DP, &
            -0.8676831757756460_DP, -0.8595690586898966_DP, -0.8510840079878488_DP, -0.8422192463507568_DP, &
            -0.8329662939194109_DP, -0.8233170055064023_DP, -0.8132636085029739_DP, -0.8027987413432413_DP, &
            -0.7919154923761421_DP, -0.7806074389832004_DP, -0.7688686867682466_DP, -0.7566939086337300_DP, &
            -0.7440783835473473_DP, -0.7310180347925614_DP, -0.7175094674873241_DP, -0.7035500051471421_DP, &
            -0.6891377250616677_DP, -0.6742714922484359_DP, -0.6589509917433501_DP, -0.6431767589852047_DP, &
            -0.6269502080510430_DP, -0.6102736575006389_DP, -0.5931503535919532_DP, -0.5755844906351516_DP, &
            -0.5575812282607782_DP, -0.5391467053879677_DP, -0.5202880506912302_DP, -0.5010133893793091_DP, &
            -0.4813318461169050_DP, -0.4612535439395857_DP, -0.4407895990339009_DP, -0.4199521112784471_DP, &
            -0.3987541504672377_DP, -0.3772097381640342_DP, -0.3553338251650745_DP, -0.3331422645776381_DP, &
            -0.3106517805528459_DP, -0.2878799327427159_DP, -0.2648450765834480_DP, -0.2415663195388837_DP, &
            -0.2180634734697120_DP, -0.1943570033249354_DP, -0.1704679723820105_DP, -0.1464179842905879_DP, &
            -0.1222291222015577_DP, -0.9792388528783233D-01, -0.7352512298567129D-01,-0.4905596730507789D-01, &
            -0.2453976357464916D-01,  0.000000000000000_DP /)
       a(129:255) = -a(127:1:-1)
       w(1:128) = (/ 0.4921560307232998D-05,  0.5807439097198490D-05, &
            0.6834464736551962D-05, 0.8021976333168366D-05, 0.9391479729414297D-05, 0.1096682538838511D-04, &
            0.1277439336593531D-04, 0.1484328487115382D-04, 0.1720551983262035D-04, 0.1989623980256602D-04, &
            0.2295391544676706D-04, 0.2642055778543214D-04, 0.3034193227027582D-04, 0.3476777470613483D-04, &
            0.3975200795256068D-04, 0.4535295827241165D-04, 0.5163357013109941D-04, 0.5866161819225363D-04, &
            0.6650991520349031D-04, 0.7525651441994497D-04, 0.8498490517338812D-04, 0.9578420016111737D-04, &
            0.1077493130013294D-03, 0.1209811245801770D-03, 0.1355866366999198D-03, 0.1516791115271980D-03, &
            0.1693781953350486D-03, 0.1888100250314509D-03, 0.2101073159703949D-03, 0.2334094295482540D-03, &
            0.2588624190980828D-03, 0.2866190526068620D-03, 0.3168388107952311D-03, 0.3496878591154798D-03, &
            0.3853389922411368D-03, 0.4239715496402142D-03, 0.4657713008437973D-03, 0.5109302990422449D-03, &
            0.5596467016628039D-03, 0.6121245566051857D-03, 0.6685735528359555D-03, 0.7292087340689185D-03, &
            0.7942501742877347D-03, 0.8639226138995281D-03, 0.9384550553452169D-03, 0.1018080317034812D-02, &
            0.1103034544525193D-02, 0.1193556677915233D-02, 0.1289887874500138D-02, 0.1392270885805003D-02, &
            0.1500949388208539D-02, 0.1616167266473494D-02, 0.1738167849622113D-02, 0.1867193098735087D-02, &
            0.2003482746412242D-02, 0.2147273387814999D-02, 0.2298797523415569D-02, 0.2458282553807834D-02, &
            0.2625949727191397D-02, 0.2802013040424356D-02, 0.2986678094853407D-02, 0.3180140908472528D-02, &
            0.3382586686334190D-02, 0.3594188551540296D-02, 0.3815106239573258D-02, 0.4045484759190482D-02, &
            0.4285453023596742D-02, 0.4535122456126910D-02, 0.4794585575214253D-02, 0.5063914563983784D-02, &
            0.5343159830393102D-02, 0.5632348564440302D-02, 0.5931483299565078D-02, 0.6240540485980043D-02, &
            0.6559469084277679D-02, 0.6888189188257894D-02, 0.7226590686503984D-02, 0.7574531972792687D-02, &
            0.7931838715948011D-02, 0.8298302700229281D-02, 0.8673680747771299D-02, 0.9057693734958360D-02, &
            0.9450025714903295D-02, 0.9850323158407353D-02, 0.1025819432588503D-01, 0.1067320878274018D-01, &
            0.1109489707056434D-01, 0.1152275054628725D-01, 0.1195622140103138D-01, 0.1239472286990171D-01, &
            0.1283762964327005D-01, 0.1328427848928526D-01, 0.1373396909635332D-01, 0.1418596514318194D-01, &
            0.1463949560267392D-01, 0.1509375628448443D-01, 0.1554791161943442D-01, 0.1600109668720402D-01, &
            0.1645241948682547D-01, 0.1690096344746955D-01, 0.1734579017488673D-01, 0.1778594242664566D-01, &
            0.1822044730702644D-01, 0.1864831967010019D-01, 0.1906856571718493D-01, 0.1948018677253986D-01, &
            0.1988218321887574D-01, 0.2027355857204934D-01, 0.2065332367220688D-01, 0.2102050096667823D-01, &
            0.2137412885813173D-01, 0.2171326608991022D-01, 0.2203699613911310D-01, 0.2234443158689293D-01, &
            0.2263471843462563D-01, 0.2290704033411088D-01, 0.2316062269978247D-01, 0.2339473667107199D-01, &
            0.2360870289358053D-01, 0.2380189508857838D-01, 0.2397374338156889D-01, 0.2412373736221462D-01, &
            0.2425142884981818D-01, 0.2435643434076045D-01, 0.2443843711680134D-01, 0.2449718899591576D-01, &
            0.2453251171033698D-01, 0.2454429789967597D-01 /)
       w(129:255) = w(127:1:-1)
    case default

       !! compute weights not saved as constants above
       N = 2**k-1
       r = (N-1)/2
       h = 4.0_DP/2**k
       allocate(u(2,N))
       
       u(1,1:N) = PIOV2*cosh(h*(/(i,i=-r,r)/))
       u(2,1:N) = PIOV2*sinh(h*(/(i,i=-r,r)/))

       a(1:N) = tanh(u(2,1:N))
       w(1:N) = u(1,1:N)/cosh(u(2,:))**2
       w(1:N) = 2.0_DP*w(1:N)/sum(w(1:N))

    end select
    
    ! map the -1<=x<=1 interval onto 0<=a<=s
    a = (a + 1.0_DP)*s/2.0_DP

  end subroutine tanh_sinh_setup


  !! ###################################################
  subroutine gauss_lobatto_setup(ord,abcissa,weight)
    implicit none
    integer, intent(in) :: ord
    real(DP), intent(out), dimension(ord-2) :: weight, abcissa

    ! leave out the endpoints (abcissa = +1 & -1), since 
    ! they will be the zeros of the Bessel functions
    ! (therefore, e.g., 5th order integration only uses 3 points)
    ! copied from output of Matlab routine by Greg von Winckel, at
    ! http://www.mathworks.com/matlabcentral/fileexchange/loadFile.do?objectId=4775

    select case(ord)
    case(5)
       abcissa(1:2) = (/-0.65465367070798_DP, 0.0_DP/)
       abcissa(3) = -abcissa(1)
       weight(1:2) =   (/0.54444444444444_DP, 0.71111111111111_DP/)
       weight(3) = weight(1)
    case(6)
       abcissa(1:2) = (/-0.76505532392946_DP, -0.28523151648065_DP/)
       abcissa(3:4) = -abcissa(2:1:-1)
       weight(1:2) = (/0.37847495629785_DP, 0.55485837703549_DP/)
       weight(3:4) = weight(2:1:-1)
    case(7)
       abcissa(1:3) = (/-0.83022389627857_DP, -0.46884879347071_DP, 0.0_DP/)
       abcissa(4:5) = -abcissa(2:1:-1)
       weight(1:3) = (/0.27682604736157_DP, 0.43174538120986_DP, &
            & 0.48761904761905_DP/)
       weight(4:5) = weight(2:1:-1)
    case(8)
       abcissa(1:3) = (/-0.87174014850961_DP, -0.59170018143314_DP, &
            & -0.20929921790248_DP/)
       abcissa(4:6) = -abcissa(3:1:-1)
       weight(1:3) = (/0.21070422714351_DP, 0.34112269248350_DP, &
            & 0.41245879465870_DP/)
       weight(4:6) = weight(3:1:-1)
    case(9)
       abcissa(1:4) = (/-0.89975799541146_DP, -0.67718627951074_DP, &
                      & -0.36311746382618_DP, 0.0_DP/)
       abcissa(5:7) = -abcissa(3:1:-1)
       weight(1:4) = (/0.16549536156081_DP, 0.27453871250016_DP, &
            & 0.34642851097305_DP, 0.37151927437642_DP/)
       weight(5:7) = weight(3:1:-1)
    case(10)
       abcissa(1:4) = (/-0.91953390816646_DP, -0.73877386510551_DP, &
            & -0.47792494981044_DP, -0.16527895766639_DP/)
       abcissa(5:8) = -abcissa(4:1:-1)
       weight(1:4) = (/0.13330599085107_DP, 0.22488934206313_DP, &
            & 0.29204268367968_DP, 0.32753976118390_DP/)
       weight(5:8) = weight(4:1:-1)
    case(11)
       abcissa(1:5) = (/-0.93400143040806_DP, -0.78448347366314_DP, &
            & -0.56523532699621_DP, -0.29575813558694_DP, 0.0_DP/)
       abcissa(6:9) = -abcissa(4:1:-1)
       weight(1:5) = (/0.10961227326699_DP, 0.18716988178031_DP, &
            & 0.24804810426403_DP, 0.28687912477901_DP, &
            & 0.30021759545569_DP/)
       weight(6:9) = weight(4:1:-1)
    case(12)
       abcissa(1:5) = (/-0.94489927222288_DP, -0.81927932164401_DP, &
            & -0.63287615303186_DP, -0.39953094096535_DP, &
            & -0.13655293285493_DP/)
       abcissa(6:10) = -abcissa(5:1:-1)
       weight(1:5) = (/0.09168451741320_DP, 0.15797470556437_DP, &
            & 0.21250841776102_DP, 0.25127560319920_DP, &
            & 0.27140524091070_DP/) 
       weight(6:10) = weight(5:1:-1)
    case default
       print *, 'unsupported Gauss-Lobatto integration order selected' 
       stop
    end select
  end subroutine gauss_lobatto_setup


  !! ###################################################
  !! wynn-epsilon acceleration of partial sums, given a series
  function wynn_epsilon(series) result(accsum)
    use constants, only : RONE, RZERO

    implicit none
    integer, parameter :: MINTERMS = 4
    real(DP), dimension(1:), intent(in) :: series
    real(DP) :: denom, accsum
    integer :: np, i, j, m
#ifdef DEBUG
    integer :: mm
#endif

    real(DP), dimension(1:size(series),-1:size(series)-1) :: ep

    np = size(series)

    ! first column is intiallized to zero
    ep(:,-1) = RZERO

    ! build up partial sums, but check for problems
    check: do i=1,np
       if((abs(series(i)) > huge(RZERO)).or.(series(i) /= series(i))) then
          ! +/- Infinity or NaN ? truncate series to avoid corrupting accelerated sum
          np = i-1
          if(np < MINTERMS) then
             write(*,'(A)',advance='no') 'not enough Wynn-Epsilon series to accelerate'
             accsum = 1.0/0.0  ! make it clear answer is bogus
             goto 777
          else
             write(*,'(A,I3,A)',advance='no') 'Wynn-Epsilon series&
                  &, truncated to ',np,' terms.'
             exit check
          end if
       else
          ! term is good, continue
          ep(i,0) = sum(series(1:i))
       end if
    end do check
    
    ! build up epsilon table (each column has one less entry)
    do j=0,np-2 
       do m = 1,np-(j+1)
          denom = ep(m+1,j) - ep(m,j)
          if(abs(denom) > tiny(1.0_DP)) then ! check for div by zero
             ep(m,j+1) = ep(m+1,j-1) + RONE/denom
          else
             accsum = ep(m+1,j)
             write(*,'(A,I2,1X,I2,A)',advance='no') 'epsilon cancel ',m,j,' '
#ifdef DEBUG
             write(*,*) 'ep'
             do mm=1,np
                write(*,'(ES11.3E3,1X)') ep(mm,:)
             end do
             write(*,*) 'accsum'
             write(*,*) accsum
#endif
             goto 777
          end if
       end do
    end do

    ! if made all the way through table use "corner value" of triangle as answer
    if(mod(np,2) == 0) then
       accsum = ep(2,np-2)  ! even number of terms - corner is acclerated value
    else
       accsum = ep(2,np-3)  ! odd numbers, use one column in from corner
    end if
777 continue

  end function wynn_epsilon

  ! modified from numerical recipes f90 (section 3.1)
  SUBROUTINE polint(xa,ya,x,y,dy)
    ! xa and ya are given x and y locations to fit an nth degree polynomial
    ! through.  results is a value y at given location x, with error estimate dy

    use constants, only : DP
    IMPLICIT NONE
    REAL(DP), DIMENSION(:), INTENT(IN) :: xa,ya
    REAL(DP), INTENT(IN) :: x
    REAL(DP), INTENT(OUT) :: y,dy
    INTEGER :: m,n,ns
    REAL(DP), DIMENSION(size(xa)) :: c,d,den,ho
    integer, dimension(1) :: tmp

    n=size(xa)
    c=ya
    d=ya
    ho=xa-x
    tmp = minloc(abs(x-xa))
    ns = tmp(1)
    y=ya(ns)
    ns=ns-1
    do m=1,n-1
       den(1:n-m)=ho(1:n-m)-ho(1+m:n)
       if (any(den(1:n-m) == 0.0)) then
          print *, 'polint: calculation failure'
          stop
       end if
       den(1:n-m)=(c(2:n-m+1)-d(1:n-m))/den(1:n-m)
       d(1:n-m)=ho(1+m:n)*den(1:n-m)
       c(1:n-m)=ho(1:n-m)*den(1:n-m)
       if (2*ns < n-m) then
          dy=c(ns+1)
       else
          dy=d(ns)
          ns=ns-1
       end if
       y=y+dy
    end do
  END SUBROUTINE polint

end program Driver