/* Copyright (c) 2015  Gerald Knizia
 * 
 * This file is part of the IboView program (see: http://www.iboview.org)
 * 
 * IboView is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3.
 * 
 * IboView is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with bfint (LICENSE). If not, see http://www.gnu.org/licenses/
 * 
 * Please see IboView documentation in README.txt for:
 * -- A list of included external software and their licenses. The included
 *    external software's copyright is not touched by this agreement.
 * -- Notes on re-distribution and contributions to/further development of
 *    the IboView software
 */

#include <iostream>
#include <boost/format.hpp>
#include <cmath>

#include "CxDefs.h"
#include "CxTypes.h"

#include "CtAtomSet.h"
#include "CtDftGrid_ivb.h"
#include "CxLebedevGrid.h"
#include "CtConstants.h"
#include "CtTiming.h"

using boost::format;

namespace ct {


   // main references:
   //   [1] JCP 102 346 (1995)  (Treutler & Ahlrichs)
   //   [2] JCP 108 3226 (1998)  (Krack & Koester)
   //   [3] JCP 88 2547 (1988)  (Becke)
   //   [4] JCP 104 9848 (1996) (Mura & Knowles)
   //   [5] JCP 101 8894 (1994) (Delley)
   //   [6] CPL 257 213 (1996) (Stratmann, Scuseria, Frisch). This is what Scheffler recommend for partitioning.
   //       (Stratmann, Scuseria, Frisch - Achieving linear scaling in exchange-correlation density functional quadratures.pdf)
   //   [7] See also "Ab initio molecular simulations with numeric atom-centered orbitals" by Scheffler et al,
   //       who give some notes on their grids (mostly referring back to Delley [5])


   // maybe better:
   //   JCP 121 681 (2004) (Koester, Flores-Moreno, Reveles)

   // Further refs which might be useful:
   // - Izsak and Neese describe in http://dx.doi.org/10.1063/1.3646921
   //   how they set up grid for COSX computations (Section IV A). Might be worth investigating.
   // - NWChem's description:
   //   http://www.nwchem-sw.org/index.php/Density_Functional_Theory_for_Molecules#GRID_--_Numerical_Integration_of_the_XC_Potential
   // - Kakhiani, Tsereteli, Tsereteli -- A program to generate a basis set adaptive radial quadrature grid for density functional theory
   //   10.1016/j.cpc.2008.10.004
   // - Lindh, Malmqvist, Gagliardi - Molecular integrals by numerical quadrature. I. Radial integration.pdf
   //   (2001), Theor Chem Acc (2001) 106:17 10.1007/s002140100263
   // - Weber, Daul, Baltensperger - Radial numerical integrations based on the sinc function.pdf
   //   10.1016/j.cpc.2004.08.008
   // - El-Sherbiny, Poirier - An evaluation of the radial part of numerical integration commonly used in DFT.pdf
   // - The Becke Fuzzy Cells Integration Scheme in the Amsterdam Density Functional Program Suite
   //   http://onlinelibrary.wiley.com/doi/10.1002/jcc.23323/full
   //   - Note: This also has a set of TA Eta scaling parameters for the entire periodic table


// Maps the integer grid level (nLevel) to a nominal target accuracy:
//    1e-3 * 0.1^((nLevel+1)/2)
// e.g., 1e-5 for level 3 and 1e-6 for level 5.
double FDftGridParams::fTargetAccuracy() const {
   double const
      fHalfSteps = double(this->nLevel+1)/2.;
   return 1e-3 * std::pow(.1, fHalfSteps);
}

void FDftGridParams::SetTargetAccuracy(double fAcc) {
//    fAcc = 1e-3 * std::pow(.1, double(this->nLevel+1)/2.);
//    1e3 * fAcc = std::pow(.1, double(this->nLevel+1)/2.);
//    std::log(1e3 * fAcc)/std::log(.1) = double(this->nLevel+1)/2.
//    2 * std::log(1e3 * fAcc)/std::log(.1) = double(this->nLevel+1)
//    2 * std::log(1e3 * fAcc)/std::log(.1) - 1 = this->nLevel
   nLevel = 2. * (std::log(1e3 * fAcc)/std::log(.1)) - 1.;
   assert_rt(std::abs(fAcc - fTargetAccuracy()) < 1e-10);
}

// Helper which fills the point list and block list of a FDftGrid object.
// Kept apart from FDftGrid in order to decouple grid generation from grid usage.
struct FDftGridGenerator
{
   // shorthands for the nested types of the target grid.
   typedef FDftGrid::FPoint FPoint;
   typedef FDftGrid::FPointList FPointList;
   typedef FDftGrid::FGridBlock FGridBlock;
   typedef FDftGrid::FGridBlockList FGridBlockList;

   // inputs: atoms to make the grid for, and the grid settings.
   FAtomSet const &Atoms;
   FDftGridParams const &Params;
   // outputs: the grid object itself, and direct references to its
   // Points and GridBlocks members.
   FDftGrid &Grid;
   FPointList &Points;
   FGridBlockList &GridBlocks;

   // binds the generator to its inputs and outputs; does not generate anything yet.
   FDftGridGenerator( FDftGrid &Grid_, FAtomSet const &Atoms_, FDftGridParams const &Params_ )
      : Atoms(Atoms_), Params(Params_), Grid(Grid_),
        Points(Grid_.Points ), GridBlocks(Grid_.GridBlocks)
   {}

   // makes the grid points of all atoms and then the grid blocks.
   void Create();

   // space partitioning
   FScalar GetAtomWeight( FVector3 const &vPos, uint iAtom, double const *pInvDistAt, FMemoryStack &Mem );
   FScalar GetPairVoronoiR(double Mu, size_t iAtom, size_t iOtherAtom);

   // per-atom grid setup
   static FScalar GetAtomPeriodRowFraction( uint ElementNumber );
   void GetAtomGridParams( uint &nRadialPt, double &AtomicScale, uint &iAngGrid, uint iAtom );
   void GetAtomRadialGrid( double *r, double *w, uint n, double AtomicScale );
   void AddAtomGrid( FPointList &Points, uint iAtom, double const *pInvDistAt, FMemoryStack &Mem);

   // grouping of the final points into blocks
   void BlockifyGridR( FGridBlockList &Blocks, uint iFirst, FPoint *pFirst, FPoint *pLast );
};


// // JCP 41 3199 (1964). In Angstrom (of the time, strictly)
// static FScalar SlaterBraggAtomicRadii[] = {
// 	0.35, // modified recommendation by Becke in [3]
// 	0,
// 	1.45, 1.05, 0.85, 0.70, 0.65, 0.60, 0.50, // Li--F
// 	0,
// 	1.8, 1.5, 1.25, 1.1, 1.0, 1.0, 1.0,
// 	0,
// 	2.2, 1.8, 1.6, 1.4, 1.35, 1.4, 1.4, 1.4, 1.35, 1.35, 1.35, 1.35, 1.30, 1.25, 1.15, 1.15, 1.15, // K--Br
// 	0,
// 	2.35, 2.0, 1.8, 1.55, 1.45, 1.35, 1.35, 1.30, 1.35, 1.40, 1.60, 1.55, 1.55, 1.45, 1.45, 1.40, 1.40, // Rb-I
// 	2.6, 2.15, 1.95, 1.85, 1.85, 1.85, 1.85, 1.85, 1.85, 1.8, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.55, 1.45, 1.35, 1.35, 1.30, 1.35, 1.35, 1.35, 1.5, 1.9, 1.6, 1.9, // Cs--Po
// 	0.0, 0.0, 0.0, // At, Rn, Fr
// 	2.15,1.95,1.8,1.8,1.75,1.75,1.75,1.75 // Ra-Am
// };

// Covalent radii of the elements H (Z=1) through Cm (Z=96): 96 entries, which
// the code in this file indexes by (atomic number - 1).
// From wikipedia... : http://en.wikipedia.org/wiki/Covalent_radius
// Looks better than Slater's values, imo. Original citation:
//    Beatriz Cordero, Verónica Gómez, Ana E. Platero-Prats, Marc Revés, Jorge Echeverría, Eduard Cremades, Flavia Barragán and Santiago Alvarez. Covalent radii revisited. Dalton Trans., 2008, 2832-2838, doi:10.1039/b801115j
// Multiple entries for an atom (e.g., C,sp..sp3) have been averaged.
// NOTE(review): values are presumably in Angstrom (GetAtomGridParams multiplies
// them by 1/ToAng) -- confirm against the cited table.
static FScalar CovalentRadii[] = { // H-Cm.
   0.31, 0.28, 1.28, 0.96, 0.84, 0.73, 0.71, 0.66, 0.57, 0.58, 1.66, 1.41,
   1.21, 1.11, 1.07, 1.05, 1.02, 1.06, 2.03, 1.76, 1.70, 1.60, 1.53, 1.39,
   1.50, 1.42, 1.38, 1.24, 1.32, 1.22, 1.22, 1.20, 1.19, 1.20, 1.20, 1.16,
   2.20, 1.95, 1.90, 1.75, 1.64, 1.54, 1.47, 1.46, 1.42, 1.39, 1.45, 1.44,
   1.42, 1.39, 1.39, 1.38, 1.39, 1.40, 2.44, 2.15, 2.07, 2.04, 2.03, 2.01,
   1.99, 1.98, 1.98, 1.96, 1.94, 1.92, 1.92, 1.89, 1.90, 1.87, 1.87, 1.75,
   1.70, 1.62, 1.51, 1.44, 1.41, 1.36, 1.36, 1.32, 1.45, 1.46, 1.48, 1.40,
   1.50, 1.50, 2.60, 2.21, 2.15, 2.06, 2.00, 1.96, 1.90, 1.87, 1.80, 1.69
};

// Pair cell function s(Mu) for the atom pair (iAtom, iOtherAtom): maps the
// coordinate Mu to a value between 1 (at Mu <= -a) and 0 (at Mu >= +a).
// The product of these pair functions over all partner atoms is what
// GetAtomWeight uses as the un-normalized weight of an atom.
// iAtom and iOtherAtom only enter in the (currently disabled) atomic size
// adjustment.
FScalar FDftGridGenerator::GetPairVoronoiR(double Mu, size_t iAtom, size_t iOtherAtom)
{
   // [1], eq. (4)--(8)
   double fCut = Mu;

   // Shift of the cell boundary according to the relative atomic sizes.
   // Note: this is hard-disabled by the '&& false', whatever Params says.
   if ( Params.AdjustByAtomicRadii && false ) {
#ifdef _DEBUG
      uint const
         nKnownRadii = sizeof(CovalentRadii)/sizeof(FScalar);
      assert( Atoms[iAtom].AtomicNumber < nKnownRadii );
      assert( Atoms[iOtherAtom].AtomicNumber < nKnownRadii );
#endif
      FScalar const
         RadThis = CovalentRadii[Atoms[iAtom].AtomicNumber - 1],
         RadOther = CovalentRadii[Atoms[iOtherAtom].AtomicNumber - 1];
      FScalar
         Chi = 1.0;
      // zero radii mean "no data" (might happen if Slater values are used);
      // in this case the size ratio is left at 1.
      if ( RadThis != 0.0 && RadOther != 0.0 )
         Chi = std::sqrt( RadThis/RadOther ); // [1], eq. (13)
      FScalar const
         u = (Chi - 1.0)/(Chi + 1.0);
      FScalar
         Shift = u/(u*u - 1.0);
      // restrict the shift to [-0.5,+0.5]: [3], eq. (A3-6)ff
      if ( Shift > 0.5 )
         Shift = 0.5;
      else if ( Shift < -0.5 )
         Shift = -0.5;

      fCut += Shift * (1.0 - fCut*fCut); // [3], eq. (A2)
   }

   // compile-time choice of the cutoff profile; the Stratmann variant is the active one.
   bool const
      UseOriginalBecke = false;
   if ( UseOriginalBecke ) {
      // Original Becke scheme: [1], eq. (7) with k=3, i.e., the smoothing
      // polynomial applied three times in a row.
      for ( int iPass = 0; iPass < 3; ++ iPass )
         fCut = ( (3./2.)*fCut - .5*fCut*fCut*fCut );
   } else {
      // Stratmann's modified Becke scheme [6]
      double const
         a = 0.64; // comment after eq. 14
      // [6], eq.11: hard limits outside of (-a,+a)...
      if ( fCut >= a )
         fCut = +1;
      else if ( fCut <= -a )
         fCut = -1.;
      else {
         // ...and the polynomial of [6], eq.14 in between.
         double const
            ma = fCut/a; // mu/a
         fCut = (1/16.)*(ma*(35 + ma*ma*(-35 + ma*ma*(21 - 5 *ma*ma))));
      }
   }

   return .5 * ( 1 - fCut );
}

// Forward declaration; the definition is not in this file.
// NOTE(review): presumably writes into pOrd[0..nVals) the indices which sort
// the values pVals[i*nValSt] (ascending unless Reverse is set) -- confirm
// against the definition.
void ArgSort1(size_t *pOrd, double const *pVals, size_t nValSt, size_t nVals, bool Reverse);

// Subdivides the entire space volume into atomic contributions: computes the
// weight of atom iAtom at the grid point vGridPos.
//
// For each atom, the pair cell functions (GetPairVoronoiR) with all other atoms
// are multiplied up. Returned is the product for iAtom divided by the sum of
// the products of all atoms (or 0 if this sum is zero).
//
// pInvDistAt: nAtoms x nAtoms matrix of inverse atom-atom distances.
// Mem: used for two temporary arrays of nAtoms entries each.
FScalar FDftGridGenerator::GetAtomWeight( FVector3 const &vGridPos, uint iAtom, double const *pInvDistAt, FMemoryStack &Mem )
{
   size_t
      nAtoms = Atoms.size();

   // distances of the grid point to all atoms.
   double
      *pGridDist;
   Mem.Alloc(pGridDist, nAtoms);
   for (size_t i = 0; i < nAtoms; ++ i)
      pGridDist[i] = Dist(vGridPos, Atoms[i].vPos);

   // order in which the partner atoms are processed: by distance from the grid
   // point, close ones first (those are the ones most likely to cut the
   // product down to zero early).
   size_t
      *pByDist;
   Mem.Alloc(pByDist, nAtoms);
   ArgSort1(pByDist, pGridDist, 1, nAtoms, false);

   FScalar
      wThisAtom = 0.,
      wAllAtoms = 0.;

   // [1], eq. (4)--(8)
   for (size_t iCen = 0; iCen < nAtoms; ++ iCen) {
      double wCell = 1.;

      for (size_t k = 0; k < nAtoms; ++ k) {
         size_t const
            iOther = pByDist[k];
         if ( iOther != iCen ) {
            FScalar const
               InvRab = pInvDistAt[iCen * nAtoms + iOther],
               ra = pGridDist[iCen],
               rb = pGridDist[iOther],
               // ``confocal elliptical coordinates'': [3], eq.(9)
               Mu = (ra-rb)*InvRab;

            wCell *= GetPairVoronoiR(Mu, iCen, iOther);
            // negligible already: treat as exactly zero and skip the other partners.
            if (wCell < 1e-12) {
               wCell = 0;
               break;
            }
         }
      }
      if (iCen == iAtom)
         wThisAtom = wCell;
      wAllAtoms += wCell;
   }

   // note: only the first of the two arrays is handed back explicitly.
   Mem.Free(pGridDist);
   if (wAllAtoms == 0.)
      return 0.;
   return wThisAtom/wAllAtoms;
}


// Per-element scaling parameters for the radial grids; GetAtomGridParams reads
// entry (atomic number - 1) as the AtomicScale of an atom.
// 103 entries: H (Z=1) through Lw (Z=103; Lw is the old symbol of lawrencium).
// Source: 10.1002/jcc.23323, supp info Tab. 1 (replacement for [1], Tab 1.)
static FScalar EtaRescalingParameters[] = {
 0.8, 0.9, 1.8, 1.4, 1.3, 1.1, 0.9, 0.9, 0.9, 0.9, 1.4, 1.3, 1.3, 1.2, 1.1, 1.0, 1.0, 1.0, 1.5, 1.4, 1.3, 1.2, 1.2, 1.2, 1.2, 1.2, // H - Fe
 1.2, 1.1, 1.1, 1.1, 1.1, 1.0, 0.9, 0.9, 0.9, 0.9, 1.4, 1.4, 1.1, 1.3, 1.0, 1.2, 0.9, 0.9, 0.9, 1.0, 0.9, 1.0, 1.0, 1.3, 1.2, 1.2, // Co-Te
 0.9, 1.0, 1.7, 1.5, 1.5, 1.3, 1.3, 1.4, 1.8, 1.4, 1.2, 1.3, 1.3, 1.4, 1.1, 1.1, 1.2, 1.6, 1.4, 1.3, 1.2, 1.0, 1.0, 0.9, 1.3, 1.2, // I - Pt
 1.2, 1.0, 1.2, 1.2, 1.1, 1.2, 1.1, 2.1, 2.2, 1.8, 1.7, 1.3, 1.4, 1.2, 1.2, 1.3, 1.4, 1.4, 1.7, 1.9, 1.9, 2.0, 2.0, 1.6, 2.0 // Au-Lw
};


// Returns how far into its row of the periodic table an element is: the
// fraction (Z - Z[previous rare gas])/(Z[next rare gas] - Z[previous rare gas]),
// which is 1.0 for the rare gases themselves. For elements beyond Rn (Z > 86),
// a fixed value of 0.5 is returned.
FScalar FDftGridGenerator::GetAtomPeriodRowFraction( uint ElementNumber )
{
   static uint
      RowEnds[] = { 0, 2, 10, 18, 36, 54, 86 };
   static uint const
      nRowEnds = sizeof(RowEnds)/sizeof(uint);
   uint const
      LastRowEnd = RowEnds[nRowEnds-1];

   if ( ElementNumber > LastRowEnd )
      return 0.5;
   if ( ElementNumber == LastRowEnd )
      return 1.0;

   // find the first rare gas with an atomic number not below the element's.
   uint
      iRow = 1;
   while ( iRow < nRowEnds && RowEnds[iRow] < ElementNumber )
      ++ iRow;
   if ( iRow < nRowEnds ) {
      uint const
         nRowStart = RowEnds[iRow-1],
         nRowEnd = RowEnds[iRow];
      return static_cast<FScalar>(ElementNumber - nRowStart)/(nRowEnd - nRowStart);
   }

   // cannot get here: everything beyond the last entry was handled at the top.
   assert_rt(0);
   return 1.0;
}

// Returns the period (row of the periodic table) of an element:
// 1 for H, He; 2 for Li-Ne, 3 for Na-Ar, ..., 7 for Fr-Og (Z = 87..118).
//
// Note: the table previously ended at Rn (Z = 86), such that all period-7
// elements ran into the assert(0) below in debug builds, although the element
// tables in this file cover elements up to Z = 103. Return values of builds
// without assertions are unchanged (7 for everything beyond Rn).
uint GetAtomPeriod(uint ElementNumber)
{
   // atomic number of the last element of each period.
   static uint const
      LastElementOfPeriod[] = { 2, 10, 18, 36, 54, 86, 118 };
   static uint const
      nPeriods = sizeof(LastElementOfPeriod)/sizeof(LastElementOfPeriod[0]);
   for ( uint i = 0; i < nPeriods; ++ i ){
      if ( ElementNumber <= LastElementOfPeriod[i] )
         return i + 1;
   }
   // beyond the known periodic table.
   assert(0);
   return nPeriods;
}


void FDftGridGenerator::GetAtomGridParams(uint &nRadialPt, double &AtomicScale, uint &iAngGrid, uint iAtom)
{
   FAtom const
      &Atom = Atoms.Atoms[iAtom];
   if ( 1 ) {
      if ( Atom.AtomicNumber <= sizeof(EtaRescalingParameters)/sizeof(FScalar) )
         AtomicScale = EtaRescalingParameters[Atom.AtomicNumber - 1];
      else {
         // lerp with 1.5 for alkali metals to 0.9 for rare gases.
         FScalar
               f = GetAtomPeriodRowFraction(Atom.AtomicNumber);
         f *= f;
         AtomicScale = f*1.5 + (1-f)*0.9;
      }
   } else
//       AtomicScale = std::sqrt(CovalentRadii[Atom.AtomicNumber-1]);
      AtomicScale = CovalentRadii[Atom.AtomicNumber-1] * (1/ToAng);

   {
//       if (GetAtomPeriod(Atom.AtomicNumber) != 0)
//          iL += 1; // use larger angular grid.
      uint DefaultAngularGridLs[] = {9,11,17,23,29,35,47,59,71,89};
      uint iRow = GetAtomPeriod(Atom.AtomicNumber);
//       std::cout << format(" %s is row %i.\n") % Atom.ElementName() % iRow;
      uint iOffs = Params.nLevel-1 + iRow-1;
//       std::cout << format(" %s is row %i. grid offs: %i\n") % Atom.ElementName() % iRow % iOffs;
      uint iMaxL = DefaultAngularGridLs[iOffs];
      for (iAngGrid = 0; iAngGrid < nAngularGrids; ++ iAngGrid) {
         if (AngularGridInfo[iAngGrid].MaxL == iMaxL)
            break;
      }

      // [5], text after 12c.
      //       def nf(sp,iat): return int(sp*14.*(iat+2)**(1./3.))
      // >>> [nf(1.,iat) for iat in [1,10,18,36]]
      // [20, 32, 38, 47]
      // TA grid 1 has 20, 25, 30, 35.
      // TA grid 3 has 30, 35, 40, 45.
      // >>> [nf(0.75,iat) for iat in [1,10,18,36]]
      // [15, 24, 28, 35]
      // >>> [nf(0.75+2*.25,iat) for iat in [1,10,18,36]]
      // [25, 40, 47, 58]
      // >>> [nf(0.75+2*.2,iat) for iat in [1,10,18,36]]
      // [23, 36, 43, 54]

      nRadialPt = (int)((0.75 + ((signed)Params.nLevel-1)*0.2) * 14. * std::pow((double)Atom.AtomicNumber+2., 1./3.));
      // ^- should produce more or less comparable radial sizes to TA.
   }
//    double
//       EpsTol = std::pow(0.1, 3 + Params.nLevel); // target energy tolerance.
//    n = std::max(20u, (uint)(-5. * (3. * std::log(EpsTol) - (double)iRow + 6))); // JCP 121 681 eq. 20
//    _xout0("EpsTol = " << EpsTol << " n = " << n);
}


/* this one is Delley's grid [5]/[7]. */
void FDftGridGenerator::GetAtomRadialGrid( double *r, double *w, uint n, double AtomicScale )
{
   FScalar
      r_outer = 12.0 * AtomicScale, // given in bohr? sounds a bit small.
      den = 1/FScalar(1+n),
      RFac = r_outer/std::log(1 - sqr(n*den));

   for ( uint i = 1; i <= n; ++ i ){ // <- beware of indices starting at 1!
      FScalar
         xi,
         // derivative d[ri]/d[xi] (for weight)
         dri;
      xi = RFac * std::log(1-sqr(i*den)); // note: is 0 for i = 0 and +inf for n+1.
      dri = RFac/((1-sqr(i*den))) * (-2.0*i*sqr(den));  // d/d[s] xi

      r[i-1] = xi;
      w[i-1] = dri * 4.*M_PI*r[i-1]*r[i-1]; // and that is the radial volume element.
      // ^- this seems to work fine, but should there not be a scale for the number of
      //    integration points in dri? Or is this implicit in transforming directly from
      //    int[i=0...n] to xi?
   }
}

// Generates the grid points of atom iAtom and appends them to Points.
//
// The atomic grid is a product grid of radial shells (GetAtomRadialGrid) and
// one angular grid per shell (MakeAngularGrid). Each point's weight is
//    (angular weight) * (radial weight) * (weight of iAtom at the point),
// the latter coming from GetAtomWeight. Points with an absolute weight of
// 1e-12 or less are dropped.
//
// pInvDistAt and Mem are only passed through to GetAtomWeight.
void FDftGridGenerator::AddAtomGrid( FPointList &Points, uint iAtom, double const *pInvDistAt, FMemoryStack &Mem )
{
   FAtom const
      &Atom = Atoms.Atoms[iAtom];
   double
      AtomicScale;
   uint
      nr,
      iBaseAngGrid;

   GetAtomGridParams(nr, AtomicScale, iBaseAngGrid, iAtom);
   if ( nr == 0 )
      // no radial shells, no points (and &ri[0] below needs non-empty arrays).
      return;

   std::vector<double>
      ri(nr), wi(nr);
   GetAtomRadialGrid( &ri[0], &wi[0], nr, AtomicScale );

   // assign the number of angular points for each radial shell.
   std::vector<uint>
      nAngPts;
   nAngPts.reserve(nr);
   uint
      nAngPtsMax = 0;
   for ( uint iShell = 1; iShell <= nr; ++ iShell ){
      // BEWARE of iShell starting at 1!
      // reduce grid size for inner shells ([1], eq. 37 and 38): four grids down
      // for the innermost third, two grids down up to half of the shells.
      // (a proper adaptive integration would surely be a better way of dealing with this...)
      uint
         nReduce = 0;
      if ( iShell <= nr/3 )
         nReduce = 4;
      else if ( iShell <= nr/2 )
         nReduce = 2;
      uint const
         iAngGrid = (iBaseAngGrid > nReduce)? (iBaseAngGrid - nReduce) : 0;

      nAngPts.push_back(AngularGridInfo[iAngGrid].nPoints);
      nAngPtsMax = std::max(nAngPtsMax, nAngPts.back());
   }

   // buffer for the angular grid: (x,y,z,weight) for each point. Held in a
   // vector so that it is released on all paths out of this function
   // (including exceptions), and never empty so that &AngBuf[0] is valid.
   std::vector<double>
      AngBuf(4 * std::max<size_t>(nAngPtsMax, 1));
   double
      (*pAngPts)[4] = reinterpret_cast<double (*)[4]>(&AngBuf[0]);
   uint
      nLastAng = 0xffffffff;

   for ( uint iShell = 0; iShell < nr; ++ iShell ){
      double
         fRad = ri[iShell], // current radius
         fRadWeight = wi[iShell]; // weight for the radial integration.
      uint
         nAng = nAngPts[iShell]; // current number of angular points

      // make lebedev grid if different from the one before.
      if ( nAng != nLastAng )
         nAng = MakeAngularGrid(pAngPts, nAng);
      nLastAng = nAng;

      double
         fShellTotalWeight = 0;

      // generate output points.
      for ( uint i = 0; i < nAng; ++ i ){
         double (&p)[4] = pAngPts[i];
         fShellTotalWeight += p[3];
         FPoint out;
         out.vPos[0] = fRad * p[0] + Atom.vPos[0];
         out.vPos[1] = fRad * p[1] + Atom.vPos[1];
         out.vPos[2] = fRad * p[2] + Atom.vPos[2];
         out.fWeight = p[3] * fRadWeight * GetAtomWeight( out.vPos, iAtom, pInvDistAt, Mem );
         if ( std::abs(out.fWeight) > 1e-12 )
            Points.push_back(out);
      }
      // the angular weights of a shell are expected to sum up to 1.
      assert_rt(std::abs(fShellTotalWeight-1.0)<1e-13);
   }
}


void FDftGridGenerator::Create()
{
   size_t
      nAt = Atoms.size();
   FMemoryStack2
      Mem(2000000 + sizeof(double) * nAt*nAt);

   // compute inverse distance between all atoms.
   double
      *pInvDistAt;
   Mem.Alloc(pInvDistAt, nAt*nAt);
   for (size_t iAt = 0; iAt < nAt; ++ iAt)
      for (size_t jAt = 0; jAt < nAt; ++ jAt) {
         double rij = Dist(Atoms[iAt].vPos, Atoms[jAt].vPos);
         if ( rij != 0 )
            rij = 1./rij;
         else
            rij = 0.;
         pInvDistAt[nAt*iAt + jAt] = rij;
         pInvDistAt[nAt*jAt + iAt] = rij;
      }


   double
      fTotalWeight = 0;
   for ( uint iAtom = 0; iAtom < Atoms.size(); ++ iAtom ) {
      uint
         iOff = Points.size();
      AddAtomGrid(Points, iAtom, pInvDistAt, Mem);
      for ( uint i = iOff; i < Points.size(); ++ i )
         fTotalWeight += Points[i].fWeight;
//       _xout0(boost::format("Atom #%i:  Grid points %i--%i") % iAtom % iOff % Points.size());
   }
   Mem.Free(pInvDistAt);
   BlockifyGridR( GridBlocks, 0, 0, 0 );
//    _xout0(format("Generated integration grid with %i points") % Points.size());
//    _xout0("Grid integrated volume^(1/3): " << fmt::ff(std::pow(fTotalWeight,1./3.),12,4));
}

// Groups the grid points (this->Points) into blocks and appends those to Blocks.
//
// Intended design: recursively sub-divide the points in [pFirst,pLast) at some
// axis (in a kd-tree fashion) such that
//   (a) the spatial extent of the grid points in the two sub-ranges is minimized
//   (b) a more-or-less uniform number of total points per block is formed.
//
// Current state: none of that is done, and the three trailing arguments are
// ignored. The points are just cut into consecutive runs of 128 (the last one
// possibly shorter), without any regard to reasonableness. For each block, the
// center (mean position), the radius (largest distance of a point from the
// center) and the largest point weight are stored.
void FDftGridGenerator::BlockifyGridR( FGridBlockList &Blocks, uint /*iFirst*/, FPoint */*pFirst*/, FPoint */*pLast*/ )
{
   uint const
      nPtsPerBlock = 128;

   for ( uint iBeg = 0; iBeg < Points.size(); ) {
      uint const
         iEnd = std::min(iBeg + nPtsPerBlock, (uint)Points.size());
      double const
         fInvCount = 1.0/(iEnd-iBeg);

      Blocks.push_back( FGridBlock() );
      FGridBlock
         &Blk = Blocks.back();
      Blk.iFirst = iBeg;
      Blk.iLast = iEnd; // exclusive
      Blk.fLargestWeight = 0;

      // block center: mean of the point positions.
      Blk.vCenter = FVector3(0,0,0);
      for ( uint i = iBeg; i != iEnd; ++ i )
         Blk.vCenter += fInvCount * Points[i].vPos;

      // largest squared distance from the center, and largest weight.
      double
         fMaxDistSq = 0;
      for ( uint i = iBeg; i != iEnd; ++ i ) {
         double const
            fDistSq = (Points[i].vPos - Blk.vCenter).LengthSq();
         fMaxDistSq = std::max( fMaxDistSq, fDistSq );
         Blk.fLargestWeight = std::max(Blk.fLargestWeight, Points[i].fWeight);
      }
      Blk.fRadius = std::sqrt(fMaxDistSq);

      iBeg = iEnd;
   }
}



// Makes the integration grid for the given atoms and settings, and writes a
// one-line summary (number of points and atoms, time taken) to the log.
// pLog may be 0; in this case the summary goes to a log set up on xout.
FDftGrid::FDftGrid( FAtomSet const &Atoms, FDftGridParams const &Params, ct::FLog *pLog )
{
   FLogStdStream
      DefaultLog(xout);
   ct::FLog
      *pOut = pLog;
   if (!pOut)
      pOut = &DefaultLog;

   FTimer tGen;
   {
      // fills this->Points and this->GridBlocks.
      FDftGridGenerator
         Generator(*this, Atoms, Params);
      Generator.Create();
   }
   MakeAdditionalRepresentations();
   pOut->Write(" Generated DFT grid with {} points for {} atoms in {:.2} sec.\n", Points.size(), Atoms.size(), (double)tGen);
}


// intentionally empty.
FDftGrid::~FDftGrid() {}


void FDftGrid::MakeAdditionalRepresentations()
{
   Positions.resize(Points.size());
   Weights.resize(Points.size());
   for ( uint i = 0; i < Points.size(); ++ i ){
      Positions[i][0] = Points[i].vPos[0];
      Positions[i][1] = Points[i].vPos[1];
      Positions[i][2] = Points[i].vPos[2];
      Weights[i] = Points[i].fWeight;
   }
}



} // namespace ct
