/* Copyright (c) 2015  Gerald Knizia
 * 
 * This file is part of the IboView program (see: http://www.iboview.org)
 * 
 * IboView is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3.
 * 
 * IboView is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with IboView (LICENSE). If not, see http://www.gnu.org/licenses/
 * 
 * Please see IboView documentation in README.txt for:
 * -- A list of included external software and their licenses. The included
 *    external software's copyright is not touched by this agreement.
 * -- Notes on re-distribution and contributions to/further development of
 *    the IboView software
 */

#include <iostream>
#include <cmath>
#include <stdlib.h> // for atoi

#include "CxDefs.h"
#include "CxTypes.h"
#include "CxOpenMpProxy.h"

// #include "CxXyzFrame.h"
#include "CtDftGrid.h"

#ifndef NO_RANDOM_GRIDS
#include "CxRandom.h"
#endif // NO_RANDOM_GRIDS

#ifndef NO_AIGG_GRIDS
   // use weighted angular grids generated by the Knizia group's Angular Integration Grid Generator (AIGG).
   // These come with tetrahedral, octahedral, and icosahedral symmetry.
   #include "CxAngularGrid.h"
#else
   // use standard Lebedev-Laikov-grids of octahedral symmetry
   #include "CxLebedevGrid.h"
#endif // NO_AIGG_GRIDS

#include "CxPhysicalUnits.h"
#include "CtAtomDensity.h"
#include "CxAtomData.h"
#include "CxTiming.h"
#ifndef NO_LINALG_ROUTINES
   #include "CtMatrix.h" // for a 3x3 Diagonalize() in AtomGrid_MakeAlignmentTrafo()...
#endif // NO_LINALG_ROUTINES


#include "CtDftGrid_Radial.h"
#include "CtDftGrid_QuadCriteria.h"

#include "CtVoronoiPartition.h"

using ct_free_atom_density_data::EvalFreeAtomDensity;
using ct_free_atom_density_data::EvalFreeAtomNumElec;


// #define USE_ADAPTIVE_INTEGRATION

namespace mig {
using ct::FMemoryStack;
using ct::GetVdwRadius_IsoDensity;

static bool
   // File-local switch which, when set, makes the Voronoi setup use the
   // space-partitioning settings modeled after http://doi.org/10.1063/1.5049435.
   // NOTE: these are just my interpretations of the spirit of that scheme...
   // not necessarily exactly what the paper specifies.
   s_OchsenfeldSpacePartition = true;

// Reference radius used to split an atom's radial shells into inner/outer
// regions; simply forwards to GetGridCenter_TaEta() for the given element.
static double GetEtaGridCenter(int iElement)
{
   return GetGridCenter_TaEta(iElement);
}


// Returns the atomic number of the heaviest noble gas whose atomic number is
// strictly smaller than iElement: 0 for H/He (and anything <= 2), 2 for
// Li..Ne, 10 for Na..Ar, ..., and 86 for everything beyond Rn.
static int GetNextNobleGas(int iElement) {
   // noble gas atomic numbers, heaviest first.
   static int const s_NobleGasNumbers[] = {86, 54, 36, 18, 10, 2};
   for (int const iNobleGas : s_NobleGasNumbers) {
      if (iElement > iNobleGas)
         return iNobleGas;
   }
   return 0;
}


// separated from FDftGrid in order to decouple grid generation from grid usage.
// Worker object which builds the integration grid for a FDftGrid: it holds
// references to the target grid (including its point and block lists), to the
// atom list, and to the grid parameters; the actual work is started via Create().
struct FDftGridGenerator
{
   // Binds the generator to its inputs and outputs. Grid_, Atoms_ and Params_
   // are stored as references and pLog_ as a raw pointer (nothing is copied),
   // so all of them must stay alive while the generator is in use.
   // The target accuracy is cached from Params_.fTargetAccuracy().
   FDftGridGenerator(FDftGrid &Grid_, ct::FRawAtomList const &Atoms_,
                     FDftGridParams const &Params_, ct::FLog *pLog_)
   : Atoms(Atoms_), Params(Params_), Grid(Grid_), Points(Grid_.Points ), GridBlocks(Grid_.GridBlocks), m_TargetAccuracy(Params_.fTargetAccuracy()), m_pLog(pLog_), m_iFreeAtomFitType(0)
   {
   }

   // generate the grid (defined outside of the class body).
   void Create();

protected:
   // local shorthands for the point/block types of the target grid.
   typedef FDftGrid::FPoint
      FPoint;
   typedef FDftGrid::FPointList
      FPointList;
   typedef FDftGrid::FGridBlock
      FGridBlock;
   typedef FDftGrid::FGridBlockList
      FGridBlockList;

   ct::FRawAtomList const
      &Atoms; // atoms for which the grid is made.
   FDftGridParams const
      &Params; // grid generation parameters (sizes, print level, alignment mode, ...).
   FDftGrid
      &Grid; // the grid object being filled.
   FPointList
      &Points; // alias for Grid.Points
   FGridBlockList
      &GridBlocks; // alias for Grid.GridBlocks
   double
      m_TargetAccuracy; // <-- FIXME: remove that? should be part of Params.
   ct::FLog
      *m_pLog; // output log; dereferenced without null checks in the member functions.
//    FRandomNumberGenerator
//       *m_pRng; // used when random grid orientations are selected.
   FVoronoiPartitionPtr
      m_pVoronoiPartition; // not set by the constructor -- presumably assigned in MakeVoronoiInfo(); TODO confirm.
   FRadialGridBuilderPtr
      m_pRadialGridBuilder; // not set by the constructor; used by the radial grid routines.
   int
      m_iFreeAtomFitType; // initialized to 0 by the constructor.


   // get voronoi weight of vGridPos regarding the set of ALL atoms.
   double GetAtomWeight(FVector3 const &vGridPos, unsigned iAtom, FMemoryStack &Mem);
   double GetAtomWeight(FVector3 const &vGridPos, unsigned iAtom, size_t *pAtOrd, size_t nAt, FMemoryStack &Mem);
   double GetAtomWeight(FVector3 const &vGridPos, unsigned iAtom, double *pDistAg, size_t *pAtOrd, size_t nAt, FMemoryStack &Mem);

   // evaluate (3 x nCenters) x nGridPt matrix of weight derivatives
   // [d/dAx,d/dAy,d/dAz] (A=0..nCenter-1s) pGridWt[iGridPt]
   void EvalWeightGradient(FVector3 *pWtGrad, size_t nCenters, FPoint *pGridPt, size_t nGridPt, FMemoryStack &Mem);

   static FScalar GetAtomPeriodRowFraction( uint ElementNumber );
//    void GetAtomGridParams( size_t &nRadialPt, double &AtomicScale, uint &iAngGrid, unsigned iAtom );
   void GetAtomRadialGrid( double *r, double *w, uint n, double AtomicScale );
   // make the list of grid points (AtPoints) belonging to atom iAtom.
   void AddAtomGrid( FPointList &AtPoints, unsigned iAtom, FMemoryStack &Mem);
   void BlockifyGridR( FGridBlockList &Blocks, uint iFirst, FPoint *pFirst, FPoint *pLast );

   void MakeVoronoiInfo(FMemoryStack &Mem);
   FScalar GetPairVoronoiR(double Mu, size_t iAtom, size_t iOtherAtom);

   // merge the default radial grid declaration with the overrides for atom iAtom.
   FAtomRadialGridDecl MakeAtomRadialGridDecl(size_t iAtom) const;
   // make the atom descriptor handed to the radial grid builder.
   FAtomSpec MakeAtomSpec(size_t iAtom) const;
   // allocate ri[nPt] and wi[nPt] on Mem and fill them via the radial grid builder.
   // pDecl == 0 means: use MakeAtomRadialGridDecl(iAtom).
   void MakeFixedRadialGrid(double *&ri, double *&wi, size_t nPt, size_t iAtom, FAtomRadialGridDecl const *pDecl, FMemoryStack &Mem);
   // evaluate the quadrature criteria on a radial grid with nRadialPt points
   // (the second form returns the result by value).
   void MakeQuadCriteriaForRadialGridSize(FQuadCriteria &Out, size_t nRadialPt, size_t iAtom, FAtomRadialGridDecl const *pDecl, FMemoryStack &Mem);
   inline FQuadCriteria MakeQuadCriteriaForRadialGridSize(size_t nRadialPt, size_t iAtom, FAtomRadialGridDecl const *pDecl, FMemoryStack &Mem);
   // decide how many radial points atom iAtom should get.
   size_t FindNumberOfRadialPoints(size_t iAtom, FMemoryStack &Mem);

   void EvalFreeAtomDensitySum(double *&pDensity, double fAddToDensity, double *pGridPt_, size_t nStGridPt, size_t nGridPt, size_t *pNeighbors, size_t nNeighbors, FMemoryStack &Mem, int iAtCenterRestrict=-1); // also used for free-atom-density test.
#ifdef USE_ADAPTIVE_INTEGRATION
   // the following are only declared if adaptive integration is compiled in.
   double InvDistAt(size_t iAt, size_t jAt) const { return 1/Dist(Atoms[iAt].vPos, Atoms[jAt].vPos); }
   void FindVoronoiTestNeighbors(size_t *&pAtOrd, double *&pDistAg, size_t &nNeighbors, size_t iAtom, double fSphereRadius,  FMemoryStack &Mem);
   void TestAngularGrid(FQuadCriteria *pCrit, double fRad, double fRadWt, double fDensityI, size_t *pNeighbors, size_t nNeighbors, size_t iAtom, double const *p3x3, size_t iAngGrid, FMemoryStack &Mem);
   void SortAtomsByDistanceToPoint(double *&pDistAg, size_t *&pAtOrd, size_t &nAtOut, FVector3 const &vGridPos, FMemoryStack &Mem);
#endif // USE_ADAPTIVE_INTEGRATION
   // assign one angular grid (pAngGrids[iShell]) to each of the nRadialPt radial shells of atom iAtom.
   void FindAngularGrids(aig::FAngularGridEntry const **pAngGrids, double *ri, double *wi, size_t nRadialPt, size_t iAtom, FMemoryStack &Mem);

   // make the 3x3 transformation applied to the angular grids of atom iAtom
   // (p3x3 is set to 0 if no alignment is to be done).
   void MakeAtomAlignmentTrafo(double *&p3x3, size_t iAtom, FMemoryStack &Mem);

   // element symbol of atom iAt (0-based index).
   char const *ElementName(int iAt) const { return ct::ElementNameFromNumber(Atoms[iAt].iElement); }
   // label for log output: 1-based atom number followed by the element symbol.
   std::string GetAtomLabel(int iAt) const { return fmt::format("{:>3} {:<2}", iAt+1, ct::ElementNameFromNumber(Atoms[iAt].iElement)); }
};



// Convenience form of the out-parameter overload: evaluates the quadrature
// criteria for a radial grid of nRadialPt points on atom iAtom and returns
// the result by value.
FQuadCriteria FDftGridGenerator::MakeQuadCriteriaForRadialGridSize(size_t nRadialPt, size_t iAtom, FAtomRadialGridDecl const *pDecl, FMemoryStack &Mem)
{
   FQuadCriteria Result;
   this->MakeQuadCriteriaForRadialGridSize(Result, nRadialPt, iAtom, pDecl, Mem);
   return Result;
}



// Returns the row (period) of the periodic table the element belongs to:
// 1 for H, He; 2 for Li-Ne; 3 for Na-Ar; 4 for K-Kr; 5 for Rb-Xe; 6 for Cs-Rn.
// Elements beyond Rn (Z > 86) are not tabulated: they trip the assert in
// debug builds, and yield 7 (the number of table entries) otherwise.
size_t GetAtomRow(int ElementNumber)
{
   static int const
      RareGasAtomicNumbers[] = {0, 2, 10, 18, 36, 54, 86};
   // the entry count must be derived from the array's own element type;
   // dividing by the size of an unrelated type (as was done before) yields a
   // wrong count and lets the loop below run past the end of the table.
   static size_t const
      N = sizeof(RareGasAtomicNumbers)/sizeof(RareGasAtomicNumbers[0]);
   for (size_t i = 1; i < N; ++ i){
      if (ElementNumber <= RareGasAtomicNumbers[i])
         return i;
   }
   assert(0);
   return N;
}



// Builds a fixed radial grid with nRadialPt points for atom iAtom and
// evaluates the quadrature criteria on it; the result is stored in Out.
void FDftGridGenerator::MakeQuadCriteriaForRadialGridSize(FQuadCriteria &Out, size_t nRadialPt, size_t iAtom, FAtomRadialGridDecl const *pDecl, FMemoryStack &Mem)
{
   // guard object on Mem, created before the radial grid is allocated --
   // presumably it releases the temporaries below on scope exit (TODO confirm).
   ct::TMemoryLock<char> pFreeMe(0, &Mem);

   // radii and weights; allocated on Mem by MakeFixedRadialGrid.
   double *pRadii;
   double *pWeights;
   MakeFixedRadialGrid(pRadii, pWeights, nRadialPt, iAtom, pDecl, Mem);

   Out.Eval(pRadii, pWeights, nRadialPt, 0., Atoms[iAtom].iElement, Mem);
}



// Assembles the atom descriptor (index, element, tag, core type) which is
// handed to the radial grid builder for atom iAtom.
FAtomSpec FDftGridGenerator::MakeAtomSpec(size_t iAtom) const
{
   // TODO: the core type is hard-coded; should probably get info about the
   // ECPs from the parameter set or something.
   return FAtomSpec{iAtom, Atoms[iAtom].iElement, Atoms[iAtom].iTag, FAtomSpec::CORE_EcpForDef2};
}


// Returns the effective radial grid declaration for atom iAtom: a copy of
// the global default, updated with whatever the parameters specify for this
// particular atom/element/tag.
FAtomRadialGridDecl FDftGridGenerator::MakeAtomRadialGridDecl(size_t iAtom) const
{
   ct::FRawAtom const &At = Atoms[iAtom];

   // start from a copy of the global default...
   FAtomRadialGridDecl Merged = Params.RadialGridDecl();
   // ...and merge in the possible overrides for this specific atom.
   Merged.Update(Params.RadialGridDecl(iAtom, At.iElement, At.iTag));

   assert(Merged.SchemeId.IsAssigned());
   return Merged;
}


void FDftGridGenerator::MakeFixedRadialGrid(double *&ri, double *&wi, size_t nPt, size_t iAtom, FAtomRadialGridDecl const *pDecl, FMemoryStack &Mem)
{
   Mem.Alloc(ri, nPt);
   Mem.Alloc(wi, nPt);
   if (pDecl == 0) {
      FAtomRadialGridDecl
         LocalDecl = MakeAtomRadialGridDecl(iAtom);
      m_pRadialGridBuilder->MakeFixedGrid(&ri[0], &wi[0], nPt, MakeAtomSpec(iAtom), &LocalDecl);
   } else {
      m_pRadialGridBuilder->MakeFixedGrid(&ri[0], &wi[0], nPt, MakeAtomSpec(iAtom), pDecl);
   }
}


size_t FDftGridGenerator::FindNumberOfRadialPoints(size_t iAtom, FMemoryStack &Mem)
{
   // number of radial points explicitly set?
   int nRadialPt1_ = Params.nRadialPt(GetAtomRow(Atoms[iAtom].iElement));
   if (nRadialPt1_ > 0) {
      return size_t(nRadialPt1_);
   }
   FAtomRadialGridDecl const
      &Decl = MakeAtomRadialGridDecl(iAtom);
   if (Decl.nRadialPt != FAtomRadialGridDecl::DefaultSize && Decl.nRadialPt != FAtomRadialGridDecl::AdaptiveSize)
      // a concrete size has been specified on input.
      return Decl.nRadialPt;
   if (Decl.nRadialPt != FAtomRadialGridDecl::AdaptiveSize) {
      return m_pRadialGridBuilder->EstimateSizeForTargetAccuracy(m_TargetAccuracy, MakeAtomSpec(iAtom), &Decl);
   } else {
      size_t
         nRadialPt;
      // make reference values of target criteria --- simply compute them on a big grid.
      size_t
         nRadialPtRef = 200;
      FQuadCriteria
         Ref = MakeQuadCriteriaForRadialGridSize(nRadialPtRef, iAtom, &Decl, Mem);

      if (Params.iPrintLevel() >= 4)
         m_pLog->Write("        nRadialPt = {:4}  fRes = {:12.6e}   Vals: {:18.12f}  {:18.12f}  {:18.12f}  {:18.12f}  {:18.12f}  {:18.12f}", nRadialPtRef, 0., Ref.fValues[0], Ref.fValues[1], Ref.fValues[2], Ref.fValues[3], Ref.fValues[4], Ref.fValues[5] );
      // now start with a smaller grid and keep on increasing its size until we have reached
      // the target quadrature accuracy for integrating a free-atom density.
      nRadialPt = 10;
      size_t
         nLookAhead = 15;
      double
         fRes = 0.,
         fTargetRes = m_TargetAccuracy*1e-1;
      if (1) {
         double fAccuExp = 1.1;
//          double fAccuExp = 1.25;
         fTargetRes = std::pow(m_TargetAccuracy, fAccuExp);
      }
      for ( ; nRadialPt < nRadialPtRef; ++ nRadialPt) {
         FQuadCriteria
            Crit = MakeQuadCriteriaForRadialGridSize(nRadialPt, iAtom, &Decl, Mem);
         fRes = Crit.fResidual(Ref);

         if (Params.iPrintLevel() >= 4)
            m_pLog->Write("        nRadialPt = {:4}  fRes = {:12.6e}   Vals: {:18.12f}  {:18.12f}  {:18.12f}  {:18.12f}  {:18.12f}  {:18.12f}", nRadialPt, fRes, Crit.fValues[0], Crit.fValues[1], Crit.fValues[2], Crit.fValues[3], Crit.fValues[4], Crit.fValues[5] );

         if (fRes < fTargetRes) {
            // check some of the bigger grids, to see if they have bigger residuals.
            // to make sure that this is not a fortuitous positive result.
            for (size_t iNext = nRadialPt + 1; iNext < std::min(nRadialPtRef, size_t(nRadialPt+nLookAhead)); ++iNext) {
               double fResNext = MakeQuadCriteriaForRadialGridSize(iNext, iAtom, &Decl, Mem).fResidual(Ref);
               fRes = std::max(fResNext, fRes);
            }
         }

         // residual still below the threshold?
         if (fRes < fTargetRes)
            break;
      }
      if (Params.iPrintLevel() >= 2) {
         #pragma omp critical
         m_pLog->Write("   At: {:3}  {:2}   nRadialPt = {:4}  fRes = {:8.2e}", iAtom, ElementName(iAtom), nRadialPt, fRes);
      }
      return nRadialPt;
   }
}


// Applies the 3x3 transformation p3x3 in-place to the first three components
// of each of the nAngPts points stored in pAngPts; consecutive points are iSt
// doubles apart. Component k of a transformed point p' is
//    p'[k] = sum_i p3x3[i + 3*k] * p[i].
// Nothing is done if p3x3 == 0. Mem is not used.
void AtomGrid_AlignAngularGrid(double *pAngPts, size_t iSt, size_t nAngPts, double const *p3x3, FMemoryStack &Mem)
{
   (void)Mem; // suppress unused warning
   if (p3x3 == 0)
      return;

   for (size_t iPt = 0; iPt != nAngPts; ++ iPt) {
      double *pPoint = &pAngPts[iPt * iSt];
      double const
         x = pPoint[0],
         y = pPoint[1],
         z = pPoint[2];
      // FIXME (carried over): is that right? isn't the matrix transpose of
      // what it should be?! (the alternative would be p3x3[k + 3*i])
      pPoint[0] = p3x3[0 + 3*0]*x + p3x3[1 + 3*0]*y + p3x3[2 + 3*0]*z;
      pPoint[1] = p3x3[0 + 3*1]*x + p3x3[1 + 3*1]*y + p3x3[2 + 3*1]*z;
      pPoint[2] = p3x3[0 + 3*2]*x + p3x3[1 + 3*2]*y + p3x3[2 + 3*2]*z;
   }
}

// static void MakeAngularGridAtPoint(FVector3 *&pGridPt, double *&pGridWt, size_t &nAng, FVector3 const &vCenter, double const *p3x3, double fRadius, double fRadialWt, size_t iAngGrid, FMemoryStack &Mem)
// FIXME: is this not used? why not? it looks fine.
// Makes the points and weights of angular grid #iAngGrid, placed on a sphere
// of radius fRadius around vCenter.
//  - pGridPt, pGridWt: output arrays of length nAng; both are allocated on Mem
//    here and are left allocated for the caller.
//  - nAng: receives the number of points of the angular grid.
//  - p3x3: optional transformation applied to the unit-sphere grid (0: none).
//  - fRadialWt: factor multiplied to each angular weight.
void AtomGrid_MakeAngularGridAtPoint(FVector3 *&pGridPt, double *&pGridWt, size_t &nAng, FVector3 const &vCenter, double const *p3x3, double fRadius, double fRadialWt, size_t iAngGrid, FMemoryStack &Mem)
{
   aig::FAngularGridEntry const
      *pGridInfo = aig::GetAngularGridInfo(iAngGrid);
   nAng = pGridInfo->nPoints;

   // outputs first; the temporary (x,y,z,weight) array comes last, such that
   // it can be released again at the end.
   Mem.Alloc(pGridPt, nAng);
   Mem.Alloc(pGridWt, nAng);
   double
      (*pUnitSphere)[4];
   Mem.Alloc(pUnitSphere, nAng);

   pGridInfo->MakeGrid(&pUnitSphere[0]);
   AtomGrid_AlignAngularGrid(&pUnitSphere[0][0], 4, nAng, p3x3, Mem);

   // scale to the target radius and shift to the target center.
   for (size_t iAng = 0; iAng != nAng; ++ iAng) {
      double const (&g)[4] = pUnitSphere[iAng];
      pGridPt[iAng] = fRadius * FVector3(g[0],g[1],g[2]) + vCenter;
      pGridWt[iAng] = fRadialWt * g[3];
   }

   Mem.Free(pUnitSphere);
}



#ifndef USE_ADAPTIVE_INTEGRATION


// Looks through the available angular grids (in index order) and returns the
// first one with MaxL >= TargetL. If MaxL != -1 and a grid exceeding MaxL is
// encountered before that, the grid preceding it is returned instead (or the
// very first grid, if there is no preceding one). If no grid is large enough,
// the last grid is returned.
static aig::FAngularGridEntry const *FindAngularGridInfoForL(int TargetL, int MaxL) {
   size_t const
      nGrids = aig::GetNumAngularGrids();
   bool const
      HasMaxL = (MaxL != -1);
   for (size_t iGrid = 0; iGrid < nGrids; ++ iGrid) {
      aig::FAngularGridEntry const
         *pCurrent = aig::GetAngularGridInfo(iGrid);
      int const
         lCurrent = int(pCurrent->MaxL);

      // was a maximum L specified, and this grid is beyond it? If yes, go
      // back to the last grid which still was within the limit.
      if (HasMaxL && lCurrent > MaxL)
         return (iGrid > 0)? aig::GetAngularGridInfo(iGrid - 1) : pCurrent;

      // is the current grid large enough?
      if (lCurrent >= TargetL)
         return pCurrent;
   }
   // no l big enough was found. Return the biggest grid we have.
   return aig::GetAngularGridInfo(nGrids-1);
}


// Assigns an angular grid to each radial shell of atom iAtom.
//  - pAngGrids: output; pAngGrids[iShell] receives the angular grid for shell
//    iShell (iShell = 0..nRadialPt-1).
//  - ri: radii of the radial shells (used to classify the shells into inner
//    core / inner atom / outer region).
//  - wi, Mem: unused in this (non-adaptive) version.
// Three target angular orders (inner core, inner atom, outer) are taken from
// a table indexed by the grid level, linearly extrapolated for levels beyond
// the table, and adjusted by the per-row min/max/fixed-L parameters.
void FDftGridGenerator::FindAngularGrids(aig::FAngularGridEntry const **pAngGrids, double *ri, double *wi, size_t nRadialPt, size_t iAtom, FMemoryStack &Mem)
{
   int
      iElement = Atoms[iAtom].iElement,
      iAtomRow = GetAtomRow(iElement);
   int
      iMinL = Params.iMinL(iAtomRow),
      iMaxL = Params.iMaxL(iAtomRow);
   double
      fCoreRadius = Params.GetCoreRadius(iElement);
//       iTargetL = std::max(GetTargetAngularGridSizeForAccu(iElement, m_TargetAccuracy), iMinL);

   if (1) {
      // these are/are based on the angular grid recommendations in the LKO paper.
      int iGridLevel = Params.iGridLevel();

      // grid parameter specs from J. Chem. Phys. 149, 204111 (2018) 10.1063/1.5049435 Tab III in the appendix.
      //
      //   Name   Base(nrad)  nAngPt(inner/medium/outer)   nGrid(Carbon)
      //    "g1"     35       14/50/110                    2586
      //    "g2"     40       26/74/194                    5056
      //    "g3"     50       38/110/302                   9564
      //    "g4"     55       50/194/434                   15526
      //    "g5"     60       50/194/590                   21330
      //    "g6"     70       86/302/974                   40838
      //    "g7"     80       110/434/1454                 68770
      //
      // I did not see any comments about different elements having different
      // angular grid sizes (in particular, not the hydrogens). That is curious,
      // but on second thought maybe actually makes sense. Should give a try.
      // (Also, I might have just missed it).
      // So.. I just do it here anyway now. At least for H and He.
      if (iElement == 1 || iElement == 2)
         iGridLevel -= 1; // the LKO paper actually does *not* call for this! But the original TA one does.
      //
      // The Ochsenfeld paper uses Lebedev-Laikov grids. I guess these would
      // correspond to this subset from CxLebedevGrid.cpp:
      //
      // ---- inner grids:
      //   {   5,   14, false},
      //   {   7,   26, false},
      //   {   9,   38, false},
      //   {  11,   50, false},
      //   {  11,   50, false},
      //   {  15,   86, false},
      //   {  17,  110, false}, // 116 points in molpro/cadpac?

      // ---- intermediate grids:
      //   {  11,   50, false},
      //   {  13,   74, true }, // note: has negative weights.  78 points in molpro/cadpac?
      //   {  17,  110, false}, // 116 points in molpro/cadpac?
      //   {  23,  194, false},
      //   {  23,  194, false},
      //   {  29,  302, false},
      //   {  35,  434, false},

      // ---- outer grids:
      //   {  17,  110, false}, // 116 points in molpro/cadpac?
      //   {  23,  194, false},
      //   {  29,  302, false},
      //   {  35,  434, false},
      //   {  41,  590, false},
      //   {  53,  974, false},
      //   {  65, 1454, false},

      static int const s_iTargetLs_InnerCore[] = {3,5,7,9,11,13,15,17};  // NOTE: I changed the second 11 to 13 (assuming they didn't have one of those)
      static int const s_iTargetLs_InnerAtom[] = {9,11,13,17,23,26,29,35};  // NOTE: I changed the second 23 to 26 (assuming they didn't have one of those)
      static int const s_iTargetLs_Outer[] = {15,17,23,29,35,41,53,65};
      size_t const nLevelTab = sizeof(s_iTargetLs_Outer)/sizeof(s_iTargetLs_Outer[0]); // 0,1,...,7
      // FIXME: deleted code said something about "comments in 10.1002/jcc.20063 for TAnew." regarding
      // angular grid sizes. Maybe check. Or just fit my own, but properly.
      if (iGridLevel < 0)
         iGridLevel = 0;
      int
         iTargetL_InnerCore(0), iTargetL_InnerAtom(0), iTargetL_Outer(0);
      if (size_t(iGridLevel) >= nLevelTab) {
         // number of levels beyond the maximum tabulated one (i.e., this would
         // be zero for the last tabulated entry, and is 1 for the first level
         // after it). NOTE: the index of the last tabulated level is
         // nLevelTab-1; the parentheses matter, since otherwise levels beyond
         // the table would get *smaller* grids than the last tabulated level.
         int d = iGridLevel - int(nLevelTab - 1);
         iTargetL_InnerCore = s_iTargetLs_InnerCore[nLevelTab-1] + d * 2;
         iTargetL_InnerAtom = s_iTargetLs_InnerAtom[nLevelTab-1] + d * 3;
         iTargetL_Outer = s_iTargetLs_Outer[nLevelTab-1] + d * 12;
      } else {
         iTargetL_InnerCore = s_iTargetLs_InnerCore[size_t(iGridLevel)];
         iTargetL_InnerAtom = s_iTargetLs_InnerAtom[size_t(iGridLevel)];
         iTargetL_Outer = s_iTargetLs_Outer[size_t(iGridLevel)];
      }
      if (0) {
         // going below those does not appear to be such a great idea.
         // at least not with the 1/3 // 1/2 thing with logE grids...
         iTargetL_InnerCore = std::max(iTargetL_InnerCore, 5);
         iTargetL_InnerAtom = std::max(iTargetL_InnerAtom, 11);
      }

      // apply the per-row lower bound/fixed value to the outer grid...
      if (iTargetL_Outer < iMinL)
         iTargetL_Outer = iMinL;
      if (Params.HasFixedL(iAtomRow)) {
         assert(iMaxL == iMinL);
         iTargetL_Outer = iMinL;
      }

      // ...and make sure the inner grids are never larger than the outer ones.
      iTargetL_InnerAtom = std::min(iTargetL_InnerAtom, iTargetL_Outer);
      iTargetL_InnerCore = std::min(iTargetL_InnerCore, iTargetL_InnerAtom);

      aig::FAngularGridEntry const
         *pTargetAngGrid = FindAngularGridInfoForL(iTargetL_Outer, iMaxL),
         *pAngGrid_InnerCore = FindAngularGridInfoForL(iTargetL_InnerCore, iMaxL),
         *pAngGrid_InnerAtom = FindAngularGridInfoForL(iTargetL_InnerAtom, iMaxL);
      if (fCoreRadius == 0.) {
         // a core radius of exactly zero disables the smaller inner grids.
         pAngGrid_InnerCore = pTargetAngGrid;
         pAngGrid_InnerAtom = pTargetAngGrid;
      }
      double
         RefGridCenterR = GetEtaGridCenter(iElement);
//       RefGridCenterR = GetGridCenter_rExpAvg2(iElement);
      // ^-- this makes a significant difference in LogE vs TA for hydrogen. Because my radii
      //     differ quite a lot in this case. FIXME: should probably figure this thing out.
      //     Not quite sure what to do about it.
      size_t
         nAtomGridPt = 0; // total number of points (without screening etc.)
      for (size_t iShell = 0; iShell != nRadialPt; ++ iShell) {
         if (0) {
            if (iShell < nRadialPt/3)
               pAngGrids[iShell] = pAngGrid_InnerCore;
            else if (iShell < nRadialPt/2)
               pAngGrids[iShell] = pAngGrid_InnerAtom;
            else
               pAngGrids[iShell] = pTargetAngGrid;
         } else {
            // this should correspond rather closely to the nPoints-based
            // thresholds for the M4 alpha=0.6 mapping. However, this one is
            // less fragile with respect to the used radial integration scheme
            // and order of points and so on.
            if (ri[iShell] < RefGridCenterR/4)
               pAngGrids[iShell] = pAngGrid_InnerCore;
            else if (ri[iShell] <= RefGridCenterR)
               pAngGrids[iShell] = pAngGrid_InnerAtom;
            else
               pAngGrids[iShell] = pTargetAngGrid;
         }
         // WARNING: CoreRadius is not actually used...
         nAtomGridPt += pAngGrids[iShell]->nPoints;
      }
      if (Params.iPrintLevel() >= 1) {
         #pragma omp critical
         {
            m_pLog->Write("   GRID[{:6}]:  nGridPt = {:6}  |  nRadialPt = {:3}  |  TargetLs = {:2}/{:2}/{:2}   ActualLs = {:2}/{:2}/{:2}  nAngPts = {:4}/{:4}/{:4}  |  (CoreRadius ={:7.4f}  MaxL = {})", GetAtomLabel(iAtom), nAtomGridPt, nRadialPt,
            iTargetL_InnerCore,iTargetL_InnerAtom,iTargetL_Outer,
            pAngGrid_InnerCore->MaxL, pAngGrid_InnerAtom->MaxL, pTargetAngGrid->MaxL,
            pAngGrid_InnerCore->nPoints, pAngGrid_InnerAtom->nPoints, pTargetAngGrid->nPoints,
            fCoreRadius, iMaxL);
         }
      }
      return;
   }
   (void)wi; // suppress unused warning (these are used in the currently-disabled adaptive version of the code)
   (void)Mem; // suppress unused warning
}


#else

#endif // USE_ADAPTIVE_INTEGRATION


void AtomGrid_MakeAlignmentTrafo(double *p3x3, size_t iAtom, ct::FRawAtomList const &Atoms, FMemoryStack &Mem)
{
#ifdef  NO_LINALG_ROUTINES
   // no algebra routines... just return a 3x3 matrix
   // (TODO: hack up a 3x3 matrix diagonalize for this. We do not want an
   // LAPACK dependency for *this*...)
   p3x3[0+3*0] = 1; p3x3[0+3*1] = 0; p3x3[0+3*2] = 0;
   p3x3[1+3*0] = 0; p3x3[1+3*1] = 1; p3x3[1+3*2] = 0;
   p3x3[2+3*0] = 0; p3x3[2+3*1] = 0; p3x3[2+3*2] = 1;
#else // NO_LINALG_ROUTINES
   FVector3
      vCenter = Atoms[iAtom].vPos;
   ct::TMemoryLock<double>
      pIm(9, &Mem),
      pImEw(3, &Mem);
   ct::FMatrixView
      mIm(&pIm[0], 3, 3);
   double
      fWeightSum = 0.;
   mIm.Clear();

   // compute 'distance-weighted charge-inertial' tensor around the atom
   for (size_t jAt = 0; jAt < Atoms.size(); ++ jAt) {
      if (jAt == iAtom)
         continue;
      FVector3 const
         vAtPos = Atoms[jAt].vPos - vCenter;

      double
         rcovij = ct::GetCovalentRadius(Atoms[iAtom].iElement) + ct::GetCovalentRadius(Atoms[jAt].iElement);

      double
         Rsq = ct::Dot(vAtPos, vAtPos),
         RsqRel = Rsq/(rcovij*rcovij),
         fWeight = std::exp(-4./3.*std::sqrt(RsqRel)) * Atoms[jAt].iElement;

      for (size_t i = 0; i < 3; ++ i)
         for (size_t j = 0; j < 3; ++ j)
            pIm[i + 3*j] += fWeight * (-vAtPos[i]*vAtPos[j]);
//             pIm[i + 3*j] += fWeight * ((i==j? 1.:0.)*Rsq - vAtPos[i]*vAtPos[j]);
      fWeightSum += fWeight;
   }
   if (fWeightSum != 0.) {
      ct::Diagonalize(mIm, &pImEw[0], Mem);
   } else {
      mIm.SetIdentity();
   }
   for (size_t ij = 0; ij < 9; ++ ij)
      p3x3[ij] = pIm[ij];
#endif // NO_LINALG_ROUTINES
}


#ifndef NO_RANDOM_GRIDS
// make a vector of unit length and random orientation in space.
// Makes a vector of unit length and random orientation in space, by
// normalizing a vector of three standard-normal random numbers (re-drawing
// in case the vector is too short to be normalized safely).
// NOTE: draws from the shared global RNG ct::g_SharedRng.
FVector3 MakeRandomUnitVector()
{
   FVector3
      vResult;
   double
      fLength;
   do {
      vResult = FVector3(ct::g_SharedRng.GetStdNormal(), ct::g_SharedRng.GetStdNormal(), ct::g_SharedRng.GetStdNormal());
      fLength = Length(vResult);
   } while (!(fLength > 1e-10));
   vResult /= fLength;
   return vResult;
}
#endif // NO_RANDOM_GRIDS


// Makes the 3x3 transformation to be applied to the angular grids of atom
// iAtom, according to Params.AngularGridAlign():
//  - ANGULARALIGN_AtomicEnvironment: p3x3 is allocated on Mem (9 doubles) and
//    filled by AtomGrid_MakeAlignmentTrafo().
//  - ANGULARALIGN_Randomize: p3x3 is allocated on Mem and filled with a random
//    orthonormal frame (of random handedness). Throws std::runtime_error if
//    compiled with NO_RANDOM_GRIDS.
//  - ANGULARALIGN_None: p3x3 is set to 0, which means 'no transformation'.
// Any other alignment mode results in a std::runtime_error.
void FDftGridGenerator::MakeAtomAlignmentTrafo(double *&p3x3, size_t iAtom, FMemoryStack &Mem)
{
   p3x3 = 0;
   if (Params.AngularGridAlign() == FDftGridParams::ANGULARALIGN_AtomicEnvironment) {
      Mem.Alloc(p3x3, 9);
      AtomGrid_MakeAlignmentTrafo(p3x3, iAtom, this->Atoms, Mem);
   } else if (Params.AngularGridAlign() == FDftGridParams::ANGULARALIGN_Randomize) {
#ifdef NO_RANDOM_GRIDS
      throw std::runtime_error(fmt::format("FDftGridGenerator::MakeAtomAlignmentTrafo: was asked to generate a random angular grid alignment, but this version was compiled without random grid support (-DNO_RANDOM_GRIDS)."));
#else // NO_RANDOM_GRIDS
      FVector3
         vN, vU, vV; // normal, tangent, and cotangent.
      // we use a shared global RNG which is not thread safe -> put in critical section
      // UPDATE: wait... why do we do that?
      #pragma omp critical
      {
         // note: I will not guarantee that this construction will generate
         // a distribution of random orientations which is 100% equally spaced
         // in orientation space (whatever that means). I do not think that is
         // important for our current purposes, either, however.
         vN = MakeRandomUnitVector();
         for (;;) {
            vU = MakeRandomUnitVector();
            // if vN and vU are too colinear, make another vU. Note that this
            // includes the anti-parallel case (Dot close to -1), in which the
            // orthogonalization below would be just as ill-conditioned as in
            // the parallel case -- therefore the absolute value is tested.
            if (std::abs(Dot(vN, vU)) < 0.8)
               break;
         }
         // orthogonalize U against N and renormalize
         vU -= vN*Dot(vN, vU);
         vU.Normalize();
         // make a vector orthogonal to both N and U
         vV = Cross(vN, vU);
         // vV should already be normalized (cross product of two normalized
         // orthogonal vectors), but we do not want slight rounding artifacts to
         // confuse things.
         vV.Normalize();
         if (ct::g_SharedRng.GetUniformF(-1., 1.) < 0) {
            // flip the sign of vV to randomize the orientedness (right-handed
            // or left-handed) of the coordinate system. The 3x3 matrix align
            // transform can flip signs (FIXME: maybe put in an eigenvector
            // sign flip using the cross product check?)
            vV = -vV;
         }
      }
      // okay... we have a random coordinate system now. Put it into the
      // 3x3 matrix (columns: U, V, N).
      Mem.Alloc(p3x3, 9);
      for (size_t i = 0; i < 3; ++ i) {
         p3x3[i + 3*0] = vU[i]; p3x3[i + 3*1] = vV[i]; p3x3[i + 3*2] = vN[i];
      }
#endif // NO_RANDOM_GRIDS
   } else if (Params.AngularGridAlign() == FDftGridParams::ANGULARALIGN_None) {
      // do nothing -- p3x3 == 0 is understood as "don't do it"
   } else {
      throw std::runtime_error(fmt::format("FDftGridGenerator::MakeAtomAlignmentTrafo: grid alignment mode {} not recognized", unsigned(Params.AngularGridAlign())));
   }
}







// Makes the grid points belonging to atom iAtom and stores them in AtPoints
// (previous content of AtPoints is discarded).
// For each radial shell, the angular grid assigned by FindAngularGrids() is
// (optionally) rotated by the atom's alignment transformation, scaled to the
// shell radius, and shifted to the atom position. Each point's weight is the
// product of angular weight, radial weight, and the atom's weight at the
// point's position (GetAtomWeight). Points with absolute weight not exceeding
// Params.fWeightCut() are dropped.
void FDftGridGenerator::AddAtomGrid(FPointList &AtPoints, unsigned iAtom, FMemoryStack &Mem)
{
   ct::TMemoryLock<char>
      pFreeMe(0, &Mem);
   ct::FRawAtom const
      &Atom = Atoms[iAtom];
   std::vector<aig::FAngularGridEntry const*>
      pAngGrids;

   int iPrint = Params.iPrintLevel();

   size_t
      nRadialPt = FindNumberOfRadialPoints(iAtom, Mem);
   double
      *ri, *wi; //< allocated on Mem by MakeFixedRadialGrid
   FAtomRadialGridDecl const &Decl = MakeAtomRadialGridDecl(iAtom);
   MakeFixedRadialGrid(ri, wi, nRadialPt, iAtom, &Decl, Mem);

   double *p3x3;
   MakeAtomAlignmentTrafo(p3x3, iAtom, Mem);

   // assign angular grid for each radius. (.data() is used instead of
   // &pAngGrids[0], as the latter is invalid if there are no radial points.)
   pAngGrids.resize(nRadialPt);
   FindAngularGrids(pAngGrids.data(), ri, wi, nRadialPt, iAtom, Mem);

   // find size of largest angular grid and count total number of points.
   size_t
      nAngPtsMax = 0,
      nPtsTotal = 0;
   for (size_t iShell = 0; iShell < nRadialPt; ++ iShell) {
      size_t const nShellPt = size_t(pAngGrids[iShell]->nPoints);
      nAngPtsMax = std::max(nAngPtsMax, nShellPt);
      nPtsTotal += nShellPt;
   }

   // scratch space for one angular grid: (x,y,z,weight) for each point.
   // Owned by the vector, such that it is released on all exit paths
   // (including exceptions thrown by the routines called below).
   std::vector<double>
      AngPtBuffer(4 * nAngPtsMax);
   double
      (*pAngPts)[4] = reinterpret_cast<double (*)[4]>(AngPtBuffer.data());

   if (iPrint >= 2) {
      // note: the critical section covers the entire print block, such that
      // the per-shell lines of different threads do not get interleaved.
      #pragma omp critical
      {
         m_pLog->Write("   Radial Grid:   At = {}   nRadialPt = {:4}   Decl = '{}'", GetAtomLabel(iAtom), nRadialPt, Decl.Format());
         if (iPrint >= 4) {
            for (size_t i = 0; i < nRadialPt; ++ i)
               m_pLog->Write("   {:6}   r = {:<12.6g}   w = {:<12.6g}   L = {:<3}   nAngPt = {} ", i, ri[i], wi[i], pAngGrids[i]->MaxL, pAngGrids[i]->nPoints);
            m_pLog->WriteLine();
         }
      }
   }

   AtPoints.clear();
   AtPoints.reserve(nPtsTotal);

   aig::FAngularGridEntry const
      *pLastAngGrid = 0;
   for (size_t iShell = 0; iShell < nRadialPt; ++ iShell){
      double
         fRad = ri[iShell], // current radius
         fRadWeight = wi[iShell]; // weight for the radial integration.
      aig::FAngularGridEntry const
         *pAng = pAngGrids[iShell];
      size_t
         nAngPt = pAng->nPoints;

      // make angular grid if different from the one before (otherwise the
      // aligned unit-sphere grid in pAngPts can be re-used as it is).
      if (pAng != pLastAngGrid) {
         pAng->MakeGrid(pAngPts);
         AtomGrid_AlignAngularGrid(&pAngPts[0][0], 4, nAngPt, p3x3, Mem);
      }
      pLastAngGrid = pAng;
      double
         fShellTotalWeight = 0;

      // generate output points.
      for (size_t i = 0; i < nAngPt; ++ i){
         double (&p)[4] = pAngPts[i];
         fShellTotalWeight += p[3];
         FPoint out;
         out.vPos[0] = fRad * p[0] + Atom.vPos[0];
         out.vPos[1] = fRad * p[1] + Atom.vPos[1];
         out.vPos[2] = fRad * p[2] + Atom.vPos[2];
         out.fWeight = p[3] * fRadWeight * GetAtomWeight(out.vPos, iAtom, Mem);
         out.iAtomicCenter = ptrdiff_t(iAtom);
         if (std::abs(out.fWeight) > Params.fWeightCut())
// ^- is that a good idea? points might have large density, even if the weight is small
//    UPDATE: it is not entirely benign. It leads to energy errors in the ~1e-6 region.
//            however, it also leads to a large reduction in points in preg.xyz, and no
//            noticeable change in geometry convergence behavior.
            AtPoints.push_back(out);
      }
      // the angular weights of each shell are expected to sum up to 1.
      assert_rt(std::abs(fShellTotalWeight-1.0)<1e-13);
   }
}


// Neighbor criterion: atoms A and B are treated as neighbors only if their
// distance does not exceed 1.3 times the sum of their covalent radii (i.e.,
// if they could be considered as covalently bonded). For atoms which are
// further apart, the dividing plane is meant to be placed at the center
// between them. fRadA and fRadB are not used.
static bool TreatAsNeighborsFn_DftGrid_CovBondedOnly(double fDistAB, ct::FRawAtom const &AtA, double fRadA, ct::FRawAtom const &AtB, double fRadB)
{
   (void)fRadA; // suppress unused warning
   (void)fRadB; // suppress unused warning
   double
      fCovRadiusA = ct::GetCovalentRadius(AtA.iElement),
      fCovRadiusB = ct::GetCovalentRadius(AtB.iElement);
   // (alternative criterion once considered: fDistAB > fRadA + fRadB)
   bool const
      TooFarApart = fDistAB > 1.3 * (fCovRadiusA + fCovRadiusB);
   return !TooFarApart;
}


// Alternative neighbor predicate: an atom pair counts as neighbors if its
// distance does not exceed the sum of the two GetVdwRadius_IsoDensity()
// radii (scale factor 1; earlier variants used a factor of 2).
//
// fRadA and fRadB are part of the callback signature but are not used here.
static bool TreatAsNeighborsFn_DftGrid_IfVdwSpheresOverlap(double fDistAB, ct::FRawAtom const &AtA, double fRadA, ct::FRawAtom const &AtB, double fRadB)
{
   (void)fRadA; // suppress unused warning
   (void)fRadB; // suppress unused warning

   bool const
      SpheresOverlap = fDistAB <= 1*(GetVdwRadius_IsoDensity(AtA.iElement) + GetVdwRadius_IsoDensity(AtB.iElement));
   return SpheresOverlap;
}



// Set up the Voronoi partition object (m_pVoronoiPartition) which is later
// queried for the atomic cell weights of the grid points.
//
// The partition is built with the PolyStep_a10 smoothing function and with
// the weight cut parameters taken from Params. Two compile-time/static
// switches control the details:
//  - s_OchsenfeldSpacePartition: cut pair Voronoi functions at a fixed
//    per-atom range (2.5 for every atom).
//  - UseUnadjustedCells (fixed to true): if it were false, Becke-style atom
//    size adjustments (TA modification) would be computed from per-atom
//    radii and handed to the partition. That branch is currently disabled.
//
// The Mem argument is not used.
void FDftGridGenerator::MakeVoronoiInfo(FMemoryStack &Mem)
{
   (void)TreatAsNeighborsFn_DftGrid_IfVdwSpheresOverlap; // suppress unused warning
   (void)GetNextNobleGas; // suppress unused warning
   (void)Mem; // suppress unused warning

   size_t const
      nAtoms = Atoms.size();

   // base parameters; atomic size adjustments are off unless enabled below.
   FVoronoiPartitionParams
      VoronoiParams(FVoronoiPartitionParams::SMOOTHFN_PolyStep_a10, FVoronoiPartitionParams::ATOMSIZE_None);
   VoronoiParams.m_ThrPairVoronoiWeightCut = std::min(Params.fWeightCut(), VoronoiParams.m_ThrPairVoronoiWeightCut);
   // as multiple of iso-den vdW radius.
   VoronoiParams.m_fWeightCut_AtomVdwRadiusFactor = Params.fWeightCut_AtomVdwRadiusFactor();

   if (s_OchsenfeldSpacePartition) {
      // WARNING: atom size adjustments disabled here!
      VoronoiParams.m_AtomSizeAdjustType = FVoronoiPartitionParams::ATOMSIZE_None;
      VoronoiParams.m_CutPairVoronoiAtMaxAtRange = true;
      VoronoiParams.m_MaxAtRanges.resize(nAtoms);
      // that's what the Ochsenfeld paper (10.1063/1.5049435) recommends -- 5 bohr
      // cutoff universal. (Earlier variants used .75 or 1. times
      // GetVdwRadius_IsoDensity(element); .75 gives about 5 abohr for 2*radius(C).)
      double const
         fUniversalRange = 2.5;
      for (size_t iAt = 0; iAt != nAtoms; ++ iAt)
         VoronoiParams.m_MaxAtRanges[iAt] = fUniversalRange;
   }

   bool const
      UseUnadjustedCells = true;
   if (UseUnadjustedCells) {
      m_pVoronoiPartition = new FVoronoiPartition(this->Atoms, VoronoiParams, 0, 0, 0);
   } else {
      // per-atom radii entering the size adjustment: sqrt of GetGridCenter_rExpAvg2.
      // (other candidates tried: GetVdwRadius_IsoDensity, GetEtaGridCenter,
      // GetSlaterBraggRadius, and exponents 1 and 2 instead of 0.5.)
      ct::TArray<double>
         AtomicRadii(nAtoms);
      for (size_t iAt = 0; iAt != nAtoms; ++ iAt) {
         int iElement = Atoms[iAt].iElement;
         double fAtomRadius = GetGridCenter_rExpAvg2(iElement);
         fAtomRadius = std::pow(fAtomRadius,0.5);
         AtomicRadii[iAt] = fAtomRadius;
      }

      // (alternative: FVoronoiPartitionParams::ATOMSIZE_OriginalBecke)
      VoronoiParams.m_AtomSizeAdjustType = FVoronoiPartitionParams::ATOMSIZE_TaModOfBecke;
      // a null function pointer would mean: treat every atom pair as neighbors
      // for size adjustment purposes.
      FVoronoiChiData::FTreatAsNeighborsFn
         pTreatAsNeighborsFn = TreatAsNeighborsFn_DftGrid_CovBondedOnly;
      FVoronoiChiData
         ChiData;
      ChiData.MakeFromAtomRadii(this->Atoms, &AtomicRadii[0], -1.0, pTreatAsNeighborsFn, VoronoiParams.m_AtomSizeAdjustType);
      m_pVoronoiPartition = new FVoronoiPartition(this->Atoms, VoronoiParams, &ChiData.m_ChiAB[0], ChiData.m_iRowSt, ChiData.m_iColSt);
   }
}



// Voronoi weight of grid point vGridPos for atom iAtom, with regard to the
// set of ALL atoms. Forwards to the Voronoi partition object.
double FDftGridGenerator::GetAtomWeight(FVector3 const &vGridPos, unsigned iAtom, FMemoryStack &Mem) {
   double const
      fCellWeight = this->m_pVoronoiPartition->GetAtomWeight(vGridPos, iAtom, Mem);
   return fCellWeight;
}

// Voronoi weight of grid point vGridPos for atom iAtom; forwards to the
// Voronoi partition object, passing through pAtOrd/nAt unchanged.
// (presumably pAtOrd lists the nAt atoms to take into account -- confirm
// against FVoronoiPartition::GetAtomWeight.)
double FDftGridGenerator::GetAtomWeight(FVector3 const &vGridPos, unsigned iAtom, size_t *pAtOrd, size_t nAt, FMemoryStack &Mem) {
   double const
      fCellWeight = this->m_pVoronoiPartition->GetAtomWeight(vGridPos, iAtom, pAtOrd, nAt, Mem);
   return fCellWeight;
}

// double FDftGridGenerator::GetAtomWeight(FVector3 const &vGridPos, unsigned iAtom, double *pDistAg, size_t *pAtOrd, size_t nAt, FMemoryStack &Mem) {
//    return m_pVoronoiPartition->GetAtomWeight(vGridPos, iAtom, pAtOrd, nAt, Mem);
// }
// ^-- should forward to the corresponding version in m_pVoronoiPartition, too.

// Evaluate, on a set of grid points, the sum of free-atom densities of a set
// of atoms (plus a constant offset).
//
// Parameters:
//  - pDensity: [out] allocated here on Mem with nGridPt entries and left
//    allocated on return; entry iPt receives fAddToDensity plus the summed
//    atomic densities at grid point iPt.
//  - fAddToDensity: initial value of every output entry.
//  - pGridPt_, nStGridPt: grid point iPt is read as a FVector3 located at
//    pGridPt_ + iPt * nStGridPt (stride given in units of double).
//  - nGridPt: number of grid points.
//  - pNeighbors, nNeighbors: indices (into Atoms) of the atoms to sum over.
//    If pNeighbors is 0, all atoms are used.
//  - Mem: memory stack for the output and for temporaries.
//  - iAtCenterRestrict: if not -1, index of a center atom; atoms further from
//    it than 2.0 * rCovij (see below) are skipped.
void FDftGridGenerator::EvalFreeAtomDensitySum(double *&pDensity, double fAddToDensity, double *pGridPt_, size_t nStGridPt, size_t nGridPt,  size_t *pNeighbors, size_t nNeighbors, FMemoryStack &Mem, int iAtCenterRestrict)
{
   // the output array is allocated first; everything else allocated on Mem
   // in this function comes after it.
   Mem.Alloc(pDensity, nGridPt);
   for (size_t iPt = 0; iPt < nGridPt; ++ iPt)
      pDensity[iPt] = fAddToDensity;

   if (pNeighbors == 0) {
      // no neighbor list supplied: make the identity list 0..Atoms.size()-1.
      // Note: this list is not freed in this function (presumably it is
      // released together with pDensity when the caller frees pDensity on the
      // memory stack -- confirm against FMemoryStack semantics).
      nNeighbors = Atoms.size();
      Mem.Alloc(pNeighbors, nNeighbors);
      for (size_t i = 0; i < nNeighbors; ++ i)
         pNeighbors[i] = i;
   }

   for (size_t iNeighbor = 0; iNeighbor < nNeighbors; ++ iNeighbor) {
      size_t
         jAt = pNeighbors[iNeighbor];
      if (iAtCenterRestrict != -1) {
         // only sum up densities of atoms which are close enough to be considered
         // covalently bonded. This is mainly a performance question---computing atomic
         // densities from all over the place is quite expensive.
         size_t iAt = iAtCenterRestrict;
         // average of the two covalent radii, scaled by 1./ct::ToAng.
         double rCovij =  1./ct::ToAng * .5 * (ct::GetCovalentRadius(Atoms[iAt].iElement) + ct::GetCovalentRadius(Atoms[jAt].iElement));
//          if (Dist(Atoms[iAt].vPos, Atoms[jAt].vPos) > 1.3 * rCovij)
         if (Dist(Atoms[iAt].vPos, Atoms[jAt].vPos) > 2.0 * rCovij)
//          if (Dist(Atoms[iAt].vPos, Atoms[jAt].vPos) > 1.5 * rCovij)
            continue;
      }

      // compute distance of radial grid point to current neighbor atom jAt
      // (pRho/rPts are per-iteration scratch arrays held via TMemoryLock.)
      ct::TMemoryLock<double>
         pRho(nGridPt, &Mem),
         rPts(nGridPt, &Mem);
      FVector3
         vAtj = Atoms[jAt].vPos;
      for (size_t iPt = 0; iPt < nGridPt; ++ iPt) {
         FVector3 const &vGridPt = *reinterpret_cast<FVector3 const*>(pGridPt_ + iPt * nStGridPt);
         rPts[iPt] = Dist(vAtj, vGridPt);
      }
      // evaluate the free-atom density of jAt's element at the distances in
      // rPts, then accumulate it into the output.
      EvalFreeAtomDensity(pRho.p, rPts.p, nGridPt, Atoms[jAt].iElement, m_iFreeAtomFitType);
      for (size_t iPt = 0; iPt < nGridPt; ++ iPt)
         pDensity[iPt] += pRho[iPt];
   }
}





struct FPointSortPred {
   bool operator () (FDftGrid::FPoint const &a, FDftGrid::FPoint const &b) const {
      return b.fWeight < a.fWeight;
   }
};



// Generate the complete molecular integration grid.
//
// Steps:
//  1. set up the Voronoi partition (MakeVoronoiInfo) and the radial grid builder.
//  2. build the atomic grid of each atom in parallel (AddAtomGrid) and append
//     its points to Points.
//  3. group the points into blocks (BlockifyGridR -> GridBlocks).
//  4. log a summary; optionally (Params.EvalFreeAtomGridAccuracy()) integrate
//     the superposition of free-atom densities on the new grid and log the
//     deviation from the expected number of electrons.
void FDftGridGenerator::Create()
{
   ct::FTimer // FIXME: remove this (unnecessary dependency)
      tDftGrid;
   size_t
      nAt = Atoms.size();
   // work space shared out to the threads below; the size scales with the
   // number of threads and with nAt^2.
   ct::FMemoryStack2
      Mem(omp_get_max_threads() * (2000000 + sizeof(double) * nAt*nAt*2));
   // NOTE(review): presumably a zero-size lock which restores the stack
   // position of Mem at scope exit -- confirm against TMemoryLock.
   ct::TMemoryLock<char>
      pFreeMe(0, &Mem);
   // precompute some stuff required for voronoi cells.
   // (note: MakeVoronoiInfo currently does not use its Mem argument; the
   // partition object it creates is allocated with new.)
   MakeVoronoiInfo(Mem);
   m_pRadialGridBuilder = new(FRadialGridBuilder);

   {
      // one atomic grid per loop iteration; each thread works on its own
      // memory stack obtained from MemStacks.
      ct::FMemoryStackArray MemStacks(Mem);
      #pragma omp parallel for schedule(dynamic)
      for (int iAtom = 0; size_t(iAtom) < Atoms.size(); ++ iAtom) {
         FMemoryStack &Mem_ = MemStacks.GetStackOfThread();
         Mem_.Align(32);
         FPointList
            AtPoints;
         AddAtomGrid(AtPoints, iAtom, Mem_);
         // appending to the shared Points list is serialized. Each atom's
         // points are appended as one contiguous run; the order of the atoms
         // in Points depends on the order in which the threads get here.
         #pragma omp critical
         {
            Points.insert(Points.end(), AtPoints.begin(), AtPoints.end());
         }
      }
   }
   if (0) {
      // sort from large weights to small weights? Would increase accuracy of data accumulation
      // (large weight = outer atom = far away = low density)
      std::sort(Points.begin(), Points.end(), FPointSortPred());

      // ^- makes things really slow because it breaks the spatial alignment of the points.
      // creates issues for screening in DFTI, I guess.
   }

   // the last three arguments are ignored by BlockifyGridR.
   BlockifyGridR(GridBlocks, 0, 0, 0);


   m_pLog->Write(" Generated DFT grid with {} points for {} atoms in {:.2f} sec. ThrGrid: {:8.2e}.", Points.size(), Atoms.size(), (double)tDftGrid, Params.fTargetAccuracy());
   bool EvalAccuracyForFreeAtoms = Params.EvalFreeAtomGridAccuracy();
   // ^- In this particular program (migrid) we do not need that, because we
   // will be evaluating exactly the same quantity again.
   if (EvalAccuracyForFreeAtoms) {
      // this does the free atom density test for accuracy: see what we get when
      // we evaluate the density of a superposition of spherical free atom
      // densities on the just-generated grid.
//       FMemoryStack2
//          Mem(2000000 + sizeof(double) * Points.size());
      ct::FTimer
         tDummy;
      // the point array is addressed below as an array of doubles with a
      // stride of sizeof(FPoint)/sizeof(double).
      assert(sizeof(FPoint) % sizeof(double) == 0);
      double
         fElec = 0.;
      ct::FMemoryStackArray MemStacks(Mem);
      #pragma omp parallel for schedule(dynamic)
      for (int iGridBlock_ = 0; iGridBlock_ < int(GridBlocks.size()); ++ iGridBlock_) {
         size_t iGridBlock = size_t(iGridBlock_);
         FMemoryStack &Mem_ = MemStacks.GetStackOfThread();
         Mem_.Align(32);
         double
            *pDensity;
         FGridBlock const
            &Block = GridBlocks[iGridBlock];
         double
            fElecBlock = 0.;
         // density of all atoms (no neighbor list given) on the points of this
         // block; pDensity is allocated on Mem_ by the callee.
         EvalFreeAtomDensitySum(pDensity, 0., &Points[Block.iFirst].vPos[0], sizeof(FPoint)/sizeof(double), Block.nPt(), 0, 0, Mem_);
         for (size_t iPt = Block.iFirst; iPt < Block.iLast; ++ iPt)
            fElecBlock += Points[iPt].fWeight * pDensity[iPt - Block.iFirst];
         // the critical section covers only the accumulation statement which
         // directly follows; the Free() call after it is per-thread.
         #pragma omp critical
         fElec += fElecBlock;
         Mem_.Free(pDensity);
      }

      // compute number of electrons we are supposed to be having in a charge-
      // neutral reference density. In particular, this is the number of
      // electrons expected for our free-atom reference densities (which are
      // configured with these default ECPs.).
      double
         fElecFreeAt = 0;
      for (size_t iAt = 0; iAt != Atoms.size(); ++ iAt)
         fElecFreeAt += EvalFreeAtomNumElec(Atoms[iAt].iElement, m_iFreeAtomFitType);

      // logged value: integrated minus expected number of electrons.
      m_pLog->Write(" DFT grid expected accuracy: {:8.2e}.", fElec - fElecFreeAt);
      m_pLog->Write(" (this took {:.4f} sec to evaluate).", (double)tDummy);
      m_pLog->Write(" WARNING: this version has no free-density renormalization! Port back!!");
   }
   m_pLog->WriteLine();

}

// Partition Points into consecutive blocks and append one FGridBlock per
// block to Blocks.
//
// The intended design is a recursive, kd-tree like subdivision such that
//   (a) the spatial extent of the grid points in the sub-ranges is minimized
//   (b) a more-or-less uniform number of total points per block is formed.
// That is not implemented at the moment: the three trailing arguments are
// ignored, and blocks are simply consecutive runs of at most 64 points which
// all belong to the same atomic center.
//
// For each block the index range [iFirst,iLast), the atomic center, the
// centroid of the point positions, the radius (largest distance of a point
// from the centroid), and the largest point weight are stored.
void FDftGridGenerator::BlockifyGridR(FGridBlockList &Blocks, uint /*iFirst*/, FPoint * /*pFirst*/, FPoint * /*pLast*/)
{
   size_t const
      nMaxPtsPerBlock = 64;
      // ^- hm... 64 leads to much better OpenMP scaling for high processor numbers in preg/tzvp (near perfect--.18 with 20 cores, 3.1 with 1).
      //    I guess with this size everything fits into L1? Proper sorting of grid might also much help in this
      //    regard by reducing the nMap dimension. (128 was tried, too.)
   size_t const
      nPtsTotal = Points.size();

   for (size_t iBegin = 0; iBegin < nPtsTotal; ) {
      // extend the block for as long as it is not full and the next point
      // still sits on the same atomic center as the block's first point.
      size_t
         iEnd = iBegin + 1;
      while (iEnd < nPtsTotal
             && iEnd - iBegin < nMaxPtsPerBlock
             && Points[iEnd].iAtomicCenter == Points[iBegin].iAtomicCenter)
         ++ iEnd;

      Blocks.push_back( FGridBlock() );
      FGridBlock
         &Block = Blocks.back();
      Block.iFirst = iBegin;
      Block.iLast = iEnd;
      Block.iAtomicCenter = Points[iBegin].iAtomicCenter;

      // centroid of the block's points.
      double const
         fInvCount = 1.0/(iEnd-iBegin);
      Block.vCenter = FVector3(0,0,0);
      for ( size_t i = iBegin; i < iEnd; ++ i )
         Block.vCenter += fInvCount * Points[i].vPos;

      // largest squared distance from the centroid, and largest weight.
      double
         fMaxDistSq = 0;
      Block.fLargestWeight = 0;
      for ( size_t i = iBegin; i < iEnd; ++ i ) {
         double fDistSq = (Points[i].vPos - Block.vCenter).LengthSq();
         fMaxDistSq = std::max( fMaxDistSq, fDistSq );
         Block.fLargestWeight = std::max(Block.fLargestWeight, Points[i].fWeight);
      }
      Block.fRadius = std::sqrt(fMaxDistSq);

      iBegin = iEnd;
   }
}



// Construct the integration grid for the given atoms and grid parameters.
// If no log object is supplied (pLog == 0), output goes to a local log bound
// to ct::xout. The grid points are produced by a short-lived
// FDftGridGenerator; afterwards the flat position/weight arrays are derived
// from them.
FDftGrid::FDftGrid(ct::FRawAtomList const &Atoms, FDftGridParams const &Params, ct::FLog *pLog)
{
   ct::FLogStdStream
      xFallbackLog(ct::xout);
   if (!pLog)
      pLog = &xFallbackLog;

   {
      // the generator lives only for the duration of the grid construction.
      FDftGridGenerator
         Generator(*this, Atoms, Params, pLog);
      Generator.Create();
   }
   MakeAdditionalRepresentations();
}


// Destructor. Nothing to release explicitly here; the body is empty.
FDftGrid::~FDftGrid()
{
}


// Rebuild the flat Positions and Weights arrays from Points: entry i of
// Positions receives the three coordinates of Points[i].vPos, and entry i of
// Weights receives Points[i].fWeight. Both arrays are resized to the number
// of points.
void FDftGrid::MakeAdditionalRepresentations()
{
   size_t const
      nPts = Points.size();
   Positions.resize(nPts);
   Weights.resize(nPts);
   // the index has the same width as the container size; a narrower unsigned
   // index would wrap around before reaching a size beyond its range and the
   // loop would not terminate.
   for ( size_t i = 0; i < nPts; ++ i ){
      Positions[i][0] = Points[i].vPos[0];
      Positions[i][1] = Points[i].vPos[1];
      Positions[i][2] = Points[i].vPos[2];
      Weights[i] = Points[i].fWeight;
   }
}



} // namespace mig
