/* Copyright (c) 2015  Gerald Knizia
 * 
 * This file is part of the IboView program (see: http://www.iboview.org)
 * 
 * IboView is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3.
 * 
 * IboView is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with bfint (LICENSE). If not, see http://www.gnu.org/licenses/
 * 
 * Please see IboView documentation in README.txt for:
 * -- A list of included external software and their licenses. The included
 *    external software's copyright is not touched by this agreement.
 * -- Notes on re-distribution and contributions to/further development of
 *    the IboView software
 */

#include "Ir.h"
#include "IrAmrr.h"
#include "CxMemoryStack.h"
#include "CxAlgebra.h"
#include <algorithm> // for std::min.
#include <stdexcept> // for std::runtime_error

namespace ir {

// Forward declaration only; the definition lives elsewhere. In this file it is
// called to fill Out with the solid harmonic components evaluated at the point
// (x,y,z) for angular momenta up to L. The caller below reserves nSlmX(L) doubles
// per call -- presumably the number of values written; confirm against the definition.
void EvalSlmX_Deriv0(double *IR_RP Out, double x, double y, double z, unsigned L);


// For a given 3x3 unitary rotation matrix R[3][3], construct the matrix T[lc,l'c'] which makes
//    S[l,c](R r) = \sum_{c',l'} T[lc,l'c'] S[l',c'](r)
// for all 3-vectors 'r' (i.e., compute the transformation between solid harmonic components
// induced by rotations in real space).
//
// Notes:
//  - The matrix elements T[lc,l'c'] for l != l' vanish.
//  - T[lc,l'c'] is stored at pOut[iSlcX(l,c) + nSlmX(MaxL) * iSlcX(l',c')].
//    nStride is set to nSlmX(MaxL); pOut is allocated on Mem by this routine.
//  - Order of trafo not checked. If it doesn't work, use transpose(T) instead of T.
void EvalSlcXRotationTrafo(double *&pOut, size_t &nStride, unsigned MaxL, double const *R, ct::FMemoryStack &Mem)
{
   nStride = nSlmX(MaxL);
   Mem.Alloc(pOut, nStride*nStride);
   void
      *pFreeMe = Mem.Alloc(0);
   unsigned
      nSl = nSlmX(MaxL),
      nCa = nCartX(MaxL);
   double
      *pA, *pB;
   Mem.Alloc(pA, nSl * nCa);
   Mem.Alloc(pB, nSl * nCa);

   // while this could probably be done by employing some funky solid harmonic
   // transformation theorems, we here just set up an equation system such that
   //
   //     a_i = T * b_i,
   //
   // where a_i = Slc(R r_i) and b_i = Slc(r_i) for a sufficient basis
   // of vectors r_i (with Slc components in the rows).
   //
   // The equation system is then linearly solved via LAPACK.
   //
   // Such a set of vectors is given, for example, by the cartesian vectors
   //     r_i = (x_i,y_i,z_i)
   // with all x_i + y_i + z_i <= MaxL (for all l at once)
   // or x_i + y_i + z_i = MaxL (for individual l)

   // setup the matrices B = (b_i) and A = (a_i).
   unsigned
      iComp = 0;
   for (unsigned i = 0; i <= MaxL; ++i)
      for (unsigned j = 0; j <= i; ++j)
         for (unsigned k = 0; k <= j; ++k) {
            // make trial vector x and it's rotated counterpart R x.
            double
               r[3] = {double(i), double(j), double(k)},
               Rr[3] = {0., 0., 0.};
            for (unsigned a = 0; a < 3; ++ a)
               for (unsigned b = 0; b < 3; ++ b)
                  Rr[a] += R[a + 3*b] * r[b];
            // evaluate SlcX(x) and SlcX(R x).
            EvalSlmX_Deriv0(&pB[iComp * nSl], r[0], r[1], r[2], MaxL);
            EvalSlmX_Deriv0(&pA[iComp * nSl], Rr[0], Rr[1], Rr[2], MaxL);
            iComp += 1;
         }
   assert(iComp == nCa);

   // solve for T such that A = T B.
   // By transposing both sides we get this into standard form:
   //   A^T = B^T T,
   // where we solve for T. Due to LAPACK restrictions we do the
   // transposition explicitly here.
   double
      *pAt, *pBt, *pSig, *pWork;
   Mem.Alloc(pAt, nCa * nSl);
   Mem.Alloc(pBt, nCa * nSl);
   Mem.Alloc(pSig, nCa);
   for (unsigned iSl = 0; iSl < nSl; ++ iSl)
      for (unsigned iCa = 0; iCa < nCa; ++ iCa) {
         pAt[iCa + nCa*iSl] = pA[iSl + nSl*iCa];
         pBt[iCa + nCa*iSl] = pB[iSl + nSl*iCa];
      }

   // FIXME: do at least the SVDs individually per l. Results should be the same,
   // but this would make it faster.
   FORTINT
      nRank = -1,
      info = 0,
      // minimal work space required according to docs.
      nWork = 3*std::min(nCa,nSl) + std::max(std::max(2*std::min(nCa, nSl), std::max(nCa, nSl)), nSl);
   nWork += 2*nCa*nSl*nSl; // <- we don't really care...
   Mem.Alloc(pWork, nWork);
   DGELSS(nCa, nSl, nSl, pAt, nCa, pBt, nCa, pSig, 1e-10, &nRank, pWork, nWork, &info);
   if (info != 0)
      throw std::runtime_error("MakeRotatedSlmXTransform: dgelss failed.");
   if (nRank != nSl)
      throw std::runtime_error("MakeRotatedSlmXTransform: equation system solution went wrong.");

   // copy solution to output matrix.
   for (unsigned iSl = 0; iSl < nSl; ++ iSl)
      for (unsigned jSl = 0; jSl < nSl; ++ jSl)
         pOut[iSl + nStride * jSl] = pBt[iSl + nCa * jSl];


//    for (unsigned l = 0; l <= MaxL; ++ l) {
//       double
//          *IR_RP pT = &pOut[what?].
//       (use addressing as above.. just make svds separately)
//    }
   Mem.Free(pFreeMe);
}


} // namespace ir
