/* blackbox.inc
 * Daniel S. Roche, January 2011
 * See COPYING.txt for permissions.
 *
 * Classes for a uni-modular black box
 * See "Diversification improves interpolation", Giesbrecht & Roche, 2011
 *
 * These classes are designed to be used with NTL types, for instance
 * zz_p for Base and zz_pX for Poly.
 *
 * Include file (template implementations)
 */

#include <complex>
extern "C" {
#include <fftw3.h>
}

#include <algorithm>
#include <cassert>
#include <new>
#include <vector>
#include <NTL/ZZ.h>
#include "misc.h"

/* Evaluates the wrapped black box at the sum of the two fractions
 * num/denom and wnum/wdenom, passed on as the single fraction
 *   (num*wdenom + wnum*denom) / (denom*wdenom).
 */
template <typename BB>
typename SpinBB<BB>::BaseT SpinBB<BB>::eval 
(const NTL::ZZ& num, const NTL::ZZ& denom) {
  // The fraction is deliberately left unreduced: denom and wdenom are
  // in general distinct primes, so computing gcd(denom,wdenom) to form
  // an lcm would be wasted work.
  NTL::ZZ shifted_num = wnum * denom;
  shifted_num += num * wdenom;
  return bb.eval(shifted_num, denom * wdenom);
}

// FFTW is a C library and therefore not templatized on the floating
// point type, so ApproxModBB has to be specialized once per precision.
// This specialization covers single precision (float, fftwf_* routines).
template <typename BB>
class ApproxModBB<float,BB> {
  public:
    // Keeps a reference to the black box; the box itself is not copied.
    ApproxModBB (BB& source) : bb(source) { }

    // Samples the black box at p points and stores p complex values
    // in the given vector (see the definition of eval for details).
    void eval (std::vector< std::complex<float> >&, long);

  private:
    BB& bb;  // the black box being sampled
};

/* Evaluate the black box at all p'th roots of unity and use FFTW to
 * construct the unknown polynomial mod x^p - 1 (single precision).
 *
 * out: replaced by the p values of the forward DFT of the samples,
 *      each divided by p.
 * p:   number of sample points.  If p <= 0 there is nothing to sample
 *      and out is simply emptied.  FFTW's basic interface takes the
 *      transform size as an int, so p must fit in an int.
 *
 * Throws std::bad_alloc if FFTW cannot allocate the sample array.  The
 * array is released on every exit path, including when bb.eval throws.
 *
 * Were FFTW templatized on the precision, this would be the generic
 *   void ApproxModBB<T,BB>::eval
 *   (typename ApproxModBB<T,BB>::DenseT& out, long p)
 */
template <typename BB>
void ApproxModBB<float,BB>::eval 
(typename std::vector< std::complex<float> >& out, long p) 
{
  if (p <= 0) {
    out.clear();
    return;
  }

  // Owns the FFTW-allocated sample array so that it is freed no matter
  // how this function is left.
  struct SampleArray {
    std::complex<float>* data;
    explicit SampleArray (long len)
      : data(static_cast< std::complex<float>* >
             (fftwf_malloc(sizeof(std::complex<float>) * len))) { }
    ~SampleArray () { if (data) fftwf_free(data); }
   private:
    SampleArray (const SampleArray&);             // not copyable
    SampleArray& operator= (const SampleArray&);  // not assignable
  };

  SampleArray evals(p);
  if (!evals.data) throw std::bad_alloc();

  // Sample the black box at all p'th roots of unity
  NTL::ZZ denom;
  conv(denom,p);
  for (long i = 0; i < p; ++i) {
    evals.data[i] = bb.eval(NTL::to_ZZ(i), denom);
  }

  // Now do FFT to interpolate (in-place: input and output coincide)
  fftwf_complex* raw = reinterpret_cast<fftwf_complex*>(evals.data);
  fftwf_plan plan = fftwf_plan_dft_1d
    (static_cast<int>(p), raw, raw, FFTW_FORWARD, FFTW_ESTIMATE);
  fftwf_execute(plan);
  fftwf_destroy_plan(plan);

  // Finally, copy the results to the output and divide by p
  const float scale = static_cast<float>(p);
  out.clear();
  out.reserve(static_cast<std::vector< std::complex<float> >::size_type>(p));
  for (long i = 0; i < p; ++i) out.push_back(evals.data[i] / scale);
}

// FFTW is a C library and therefore not templatized on the floating
// point type, so ApproxModBB has to be specialized once per precision.
// This specialization covers double precision (double, fftw_* routines).
template <typename BB>
class ApproxModBB<double,BB> {
  public:
    // Keeps a reference to the black box; the box itself is not copied.
    ApproxModBB (BB& source) : bb(source) { }

    // Samples the black box at p points and stores p complex values
    // in the given vector (see the definition of eval for details).
    void eval (std::vector< std::complex<double> >&, long);

  private:
    BB& bb;  // the black box being sampled
};

/* Evaluate the black box at all p'th roots of unity and use FFTW to
 * construct the unknown polynomial mod x^p - 1 (double precision).
 *
 * out: replaced by the p values of the forward DFT of the samples,
 *      each divided by p.
 * p:   number of sample points.  If p <= 0 there is nothing to sample
 *      and out is simply emptied.  FFTW's basic interface takes the
 *      transform size as an int, so p must fit in an int.
 *
 * Throws std::bad_alloc if FFTW cannot allocate the sample array.  The
 * array is released on every exit path, including when bb.eval throws.
 *
 * Were FFTW templatized on the precision, this would be the generic
 *   void ApproxModBB<T,BB>::eval
 *   (typename ApproxModBB<T,BB>::DenseT& out, long p)
 */
template <typename BB>
void ApproxModBB<double,BB>::eval 
(typename std::vector< std::complex<double> >& out, long p) 
{
  if (p <= 0) {
    out.clear();
    return;
  }

  // Owns the FFTW-allocated sample array so that it is freed no matter
  // how this function is left.
  struct SampleArray {
    std::complex<double>* data;
    explicit SampleArray (long len)
      : data(static_cast< std::complex<double>* >
             (fftw_malloc(sizeof(std::complex<double>) * len))) { }
    ~SampleArray () { if (data) fftw_free(data); }
   private:
    SampleArray (const SampleArray&);             // not copyable
    SampleArray& operator= (const SampleArray&);  // not assignable
  };

  SampleArray evals(p);
  if (!evals.data) throw std::bad_alloc();

  // Sample the black box at all p'th roots of unity
  NTL::ZZ denom;
  conv(denom,p);
  for (long i = 0; i < p; ++i) {
    evals.data[i] = bb.eval(NTL::to_ZZ(i), denom);
  }

  // Now do FFT to interpolate (in-place: input and output coincide)
  fftw_complex* raw = reinterpret_cast<fftw_complex*>(evals.data);
  fftw_plan plan = fftw_plan_dft_1d
    (static_cast<int>(p), raw, raw, FFTW_FORWARD, FFTW_ESTIMATE);
  fftw_execute(plan);
  fftw_destroy_plan(plan);

  // Finally, copy the results to the output and divide by p
  const double scale = static_cast<double>(p);
  out.clear();
  out.reserve(static_cast<std::vector< std::complex<double> >::size_type>(p));
  for (long i = 0; i < p; ++i) out.push_back(evals.data[i] / scale);
}

// FFTW is a C library and therefore not templatized on the floating
// point type, so ApproxModBB has to be specialized once per precision.
// This specialization covers long double (the Intel 80-bit extended
// format on x86; fftwl_* routines).
template <typename BB>
class ApproxModBB<long double,BB> {
  public:
    // Keeps a reference to the black box; the box itself is not copied.
    ApproxModBB (BB& source) : bb(source) { }

    // Samples the black box at p points and stores p complex values
    // in the given vector (see the definition of eval for details).
    void eval (std::vector< std::complex<long double> >&, long);

  private:
    BB& bb;  // the black box being sampled
};

/* Evaluate the black box at all p'th roots of unity and use FFTW to
 * construct the unknown polynomial mod x^p - 1 (long double precision).
 *
 * out: replaced by the p values of the forward DFT of the samples,
 *      each divided by p.
 * p:   number of sample points.  If p <= 0 there is nothing to sample
 *      and out is simply emptied.  FFTW's basic interface takes the
 *      transform size as an int, so p must fit in an int.
 *
 * Throws std::bad_alloc if FFTW cannot allocate the sample array.  The
 * array is released on every exit path, including when bb.eval throws.
 *
 * Were FFTW templatized on the precision, this would be the generic
 *   void ApproxModBB<T,BB>::eval
 *   (typename ApproxModBB<T,BB>::DenseT& out, long p)
 */
template <typename BB>
void ApproxModBB<long double,BB>::eval 
(typename std::vector< std::complex<long double> >& out, long p) 
{
  if (p <= 0) {
    out.clear();
    return;
  }

  // Owns the FFTW-allocated sample array so that it is freed no matter
  // how this function is left.
  struct SampleArray {
    std::complex<long double>* data;
    explicit SampleArray (long len)
      : data(static_cast< std::complex<long double>* >
             (fftwl_malloc(sizeof(std::complex<long double>) * len))) { }
    ~SampleArray () { if (data) fftwl_free(data); }
   private:
    SampleArray (const SampleArray&);             // not copyable
    SampleArray& operator= (const SampleArray&);  // not assignable
  };

  SampleArray evals(p);
  if (!evals.data) throw std::bad_alloc();

  // Sample the black box at all p'th roots of unity
  NTL::ZZ denom;
  conv(denom,p);
  for (long i = 0; i < p; ++i) {
    evals.data[i] = bb.eval(NTL::to_ZZ(i), denom);
  }

  // Now do FFT to interpolate (in-place: input and output coincide)
  fftwl_complex* raw = reinterpret_cast<fftwl_complex*>(evals.data);
  fftwl_plan plan = fftwl_plan_dft_1d
    (static_cast<int>(p), raw, raw, FFTW_FORWARD, FFTW_ESTIMATE);
  fftwl_execute(plan);
  fftwl_destroy_plan(plan);

  // Finally, copy the results to the output and divide by p
  const long double scale = static_cast<long double>(p);
  out.clear();
  out.reserve
    (static_cast<std::vector< std::complex<long double> >::size_type>(p));
  for (long i = 0; i < p; ++i) out.push_back(evals.data[i] / scale);
}

// Ordering predicate used to sort monomial values by decreasing
// absolute value: true exactly when |a| is strictly greater than |b|.
template <typename T>
inline static bool monsort (const std::complex<T>& a, const std::complex<T>& b)
{
  const T magnitude_a = std::abs(a);
  const T magnitude_b = std::abs(b);
  return magnitude_b < magnitude_a;
}

/* Returns the "unknown" polynomial at exp(2*Pi*num/denom), perturbed by
 * random noise whose scale is ApproxBB<T>::epsilon times the absolute
 * value of the exact sum.
 *
 * Each term of f contributes ru<T>(num * exponent, denom) * coefficient,
 * where the exponent is the term's `second` and the coefficient its
 * `first` (ru is declared elsewhere; presumably the root of unity
 * exp(2*Pi*i*a/b) -- confirm in misc.h).
 *
 * A polynomial with no terms evaluates to zero (plus noise of scale
 * zero) instead of dereferencing an empty container.
 */
template <typename T>
typename FakeApproxBB<T>::BaseT FakeApproxBB<T>::eval 
(const NTL::ZZ& num, const NTL::ZZ& denom) {
  std::vector< std::complex<T> > monomials;
  std::complex<T> temp;  // value-initialised, i.e. zero

  for (typename SparsePoly<T>::RepT::const_iterator fiter = f.rep.begin();
       fiter != f.rep.end(); ++fiter)
  {
    temp = ru<T> (num * fiter->second, denom);
    monomials.push_back (temp * fiter->first);
  }

  // Now sort the monomials by decreasing order of absolute value,
  // for numerically stable summing
  std::sort (monomials.begin(), monomials.end(), monsort<T>);

  // Accumulate in sorted order.  The sum starts from the first monomial
  // when there is one, and stays at zero when f has no terms.
  std::complex<T> total;
  typename std::vector< std::complex<T> >::const_iterator 
    miter = monomials.begin();
  if (miter != monomials.end()) {
    total = *miter;
    while (++miter != monomials.end()) total += *miter;
  }

  // Finally, add some random noise according to the internal value
  // of epsilon.
  RandomGen<FloatT> rg(0.0, ApproxBB<T>::epsilon * abs(total) );

  return (total + rg());
}
