/*
   Takashi Goda's model EVPPI problem for our paper
   'Decision-making under uncertainty: using
    MLMC for efficient estimation of EVPPI'
    Statistics and Computing, 29(4):739-751, 2019.
*/

//#include "mlmc_test.cpp"     // master MLMC file
#include "mlmc_test_100.cpp" // master file for 100 tests
#include "mlmc_rng.cpp"      // new file with RNG functions

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

//
// declaration of low-level routine
//

void nested_l(int, int, double *);

//
// main code
//

int main(int argc, char **argv) {
  
  int N    = 200000; // samples for convergence tests
  int L    = 10;     // levels for convergence tests 

  int N0   = 1000;   // initial samples on each level
  int Lmin = 2;      // minimum refinement level
  int Lmax = 20;     // maximum refinement level
 
  float Eps[] = { 0.0001, 0.0002, 0.0005, 0.001, 0.002, 0.0 };

  FILE *fp;

#ifdef _OPENMP
  double wtime = omp_get_wtime();
#endif

//
// main MLMC calculation
// 

  // initialise generator, with separate storage for each
  // thread when compiled for OpenMP
#pragma omp parallel
  rng_initialisation();

  fp = fopen("nested.txt","w");
  mlmc_test(nested_l, N,L, N0,Eps, Lmin,Lmax, fp);
  fclose(fp);
  
  // print out time taken, if using OpenMP
#ifdef _OPENMP
  printf(" execution time = %f s\n",omp_get_wtime() - wtime);
  wtime = omp_get_wtime();
#endif

  // delete generator and any associated storage
#pragma omp parallel
  rng_termination();

//
// now do 100 MLMC calcs in parallel
//

  // initialise generator and storage for each thread
#pragma omp parallel
  rng_initialisation();

  float val = (sqrt(2.0)-1.0)/sqrt(2.0*3.141592653589793);

  fp = fopen("nested_100.txt","w");
  mlmc_test_100(nested_l, val, N0,Eps,Lmin,Lmax, fp);
  fclose(fp);

  // print out time taken, if using OpenMP
#ifdef _OPENMP
  printf(" execution time = %f s\n",omp_get_wtime() - wtime);
  wtime = omp_get_wtime();
#endif

  // delete generator and storage
#pragma omp parallel
  rng_termination();
}

/*-------------------------------------------------------
%
% level l estimator
%
*/

void nested_l(int l, int N, double *sums) {

  for (int m=0; m<7; m++) sums[m]=0.0;
    
  int nf = 1<<l;
  int nc = nf/2;

  /*
  OpenMP reduction of C++ array sections is discussed here:
  https://www.openmp.org/spec-html/5.0/openmpsu107.html
  and an example is given on page 301 here:
  https://www.openmp.org/wp-content/uploads/openmp-examples-5.0.0.pdf
  */
  
#pragma omp parallel for shared(nf,nc) reduction(+:sums[0:7])
  for (int nn=0; nn<N; nn++) {
    // variables declared here inside OpenMP parallel loop
    // will have local allocation for each thread
    float Pf, Pc, dP, X, Y, f2;

    // level 0

    if (l==0) {
      Pf = 0;
      dP = Pf;
    }

    // level l>0, with antithetic sampler
    else {
      X  = next_normal();  // outer variable

      float sum1=0.0, sum2=0.0, sum3=0.0;
      for (int n=0; n<nc; n++) {
        Y  = next_normal(); // inner variables
        f2 = X + Y;
        sum1 += f2;
	sum3 += fmaxf(0.0,f2);
        Y  = next_normal(); // inner variables
        f2 = X + Y;
        sum2 += f2;
	sum3 += fmaxf(0.0,f2);
      }
      
      float ave = sum3/nf;
      Pf  = ave - fmaxf(0.0,(sum1+sum2)/nf);
      Pc  = ave - 0.5*(fmaxf(0.0,sum1/nc) + fmaxf(0.0,sum2/nc));
      dP = Pf - Pc;
    }

    sums[0] += nf;   // cost
    sums[1] += dP;
    sums[2] += dP*dP;
    sums[3] += dP*dP*dP;
    sums[4] += dP*dP*dP*dP;
    sums[5] += Pf;
    sums[6] += Pf*Pf;
  }
}
