#ifdef USE_PIK_ML
/**
 * \file pik_ml_interface.cpp
 *
 * $Id$
 *
 * This is the C++ side of the the interface between
 * MOM4/FMS and the POEM-attached machine learning module
 */
#if defined(USE_MPI) || defined(USE_MPI_DOMAIN_DECOMPOSE)
// Intel MPI requires mpi.h before stdio.h
#include <mpi.h>
#else
typedef int MPI_Comm;
#endif

#include <cstdlib>
#include <cmath>
#include <cstdio>
#include <assert.h>
#include <cstring>
#include <unistd.h>
#include <mcheck.h>

#ifdef _OPENMP
#include "omp.h"
#endif

#include "online_gan_coupled.h"

static int isg, ieg, jsg, jeg; ///< boundary indices of local compute domain in Fortran notation 1..n
static MPI_Comm comm;          ///< MPI communicator handle for the atmosphere domain
static int pe, root_pe, npes;  ///< our own MPI rank, root MPI rank, total number of atmosphere ranks

static double dt_atm_sec;      ///< timestep of atmosphere in seconds

static const double epsilon = 1e-10;



// Compare double values for equality.
// They are considered equal if the relative error is smaller than epsilon.
inline static bool double_eq(const double v1, const double v2, const double e) {
  return fabs((v1-v2)/((v1+v2)/2)) < e;
}

extern "C" {

  // This block contains those functions that are called by the
  // F90-implemented FMS.  It is tested with our two main target
  // platforms GNU gfortran with g++ and Intel Fortran with Intel C++,
  // both on Linux.  For other Fortran/C++ combos, different naming
  // and argument passing conventions might be needed.  GNU and Intel
  // Fortran convert names of external functions to lowercase and
  // append one underscore. (This their default behaviour, can be
  // changed with command line options and/or special directive
  // comments in the code.


  /// pik_ml_interfacecheck_() first of all is used to check for
  /// compatibility of the procedure call interface between
  /// the Fortran90 and C++ modules.
  /// Possible pitfalls include:
  /// - size of Fortran integer vs. sizeof C++ int
  /// - size of Fortran real vs. sizeof C++ float or double
  /// - assumed size arrays and pointers might be passed as
  ///   descriptors (alias dope vectors)
  /// - alignment of elements in derived types
  ///   => to avoid this problem, we try to avoid passing
  ///      derived types, and use the single elements as call
  ///      arguments (we shall see if this will always be possible)
  ///
  /// Our primary target platforms are
  /// Linux with Intel Compilers,
  /// and Linux with GNU compilers.
  void pik_ml_interfacecheck_
  ( /// first some arguments for checking the F90->C++ interface
   const int *const deadbeef, const double *const pitest,
   const int *const true_in, const int *const false_in,
   const double *const testarray2d,
   const int *const kind_i, const int *const kind_r, const int *const kind_l,

   /// and finally some interface-checking args again
   const double *const onetwothree, const int *const fse
    ) { // begin pik_ml_interfacecheck_()

    const double myonetwothree = 1234567809.0987654321L;

    // When mtrace() is called, it checks the value of the environment
    // variable MALLOC_TRACE, which should contain the pathname of a
    // file in which the tracing information is to be recorded.  If
    // the pathname is successfully opened, it is truncated to zero
    // length.
    //
    // If MALLOC_TRACE is not set, or the pathname it specifies is
    // invalid or not writable, then no hook functions are installed,
    // and mtrace() has no effect.
    mtrace();

    printf("%s\n", "$Id$");
    // TODO: perform this test and all the printout only on the
    // atmosphere ``root'' process to avoid cluttering of the output
    // logs
    printf("%s %s F90->C++ interface test\n", __FILE__, __FUNCTION__); fflush(stdout);
    printf("Fortran KIND(integer) %d KIND(real) %d KIND(logical) %d C++ sizeof int %d long %d float %d double %d bool %d\n",
	   *kind_i, *kind_r, *kind_l, (int)sizeof(int), (int)sizeof(long), (int)sizeof(float), (int)sizeof(double),
	   (int)sizeof(bool));
    fflush(stdout);
    if (*kind_i != sizeof(int)) { printf("Error: Fortran KIND(integer) != C++ sizeof(int)\n"); fflush(stdout); abort(); }
    if (*kind_r != sizeof(double)) { printf("Error: Fortran KIND(real) != C++ sizeof(double)\n"); fflush(stdout); abort(); }
    if (*kind_l != sizeof(int)) { printf("Error: Fortran KIND(logical) != C++ sizeof(int)\n"); fflush(stdout); abort(); }
    if (*deadbeef != 0xeadbeef /* 0xdeadbeef yields overflow error on 32bit machines */) {
      printf("Error: test1 failed: 0xdeadbeef != %x\n", *deadbeef);
      fflush(stdout);
      abort();
    }
    if (! double_eq(*pitest, M_PIl, epsilon)) {
      printf("Error: test2 failed: %30.28f != %30.28f\n", M_PI, *pitest); fflush(stdout);
      abort();
    }
    if (! (*true_in)) { printf("Error: test3 failed: true_in %x is not true\n", *true_in);
      fflush(stdout);
      abort();
    }
    if (*false_in) { printf("Error: test4 failed: false_in %x is not false\n", *false_in);
      fflush(stdout);
      abort();
    }

    for (int j = 1; j <= 4; j++) {
      for (int i = 1; i <= 3; i++) {
        const int idx = (j-1)*3 + (i-1);
        const double testval = testarray2d[idx];
        const double myval = i*100.0+j;
        //printf("testarray2d(%2d,%2d) = testarray2d[%2d] = %10.9g vs. %10.9g\n",
        //	 i, j, idx, testval, myval);
        if (! double_eq(testval, myval, epsilon)) {
          printf("Error: test5 failed: testarray2d(%d,%d) is %10.9g != %10.9g\n", i, j, testval, myval);
          fflush(stdout);
          abort();
	}
      }
    }

    if (! double_eq(*onetwothree, myonetwothree, epsilon)) {
      printf("Error: test6 failed: %24.12f != %24.12f\n", myonetwothree, *onetwothree);
      fflush(stdout);
      abort();
    }
    if (*fse != 4711) { printf("Error: test7 failed: 4711 != %d\n", *fse); fflush(stdout); abort(); }
    //printf("double HUGE_VAL is %g\n", HUGE_VAL);
    printf("... interface test finished\n"); fflush(stdout);

#ifdef _OPENMP
    int tid, nthreads;
#pragma omp parallel shared(nthreads) private(tid)
    {
      tid = omp_get_thread_num();
      if (tid == 0) { /* Only master thread does this */
        nthreads = omp_get_num_threads();
        printf("Running with %d OpenMP threads\n", nthreads);
      }
    }
#else
    printf("Compiled without OpenMP\n");
#endif

  } // end pik_ml_interfacecheck_()


  /// pik_ml_init_() obtains necessary information from the F90 side.
  /// It is separated from pik_ml_interfacecheck_() in the hope
  /// to make the code more readable and easier to maintain.
  void pik_ml_init_
  ( /// now the ``real'' arguments

   /// the boundary indices of the global atmosphere domain
   const int *const isg_in, const int *const ieg_in, const int *const jsg_in, const int *const jeg_in,
   const int *const Time_step_seconds, ///< atmos time step length [sec]
   //const int *const peset_in,   ///< MPI communicator for the set of atmoshpere pes
   const int *const pe_in,      ///< our own MPI rank == processing element number
   const int *const root_pe_in, ///< MPI rank of atmosphere master process
   const int *const npes_in,    ///< number of total atmosphere processes
   int *const fse ///< and finally a minimal interface check for correct number of args
    ) { // begin pik_ml_init_()

    if (*fse != 4711) {
      printf("Error: %s test8 failed: 4711 != %d. Wrong number of args?\n", __FUNCTION__, *fse);
      fflush(stdout);
      abort();
    }

    //
    // now look at the actual parameters
    //
    isg = *isg_in; ieg = *ieg_in; jsg = *jsg_in; jeg = *jeg_in;
    printf("Domain: global boundary indices: isg %d ieg %d jsg %d jeg %d\n", isg, ieg, jsg, jeg);

    // constant timestepping
    dt_atm_sec = static_cast<double>(*Time_step_seconds);
    printf("Atmosphere time step for ML is %g sec\n", dt_atm_sec);

    //comm = *peset_in;
    pe = *pe_in; root_pe = *root_pe_in; npes = *npes_in;
    printf("MPI: pe %d root_pe %d npes %d\n", pe, root_pe, npes);
    printf("MPI: comm %d 0x%x\n", comm, comm);
#ifdef USE_MPI_DOMAIN_DECOMPOSE
    printf("MPI_COMM_WORLD 0x%x MPI_COMM_SELF 0x%x MPI_COMM_NULL 0x%x\n", MPI_COMM_WORLD, MPI_COMM_SELF, MPI_COMM_NULL);
    printf("MPI_GROUP_EMPTY 0x%x MPI_GROUP_NULL 0x%x\n", MPI_GROUP_EMPTY, MPI_GROUP_NULL);
#else
    if (npes > 1) {
      printf("Note: %s was not compiled with USE_MPI_DOMAIN_DECOMPOSE #defined, but is running on %d MPI tasks\n",
              __FILE__, npes);
      fflush(stdout);
      //abort();
    }
#endif

#ifdef USE_MPI_DOMAIN_DECOMPOSE
    Aeolus_Communicator = comm; // implicit invocation of copy constructor which converts handle to object
#endif

    online_gan_coupled_init(ieg-isg+1, jeg-jsg+1, dt_atm_sec);

  } /* end pik_ml_init_() */


  /// pik_ml_precipitation_() receives the precipitation data on the global domain.
  /// Note: these are passed by reference, i.e. changed values will be
  /// visible on the Fortran side after return from this function.
  /// Note: The C data type is an 1-D array, but it points to a 2-D F90 array.
  void pik_ml_precipitation_
  ( const int *const nx,       ///< interface check: array size in i-direction
    const int *const ny,       ///< interface check: array size in j-direction
    /*const*/ double *const lprec, ///< mass of liquid precipitation since last time step (Kg/m2)
    /*const*/ double *const fprec, ///< mass of frozen precipitation since last time step (Kg/m2)
    const int *const fortytwo  ///< minimal interface check
   ) { // begin pik_ml_precipitation_()

    assert(42 == *fortytwo);
    assert(ieg-isg+1 == *nx);
    assert(jeg-jsg+1 == *ny);
    printf("%s %s\n", __FILE__, __FUNCTION__);
    printf("printing liquid precipitation received from F90\n");
    for (int j = jsg; j <= jeg; j++) {
      for (int i = isg; i <= ieg; i++) {
        const double val = lprec[(j-jsg)*(ieg-isg+1) + (i-isg)];
        printf("%3d %3d %8.7g\n", i-1, j-1, val);
      }
    }
    fflush(stdout);
    online_gan_coupled_precipitation(*nx, *ny, lprec, fprec);

  } // end pik_ml_precipitation_()

} // end extern "C"

#endif /* USE_PIK_ML */
