#			    -*-Makefile-*-
# template for the Intel fortran compiler versions 11 or 12 with Intel MPI on the PIK iplex cluster
# typical use with mkmf
# mkmf -t template.ifc -c"-Duse_libMPI -Duse_netCDF" path_names /usr/local/include

# -fpp : run the Fortran preprocessor before compilation
# -fno-alias : If you do not want aliasing to be assumed in the program
# -stack_temps : allocate space for temporary arrays on the runtime stack
# -safe-cray-ptr : Cray* pointers do not alias other variables.
# -ftz : Flushes denormal results to zero.
#        This option flushes denormal results to zero when the
#        application is in the gradual underflow mode. It may
#        improve performance if the denormal values are not
#        critical to your application's behavior.
#
#        If this option produces undesirable results of the
#        numerical behavior of your program, you can turn the FTZ/DAZ
#        mode off by using -no-ftz or /Qftz- in the command line
#        while still benefiting from the O3 optimizations.
#
#        NOTE: Options -ftz and /Qftz are performance options.
#        Setting these options does not guarantee that all denormals
#        in a program are flushed to zero. They only cause
#        denormals generated at run time to be flushed to zero.
# -i-dynamic : use -shared-intel
# -assume byterecl : the  units for the OPEN statement RECL  specifier
#                    (record length) value are in bytes for  unformatted  data,
#                    not longwords
# -Wp,-w : pass option -w to the preprocessor
#          Prevents warnings from being output.
# -sox : save the compilation options and version number in the executable
# Base Fortran flags; always part of FFLAGS.
# The option spellings follow the descriptions above: -safe-cray-ptr instead of
# the old underscore form -safe_cray_ptr, and -shared-intel, which is the
# replacement for the deprecated -i-dynamic / -i_dynamic.
FFLAGS_BASE = -fpp -fno-alias -stack_temps -safe-cray-ptr -ftz -shared-intel -assume byterecl -g -i4 -r8 -Wp,-w -sox

# -fltconsistency : Enables improved floating-point consistency.
#        This option enables improved floating-point consistency
#        and may slightly reduce execution speed. It limits
#        floating-point optimizations and maintains declared precision.
#        It also disables inlining of math library functions.
#
#        - Even if vectorization is enabled by the -x options, the
#          compiler does not  vectorize  reduction  loops  (loops
#          computing  the  dot  product)  and loops with mixed
#          precision types. Similarly, the compiler  does  not
#          enable  certain  loop transformations. For example,
#          the compiler does not transform reduction loops  to
#          perform partial summation or loop interchange.
#
#        This  option  causes  performance  degradation relative to
#        using default floating-point optimization flags.
#        The recommended method to control the semantics of
#        floating-point calculations is to use option -fp-model
# Reproducibility flags for Fortran, expanded into FFLAGS.
# The notes below report mixed experience with -fltconsistency and mention
# -fp-model precise as an alternative; changing this value changes results.
FFLAGS_REPRO = -fltconsistency
## Su-Bong Lee <sky-shine@pusan.ac.kr> wrote on 13.3.2012:
## I also did experiment using "-fp_model precise" option with "-O2",
## Then the outputs from two identical restart run were the same.
## Marshall Ward <marshall.ward@anu.edu.au> answered:
## We are seeing similar issues on our machine in Australia using the intel 
## compilers (with -O2).
## 
## On single CPU submissions of the bowl1 experiment, we saw two different 
## solutions, on the order of floating point error. The answer we get is 
## quasi-random, with no clear explanation.
## 
## As Swathi recommends, using '-fp-model precise' gives a third solution 
## different from the other two, which we can reproduce consistently. Using 
## -fltconsistency did not address the issue for us either. This link 
## suggests Intel may soon deprecate -fltconsistency: http://goo.gl/IPBkH


# -check : Checks for certain conditions at run time.
# -check all : is the same as specifying check with no keyword
# -check bounds : compile-time and run-time checking for array subscripts
# -WB : Turns a compile-time bounds check into a warning.
# -inline_debug_info : use -debug inline-debug-info
# -fpe0 : Floating-point invalid, divide-by-zero, and overflow exceptions
#        are enabled. If any such exceptions occur, execution is aborted.
#        This option sets the -ftz ; therefore underflow results will be set
#        to zero unless you explicitly specify -no-ftz
# -fpe3 : (default) All floating-point exceptions are disabled.
#        Floating-point underflow is gradual
# -ftrapuv : Initializes  stack  local variables to an unusual value to aid error detection.
# -fp-stack-check : generate  extra  code  after  every function call
#        By default, there is no checking. So  when  the  FP  stack
#        overflows, a NaN value is put into FP calculations and the
#        program's  results  differ.  Unfortunately,  the  overflow
#        point  can  be  far away from the point of the actual bug.
#        This option places code that causes  an  access  violation
#        exception immediately after an incorrect call occurs, thus
#        making it easier to locate these issues.

# options that possibly create runtime overhead, thus should be used during development
# but not for production.
#FFLAGS_DEBUG = -check bounds -WB -warn -warn noerrors -debug variable_locations -debug-parameters -debug inline-debug-info -fpe0 -traceback -ftrapuv
#FFLAGS_DEBUG = -check all    -WB -warn -warn noerrors -debug variable_locations -debug-parameters -debug inline-debug-info       -traceback
#FFLAGS_DEBUG = -check bounds -WB -warn -warn noerrors -fpe0 -traceback -ftrapuv
#FFLAGS_DEBUG = -check all    -WB -warn -warn noerrors -fpe0 -traceback -ftrapuv
#FFLAGS_DEBUG = -check all    -WB -warn -warn noerrors       -traceback
#FFLAGS_DEBUG = -check all    -WB                            -traceback
#FFLAGS_DEBUG = -check bounds -WB -warn -warn noerrors       -traceback
# Active setting: warnings (not promoted to errors) plus -traceback.
# -traceback should not cause runtime overhead (?), so it is kept for
# production builds as well.
FFLAGS_DEBUG = -warn -warn noerrors -traceback
#FFLAGS_DEBUG =                   -warn -warn noerrors -debug variable_locations -debug-parameters -debug inline-debug-info       -traceback 

#FFLAGS_OPT = -O1    -vec # for testing why vectorization produces MPI errors
#FFLAGS_OPT = -O2 -no-vec # default from GFDL
#FFLAGS_OPT = -O3 -no-vec # minimal speed improvement at cost of more rounding differences
#FFLAGS_OPT = -O3 -xSSE4.1 # makes repeated runs non-deterministic

# Complete Fortran flag set, assembled from the groups above.
# FFLAGS_OPT is only assigned in this template when one of the FFLAGS_OPT lines
# above is uncommented; otherwise it expands to nothing unless set elsewhere.
# Optional extra, currently unused: -nowarn
FFLAGS = $(FFLAGS_BASE) \
         $(FFLAGS_REPRO) \
         $(FFLAGS_OPT) \
         $(FFLAGS_DEBUG)

# -Wremarks is needed for ifort 12 to turn on remarks
# -diag-disable : turn off some annoying remarks
# remark #1: last line of file ends without a newline
# remark #981: operands are evaluated in unspecified order
# remark #1418: external function definition with no prior declaration
# remark #1419: external declaration in primary source file
# remark #1572: floating-point equality and inequality comparisons are unreliable
# remark #1782 : #pragma once is obsolete

# Base C++ flags: warnings and remarks enabled, debug information, the remarks
# listed above disabled, -ftz, and -fno-exceptions.
# Optional define, currently not set: -DONLY_TRANSPORT_ON_REDUCED_GRID
CXXFLAGS_BASE = -Wall -Wremarks \
                -g -debug inline-debug-info -debug extended \
                -diag-disable 1,981,1782,1572,1418,1419 \
                -ftz -fno-exceptions
# Left empty. Candidate value: -fp-model precise for production, else strict
# for debugging.
CXXFLAGS_REPRO =
#CXXFLAGS_OPT = -O2 -no-vec # default from GFDL for bit-wise repeatability
#CXXFLAGS_OPT = -O2 -no-vec -DNDEBUG # removes assertions. Greatly improves speed, but must be used only after extensive testing
# options that create runtime overhead, not desirable for production runs
#CXXFLAGS_DEBUG = -traceback -fp-model precise # -check-uninit -ftrapuv -fstack-security-check
# -traceback should not cause runtime overhead (?), thus we want it for production also
# there are several preprocessor defines to enable several debugging facilities
# -DDO_PRINT_SUMS -DDO_VARIABLE_CHECKSUMS -DTRACE_AEOLUS_INIT -DTRACE_AEOLUS_UPDATE -DDEBUG_STOCK_MOVE -DDEBUG_AEOLUS_STOCKS
#CXXFLAGS_DEBUG = -traceback -fp-model precise  -DDO_PRINT_SUMS -DDO_VARIABLE_CHECKSUMS -DTRACE_AEOLUS_INIT -DTRACE_AEOLUS_UPDATE -DDEBUG_STOCK_MOVE -DDEBUG_AEOLUS_STOCKS
# Active debug set: -traceback, -fp-model precise and the debugging defines
# listed above, with -DTRACE_AEOLUS_UPDATE left out.
CXXFLAGS_DEBUG = -traceback -fp-model precise \
                 -DDO_PRINT_SUMS -DDO_VARIABLE_CHECKSUMS \
                 -DTRACE_AEOLUS_INIT \
                 -DDEBUG_STOCK_MOVE -DDEBUG_AEOLUS_STOCKS
# Complete C++ flag set, assembled from the groups above.
CXXFLAGS = $(CXXFLAGS_BASE) \
           $(CXXFLAGS_REPRO) \
           $(CXXFLAGS_OPT) \
           $(CXXFLAGS_DEBUG)

# Preprocessor include paths.
CPPFLAGS = -I/home/petri/netcdf-4.2.1.1-intel/include -I/iplex/01/libraries/include

# Compiler and linker commands (Intel MPI wrappers).
# The optional switches are kept on separate comment lines: make keeps the
# blanks between a value and a trailing '#', so "FC = mpiifort # ..." would
# define FC with a trailing blank.
# Optional switches for FC, CC and CXX: -check_mpi -trace
# Optional switches for LD:             -v -check_mpi -profile=vtmc
FC = mpiifort
#LD = mpiifort -v -check_mpi -trace -profile=vtfs
LD = mpiifort
CC = mpiicc
CXX = mpiicpc
# when -g is given to the linking step, Intel will use the debugging version of the MPI library.
# Thus, for production performance, link without -g
# -sox Tells the compiler to save  the  compilation  options  and
#      version number in the Linux* OS executable
# to enable debugging, we must prevent IPO
# Link flags. -no-ipo keeps the executable debuggable; -g selects the debugging
# MPI library (see the note above), so drop it for production performance.
# The HDF5 high-level library (-lhdf5_hl) is listed before the core library
# (-lhdf5) it depends on, so that static links resolve its symbols.
LDFLAGS = -no-ipo -sox -g \
          -L/home/petri/netcdf-4.2.1.1-intel/lib -lnetcdf_c++ -lnetcdff -lnetcdf \
          -L/iplex/01/sys/libraries/lib -lhdf5_hl -lhdf5 \
          -L/iplex/01/sys/applications/lib -ludunits \
          -lstdc++
# Variant without -no-ipo:
#LDFLAGS =         -sox -g -L/home/petri/netcdf-4.2.1.1-intel/lib -lnetcdf_c++ -lnetcdff -lnetcdf -L/iplex/01/sys/libraries/lib -lhdf5_hl -lhdf5 -L/iplex/01/sys/applications/lib -ludunits -lstdc++
# C flags. Optional, currently unused: -O2
CFLAGS = -D__IFC -g -Wall -Wremarks


# Flags for LPJ: no IPO, the DEBUG3 define, and -traceback.
LPJ_OPTFLAGS = -no-ipo -DDEBUG3 -traceback

# Archiver. The Intel compiler requires libraries to be built with xiar when
# -ipo is used as a compiler / linker option. LPJ wants that option, hence xiar.
AR = xiar