# -*-Makefile-*-
# Template for the Intel Fortran compiler versions 11 or 12 with Intel MPI
# on the PIK iplex cluster.
#
# Typical use with mkmf:
#   mkmf -t template.ifc -c"-Duse_libMPI -Duse_netCDF" path_names /usr/local/include
#
# This file only defines variables (compiler names and flag sets); it contains
# no rules. Each flag set is composed from *_BASE, *_REPRO, *_OPT and *_DEBUG
# parts so that the individual parts can be switched by (un)commenting one line.

# ---------------------------------------------------------------------------
# Fortran: base flags
# ---------------------------------------------------------------------------
# -fpp             : run the Fortran preprocessor before compilation
# -fno-alias       : If you do not want aliasing to be assumed in the program
# -stack_temps     : allocate space for temporary arrays on the runtime stack
# -safe-cray-ptr   : Cray* pointers do not alias other variables.
#                    (spelled -safe_cray_ptr in FFLAGS_BASE below)
# -ftz             : Flushes denormal results to zero.
#                    This option flushes denormal results to zero when the
#                    application is in the gradual underflow mode. It may
#                    improve performance if the denormal values are not
#                    critical to your application's behavior.
#
#                    If this option produces undesirable results of the
#                    numerical behavior of your program, you can turn the
#                    FTZ/DAZ mode off by using -no-ftz or /Qftz- in the
#                    command line while still benefiting from the O3
#                    optimizations.
#
#                    NOTE: Options -ftz and /Qftz are performance options.
#                    Setting these options does not guarantee that all
#                    denormals in a program are flushed to zero. They only
#                    cause denormals generated at run time to be flushed to
#                    zero.
# -i-dynamic       : use -shared-intel
#                    (spelled -i_dynamic in FFLAGS_BASE below)
# -assume byterecl : the units for the OPEN statement RECL specifier
#                    (record length) value are in bytes for unformatted data,
#                    not longwords
# -Wp,-w           : pass option -w to the preprocessor
#                    Prevents warnings from being output.
# -sox             : save the compilation options and version number in the
#                    executable
FFLAGS_BASE = -fpp -fno-alias -stack_temps -safe_cray_ptr -ftz -i_dynamic -assume byterecl -g -i4 -r8 -Wp,-w -sox

# ---------------------------------------------------------------------------
# Fortran: reproducibility flags
# ---------------------------------------------------------------------------
# -fltconsistency : Enables improved floating-point consistency.
#                   This option enables improved floating-point consistency
#                   and may slightly reduce execution speed. It limits
#                   floating-point optimizations and maintains declared
#                   precision. It also disables inlining of math library
#                   functions.
#
#                   - Even if vectorization is enabled by the -x options, the
#                     compiler does not vectorize reduction loops (loops
#                     computing the dot product) and loops with mixed
#                     precision types. Similarly, the compiler does not
#                     enable certain loop transformations. For example,
#                     the compiler does not transform reduction loops to
#                     perform partial summation or loop interchange.
#
#                   This option causes performance degradation relative to
#                   using default floating-point optimization flags.
#                   The recommended method to control the semantics of
#                   floating-point calculations is to use option -fp-model
FFLAGS_REPRO = -fltconsistency

## Su-Bong Lee wrote on 13.3.2012:
## I also did experiment using "-fp_model precise" option with "-O2",
## Then the outputs from two identical restart run were the same.
## Marshall Ward answered:
## We are seeing similar issues on our machine in Australia using the intel
## compilers (with -O2).
##
## On single CPU submissions of the bowl1 experiment, we saw two different
## solutions, on the order of floating point error. The answer we get is
## quasi-random, with no clear explanation.
##
## As Swathi recommends, using '-fp-model precise' gives a third solution
## different from the other two, which we can reproduce consistently. Using
## -fltconsistency did not address the issue for us either. This link
## suggests Intel may soon deprecate -fltconsistency: http://goo.gl/IPBkH

# ---------------------------------------------------------------------------
# Fortran: debugging flags
# ---------------------------------------------------------------------------
# -check             : Checks for certain conditions at run time.
# -check all         : is the same as specifying check with no keyword
# -check bounds      : compile-time and run-time checking for array subscripts
# -WB                : Turns a compile-time bounds check into a warning.
# -inline_debug_info : use -debug inline-debug-info
# -fpe0              : Floating-point invalid, divide-by-zero, and overflow
#                      exceptions are enabled. If any such exceptions occur,
#                      execution is aborted.
#                      This option sets the -ftz ; therefore underflow results
#                      will be set to zero unless you explicitly specify
#                      -no-ftz
# -fpe3              : (default) All floating-point exceptions are disabled.
#                      Floating-point underflow is gradual
# -ftrapuv           : Initializes stack local variables to an unusual value
#                      to aid error detection.
# -fp-stack-check    : generate extra code after every function call
#                      By default, there is no checking. So when the FP stack
#                      overflows, a NaN value is put into FP calculations and
#                      the program's results differ. Unfortunately, the
#                      overflow point can be far away from the point of the
#                      actual bug. This option places code that causes an
#                      access violation exception immediately after an
#                      incorrect call occurs, thus making it easier to locate
#                      these issues.

# options that possibly create runtime overhead, thus should be used during development
# but not for production.
#FFLAGS_DEBUG = -check bounds -WB -warn -warn noerrors -debug variable_locations -debug-parameters -debug inline-debug-info -fpe0 -traceback -ftrapuv
#FFLAGS_DEBUG = -check all -WB -warn -warn noerrors -debug variable_locations -debug-parameters -debug inline-debug-info -traceback
#FFLAGS_DEBUG = -check bounds -WB -warn -warn noerrors -fpe0 -traceback -ftrapuv
#FFLAGS_DEBUG = -check all -WB -warn -warn noerrors -fpe0 -traceback -ftrapuv
#FFLAGS_DEBUG = -check all -WB -warn -warn noerrors -traceback
#FFLAGS_DEBUG = -check all -WB -traceback
#FFLAGS_DEBUG = -check bounds -WB -warn -warn noerrors -traceback
# -traceback should not cause runtime overhead (?), thus we want it for production also
FFLAGS_DEBUG = -warn -warn noerrors -traceback
#FFLAGS_DEBUG = -warn -warn noerrors -debug variable_locations -debug-parameters -debug inline-debug-info -traceback

# ---------------------------------------------------------------------------
# Fortran: optimization flags
# ---------------------------------------------------------------------------
# NOTE(review): every FFLAGS_OPT candidate below is commented out, so unless
# FFLAGS_OPT is set elsewhere (environment, command line, another makefile)
# $(FFLAGS_OPT) expands to nothing in FFLAGS -- confirm this is intended.
#FFLAGS_OPT = -O1 -vec # for testing why vectorization produces MPI errors
#FFLAGS_OPT = -O2 -no-vec # default from GFDL
#FFLAGS_OPT = -O3 -no-vec # minimal speed improvement at cost of more rounding differences
#FFLAGS_OPT = -O3 -xSSE4.1 # makes repeated runs non-deterministic

# Recursively expanded (=) on purpose: the component variables are looked up
# each time FFLAGS is used, so a later or external definition still applies.
FFLAGS = $(FFLAGS_BASE) $(FFLAGS_REPRO) $(FFLAGS_OPT) $(FFLAGS_DEBUG) # -nowarn

# ---------------------------------------------------------------------------
# C++ flags
# ---------------------------------------------------------------------------
# -Wremarks is needed for ifort 12 to turn on remarks
# -diag-disable : turn off some annoying remarks
#   remark #1: last line of file ends without a newline
#   remark #981: operands are evaluated in unspecified order
#   remark #1418: external function definition with no prior declaration
#   remark #1419: external declaration in primary source file
#   remark #1572: floating-point equality and inequality comparisons are unreliable
#   remark #1782 : #pragma once is obsolete
CXXFLAGS_BASE = -Wall -Wremarks -g -debug inline-debug-info -debug extended -diag-disable 1,981,1782,1572,1418,1419 -ftz -fno-exceptions # -DONLY_TRANSPORT_ON_REDUCED_GRID

# Currently empty: everything after the first '#' on this line is a comment.
CXXFLAGS_REPRO = # -fp-model precise # for production, else strict for debugging

# NOTE(review): both CXXFLAGS_OPT candidates are commented out, so unless it is
# set elsewhere $(CXXFLAGS_OPT) expands to nothing in CXXFLAGS -- confirm.
#CXXFLAGS_OPT = -O2 -no-vec # default from GFDL for bit-wise repeatability
#CXXFLAGS_OPT = -O2 -no-vec -DNDEBUG # removes assertions. Greatly improves speed, but must be used only after extensive testing

# options that create runtime overhead, not desirable for production runs
#CXXFLAGS_DEBUG = -traceback -fp-model precise # -check-uninit -ftrapuv -fstack-security-check
# -traceback should not cause runtime overhead (?), thus we want it for production also
# there are several preprocessor defines to enable several debugging facilities
# -DDO_PRINT_SUMS -DDO_VARIABLE_CHECKSUMS -DTRACE_AEOLUS_INIT -DTRACE_AEOLUS_UPDATE -DDEBUG_STOCK_MOVE -DDEBUG_AEOLUS_STOCKS
#CXXFLAGS_DEBUG = -traceback -fp-model precise -DDO_PRINT_SUMS -DDO_VARIABLE_CHECKSUMS -DTRACE_AEOLUS_INIT -DTRACE_AEOLUS_UPDATE -DDEBUG_STOCK_MOVE -DDEBUG_AEOLUS_STOCKS
CXXFLAGS_DEBUG = -traceback -fp-model precise -DDO_PRINT_SUMS -DDO_VARIABLE_CHECKSUMS -DTRACE_AEOLUS_INIT -DDEBUG_STOCK_MOVE -DDEBUG_AEOLUS_STOCKS

CXXFLAGS = $(CXXFLAGS_BASE) $(CXXFLAGS_REPRO) $(CXXFLAGS_OPT) $(CXXFLAGS_DEBUG)

# ---------------------------------------------------------------------------
# Preprocessor include paths
# ---------------------------------------------------------------------------
CPPFLAGS = -I/home/petri/netcdf-4.2.1.1-intel/include -I/iplex/01/libraries/include

# ---------------------------------------------------------------------------
# Compiler / linker drivers (Intel MPI wrappers)
# ---------------------------------------------------------------------------
FC = mpiifort # -check_mpi -trace
#LD = mpiifort -v -check_mpi -trace -profile=vtfs
LD = mpiifort # -v -check_mpi -profile=vtmc
CC = mpiicc # -check_mpi -trace
CXX= mpiicpc # -check_mpi -trace

# ---------------------------------------------------------------------------
# Linker flags
# ---------------------------------------------------------------------------
# when -g is given to the linking step, Intel will use the debugging version of the MPI library.
# Thus, for production performance, link without -g
# -sox Tells the compiler to save the compilation options and
#      version number in the Linux* OS executable
# to enable debugging, we must prevent IPO
# NOTE(review): the active LDFLAGS below still contains -g, i.e. this is the
# debugging configuration described above, not the production one.
# NOTE(review): -lhdf5 is listed before -lhdf5_hl; if these are ever linked as
# static archives the usual order is dependents first (-lhdf5_hl -lhdf5).
# Left unchanged here because it cannot be verified against the site libraries.
LDFLAGS = -no-ipo -sox -g -L/home/petri/netcdf-4.2.1.1-intel/lib -lnetcdf_c++ -lnetcdff -lnetcdf -L/iplex/01/sys/libraries/lib -lhdf5 -lhdf5_hl -L/iplex/01/sys/applications/lib -ludunits -lstdc++
#LDFLAGS = -sox -g -L/home/petri/netcdf-4.2.1.1-intel/lib -lnetcdf_c++ -lnetcdff -lnetcdf -L/iplex/01/sys/libraries/lib -lhdf5 -lhdf5_hl -L/iplex/01/sys/applications/lib -ludunits -lstdc++

# ---------------------------------------------------------------------------
# C flags and LPJ-specific options
# ---------------------------------------------------------------------------
CFLAGS = -D__IFC -g -Wall -Wremarks # -O2
LPJ_OPTFLAGS=-no-ipo -DDEBUG3 -traceback

# the Intel compiler requires that libraries are built using xiar if -ipo is used as compiler / linker option.
# LPJ wants that option, thus we need xiar here.
AR = xiar