/**
 * \file drainage2cdfvec.c
 *
 * $Id$
 *
 * Converts LPJ drainage file into NetCDF data file with drainage
 * directions encoded as vector components in x,y directions
 *
 * written by Stefan Petri
 * inspired by code from Werner von Bloh
 * Potsdam Institute for Climate Impact Research
 * PO Box 60 12 03
 * 14412 Potsdam/Germany
 */

#include "lpj.h"

#include <netcdf.h>
#include <time.h>
#include <assert.h>

static char *myname;

static void myusage(char *m) {
  if (m) fprintf(stderr, "%s\n", m);
  fprintf(stderr, "usage: %s lpj_grid_file lpj_drainage_file netcdf_file\n"
          "  reads an LPJ grid description and an LPJ binary drainage file,\n"
          "  and writes the data in NetCDF format (CF-1.0, regular lat-lon grid),\n"
          "  together with the drainage directions encoded as vector\n"
          "  components in x/y directions.\n"
          "  -10 == missing value, i.e. outside land mask, i.e. ocean cell\n"
          "  -9 == land cell according to grid file, but missing value in DDM30\n"
          "         i.e. routing info needs to be enhanced here\n"
          "  -1 == sink or drain into ocean;\n"
          ">= 0 index of destination cell\n"
          "  Vectors give x/y directions as pairs of -1/0/1 values, suitable for plotting with ferret.\n",
          myname);
  fprintf(stderr, "Additional variables routesx/routesy contain coordinate pairs for plotting routing directions more precisely with ferret plot/vs command\n");
  exit(EXIT_FAILURE);
}

#define ncerror(e) { if(e) {fprintf(stderr,"Error: %s\n", nc_strerror(e)); exit(1);}}

#define MISSING_DDM30 -9
#define DESTCELL_OCEAN -10

typedef struct
{
  const Coord_array *index;
  int ncid;
  int varid, vardirxid, vardiryid;  // IDs of dest cell var, x-component var, y-component var
  int varroutexid, varrouteyid; // x and y components of route markers
  int dcount_var_id; // count of cells which drain (indirectly) into this one
  int basin_var_id;
  int basinboundx_var_id, basinboundy_var_id; // basin boundaries
} Cdf;

static Cdf *create_cdf(const char *filename,
                       const char *clm_filename,
                       float delta_lon,
                       float delta_lat,
                       const Coord_array *array,
                       int routescount,
                       int basinboundscount)
{
  Cdf *cdf;
  float *lon, *lat;
  int missdest = DESTCELL_OCEAN;
  short missdir = NC_FILL_SHORT;
  int missint = NC_FILL_INT;
  float missfloat = NC_FILL_FLOAT;
  int i,rc,dim[2];
  String s;
  time_t t;
  int lat_var_id,lon_var_id,lat_dim_id,lon_dim_id;
  int linelen_dim_id, routes_dim_id; // record dimension is number of routes
  int basinbounds_dim_id;

  cdf=new(Cdf);
  lon=newvec(float,array->nlon);
  if (lon==NULL) { printallocerr("lon"); return NULL; }
  lat=newvec(float,array->nlat);
  if (lat==NULL) { printallocerr("lat"); return NULL; }
  lon[0] = (float)array->lon_min;
  for (i=1; i<array->nlon; i++)
    lon[i] = lon[i-1]+(float)delta_lon;
  lat[0] = (float)array->lat_min;
  for (i=1; i<array->nlat; i++)
    lat[i] = lat[i-1]+(float)delta_lat;

  cdf->index=array;
#ifdef USE_NETCDF4
  rc=nc_create(filename,(compress) ? NC_CLOBBER|NC_NETCDF4 : NC_CLOBBER,&cdf->ncid);
#else
  rc=nc_create(filename,NC_CLOBBER,&cdf->ncid);
#endif
  if(rc)
  {
    fprintf(stderr,"ERROR426: Cannot create file '%s': %s.\n",
            filename,nc_strerror(rc));
    free(lon);
    free(lat);
    free(cdf);
    return NULL;
  }
  rc=nc_def_dim(cdf->ncid,LAT_DIM_NAME,array->nlat,&lat_dim_id);
  ncerror(rc);
  rc=nc_def_dim(cdf->ncid,LON_DIM_NAME,array->nlon,&lon_dim_id);
  ncerror(rc);
  rc=nc_def_dim(cdf->ncid,"routelen",3,&linelen_dim_id);
  ncerror(rc);
  rc=nc_def_dim(cdf->ncid,"routes",routescount,&routes_dim_id);
  ncerror(rc);
  rc=nc_def_dim(cdf->ncid,"basinbounds",basinboundscount,&basinbounds_dim_id);
  ncerror(rc);


  snprintf(s,STRING_LEN,"%s %s",myname, clm_filename);
  rc=nc_put_att_text(cdf->ncid,NC_GLOBAL,"source",strlen(s),s);
  ncerror(rc);
  time(&t);
  snprintf(s,STRING_LEN,"Created for user %s on %s at %s",getuser(),gethost(),
         ctime(&t));
  s[strlen(s)-1]='\0';
  rc=nc_put_att_text(cdf->ncid,NC_GLOBAL,"history",strlen(s),s);
  ncerror(rc);
  rc=nc_def_var(cdf->ncid,LAT_NAME,NC_FLOAT,1,&lat_dim_id,&lat_var_id);
  ncerror(rc);
  rc=nc_def_var(cdf->ncid,LON_NAME,NC_FLOAT,1,&lon_dim_id,&lon_var_id);
  ncerror(rc);

  rc=nc_put_att_text(cdf->ncid,lon_var_id,"units",strlen("degrees_east"),
                     "degrees_east");
  ncerror(rc);
  rc=nc_put_att_text(cdf->ncid, lon_var_id,"long_name",strlen("longitude"),"longitude");
  ncerror(rc);
  rc=nc_put_att_text(cdf->ncid, lon_var_id,"standard_name",strlen("longitude"),"longitude");
  ncerror(rc);
  rc=nc_put_att_text(cdf->ncid, lon_var_id,"axis",strlen("X"),"X");
  ncerror(rc);
  rc=nc_put_att_text(cdf->ncid,lat_var_id,"units",strlen("degrees_north"),
                   "degrees_north");
  ncerror(rc);
  rc=nc_put_att_text(cdf->ncid, lat_var_id,"long_name",strlen("latitude"),"latitude");
  ncerror(rc);
  rc=nc_put_att_text(cdf->ncid, lat_var_id,"standard_name",strlen("latitude"),"latitude");
  ncerror(rc);
  rc=nc_put_att_text(cdf->ncid, lat_var_id,"axis",strlen("Y"),"Y");
  ncerror(rc);
  dim[0]=lat_dim_id;
  dim[1]=lon_dim_id;

  rc=nc_def_var(cdf->ncid,"destcell",NC_INT,2,dim,&cdf->varid);
  ncerror(rc);
#ifdef USE_NETCDF4
  if (compress) {
    rc=nc_def_var_deflate(cdf->ncid, cdf->varid, 0, 1,compress);
    ncerror(rc);
  }
#endif
  //rc=nc_put_att_text(cdf->ncid, cdf->varid,"units",strlen("dest_cell_nr"),"dest_cell_nr");
  //error(rc);
  //if (descr!=NULL) {
    rc=nc_put_att_text(cdf->ncid, cdf->varid,"long_name",strlen("number of destination cell"),"number of destination cell");
    ncerror(rc);
  //}
  nc_put_att_int(cdf->ncid, cdf->varid,"missing_value",NC_INT,1,&missdest);
  rc=nc_put_att_int(cdf->ncid, cdf->varid,"_FillValue",NC_INT,1,&missdest);
  ncerror(rc);

  rc=nc_def_var(cdf->ncid,"dcount",NC_INT,2,dim,&cdf->dcount_var_id);
  ncerror(rc);
#ifdef USE_NETCDF4
  if (compress) {
    rc=nc_def_var_deflate(cdf->ncid, cdf->dcount_var_id, 0, 1,compress);
    ncerror(rc);
  }
#endif
  //rc=nc_put_att_text(cdf->ncid, cdf->dcount_var_id,"units",strlen("count"),"count");
  //error(rc);
  //if (descr!=NULL) {
    rc=nc_put_att_text(cdf->ncid, cdf->dcount_var_id,"long_name",strlen("count of cells draining into this one"),"count of cells draining into this one");
    ncerror(rc);
  //}
  nc_put_att_int(cdf->ncid, cdf->dcount_var_id,"missing_value",NC_INT,1,&missint);
  rc=nc_put_att_int(cdf->ncid, cdf->dcount_var_id,"_FillValue",NC_INT,1,&missint);
  ncerror(rc);
  rc=nc_put_att_text(cdf->ncid, cdf->dcount_var_id,"ferret_command", strlen("shade if dcount ge 0 then dcount"), "shade if dcount ge 0 then dcount");
  ncerror(rc);

  rc=nc_def_var(cdf->ncid,"basin",NC_INT,2,dim,&cdf->basin_var_id);
  ncerror(rc);
#ifdef USE_NETCDF4
  if (compress) {
    rc=nc_def_var_deflate(cdf->ncid, cdf->basin_var_id, 0, 1,compress);
    ncerror(rc);
  }
#endif
  //rc=nc_put_att_text(cdf->ncid, cdf->varid,"units",strlen("basinnr"),"basinnr");
  //error(rc);
  //if (descr!=NULL) {
    rc=nc_put_att_text(cdf->ncid, cdf->basin_var_id,"long_name",strlen("catchment basin number"),"catchment basin number");
    ncerror(rc);
  //}
  nc_put_att_int(cdf->ncid, cdf->basin_var_id,"missing_value",NC_INT,1,&missint);
  rc=nc_put_att_int(cdf->ncid, cdf->basin_var_id,"_FillValue",NC_INT,1,&missint);
  ncerror(rc);

  rc=nc_def_var(cdf->ncid,"dirx",NC_SHORT,2,dim,&cdf->vardirxid);
ncerror(rc);
#ifdef USE_NETCDF4
  if (compress) {
    rc=nc_def_var_deflate(cdf->ncid, cdf->vardirxid, 0, 1,compress);
    ncerror(rc);
  }
#endif
  //if (descr!=NULL) {
  //  rc=nc_put_att_text(cdf->ncid, cdf->vardirxid,"long_name",strlen(descr),descr);
  //  ncerror(rc);
  //}
  nc_put_att_short(cdf->ncid, cdf->vardirxid,"missing_value",NC_SHORT,1, &missdir);
  rc=nc_put_att_short(cdf->ncid, cdf->vardirxid,"_FillValue",NC_SHORT,1, &missdir);
  ncerror(rc);

  rc=nc_def_var(cdf->ncid,"diry",NC_SHORT,2,dim,&cdf->vardiryid);
  ncerror(rc);
#ifdef USE_NETCDF4
  if (compress) {
    rc=nc_def_var_deflate(cdf->ncid, cdf->vardiryid, 0, 1,compress);
    ncerror(rc);
  }
#endif
  //if (descr!=NULL) {
  //  rc=nc_put_att_text(cdf->ncid, cdf->vardiryid,"long_name",strlen(descr),descr);
  //ncerror(rc);
  //}
  nc_put_att_short(cdf->ncid, cdf->vardiryid,"missing_value",NC_SHORT,1,&missdir);
  rc=nc_put_att_short(cdf->ncid, cdf->vardiryid,"_FillValue",NC_SHORT,1,&missdir);
  ncerror(rc);

  dim[1] = linelen_dim_id;
  dim[0] = routes_dim_id; // record dimension (if used) must be first
  rc=nc_def_var(cdf->ncid,"routex",NC_FLOAT,2,dim,&cdf->varroutexid);
  ncerror(rc);
#ifdef USE_NETCDF4
  if (compress) {
    rc=nc_def_var_deflate(cdf->ncid, cdf->varroutexid, 0, 1,compress);
    ncerror(rc);
  }
#endif
  rc=nc_put_att_text(cdf->ncid, cdf->varroutexid,"units",strlen("degrees_east"),"degrees_east");
  ncerror(rc);
  nc_put_att_float(cdf->ncid, cdf->varroutexid,"missing_value",NC_FLOAT,1,&missfloat);
  rc=nc_put_att_float(cdf->ncid, cdf->varroutexid,"_FillValue",NC_FLOAT,1,&missfloat);
  ncerror(rc);
  rc=nc_put_att_text(cdf->ncid, cdf->varroutexid,"ferret_command", strlen("plot/vs/line/over routex,routey"), "plot/vs/line/over routex,routey");
  ncerror(rc);

  rc=nc_def_var(cdf->ncid,"routey",NC_FLOAT,2,dim,&cdf->varrouteyid);
  ncerror(rc);
#ifdef USE_NETCDF4
  if (compress) {
    rc=nc_def_var_deflate(cdf->ncid, cdf->varrouteyid, 0, 1,compress);
    ncerror(rc);
  }
#endif
  rc=nc_put_att_text(cdf->ncid, cdf->varrouteyid,"units",strlen("degrees_north"),"degrees_north");
  ncerror(rc);
  nc_put_att_float(cdf->ncid, cdf->varrouteyid,"missing_value",NC_FLOAT,1,&missfloat);
  rc=nc_put_att_float(cdf->ncid, cdf->varrouteyid,"_FillValue",NC_FLOAT,1,&missfloat);
  ncerror(rc);

  dim[1] = linelen_dim_id;
  dim[0] = routes_dim_id; // record dimension (if used) must be first
  rc=nc_def_var(cdf->ncid,"basinboundsx",NC_FLOAT,2,dim,&cdf->basinboundx_var_id);
  ncerror(rc);
#ifdef USE_NETCDF4
  if (compress) {
    rc=nc_def_var_deflate(cdf->ncid, cdf->basinboundx_var_id, 0, 1,compress);
    ncerror(rc);
  }
#endif
  rc=nc_put_att_text(cdf->ncid, cdf->basinboundx_var_id,"units",strlen("degrees_east"),"degrees_east");
  ncerror(rc);
  nc_put_att_float(cdf->ncid, cdf->basinboundx_var_id,"missing_value",NC_FLOAT,1,&missfloat);
  rc=nc_put_att_float(cdf->ncid, cdf->basinboundx_var_id,"_FillValue",NC_FLOAT,1,&missfloat);
  ncerror(rc);
  rc=nc_put_att_text(cdf->ncid, cdf->basinboundx_var_id,"ferret_command", strlen("plot/vs/line/over basinboundsx,basinboundsy"), "plot/vs/line/over basinboundsx,basinboundsy");
  ncerror(rc);

  rc=nc_def_var(cdf->ncid,"basinboundsy",NC_FLOAT,2,dim,&cdf->basinboundy_var_id);
  ncerror(rc);
#ifdef USE_NETCDF4
  if (compress) {
    rc=nc_def_var_deflate(cdf->ncid, cdf->basinboundy_var_id, 0, 1,compress);
    ncerror(rc);
  }
#endif
  rc=nc_put_att_text(cdf->ncid, cdf->basinboundy_var_id,"units",strlen("degrees_north"),"degrees_north");
ncerror(rc);
  nc_put_att_float(cdf->ncid, cdf->basinboundy_var_id,"missing_value",NC_FLOAT,1,&missfloat);
  rc=nc_put_att_float(cdf->ncid, cdf->basinboundy_var_id,"_FillValue",NC_FLOAT,1,&missfloat);
  ncerror(rc);


  rc=nc_enddef(cdf->ncid);
  ncerror(rc);
  rc=nc_put_var_float(cdf->ncid,lat_var_id,lat);
  ncerror(rc);
  rc=nc_put_var_float(cdf->ncid,lon_var_id,lon);
  ncerror(rc);
  free(lat);
  free(lon);
  return cdf;
}


static int getdrain(FILE *file,int *index,int pos,char *headername,
                    int version,Bool swap,Bool *iserror)
{
  struct {
    int index,len;
  } buf;
  if(fseek(file,headersize(headername,version)+sizeof(buf)*pos,SEEK_SET)) {
    *iserror=TRUE;
    return 0;
  }
  if(fread(&buf,sizeof(buf),1,file)!=1) {
    *iserror=TRUE;
    return 0;
  }
  *iserror=FALSE;
  if (swap) {
    *index=swapint(buf.index);
    return swapint(buf.len);
  } else {
    *index=buf.index;
    return buf.len;
  }
} /* of 'getdrain' */



static void close_cdf(Cdf *cdf)
{
  nc_close(cdf->ncid);
  free(cdf);
}


int main(int argc, char **argv) {
  FILE *drainfile;
  Coordfile coordfile;
  Coord_array *index;
  Coord *grid,res;
  Cdf *cdf;
  Header header;
  String headername;
  //short *data;
  int *destcell; // destination cell number
  int *dcount;   // count of cells which drain into this cell
  int *basin;    // basin number
  int maxbasin = 0;
  short *dirx, *diry;
  int i,j, ngrid,version,compress,rc;
  Bool swap, iserror;
  Filename filename;
  size_t routeindex[2] = { 0, 0 };
  int basinboundscount = 0;
  float delta_lon, delta_lat;

  myname = argv[0];
  if (argc != 4) myusage("wrong number of arguments");
  filename.fmt = CLM;
  filename.name = argv[1];
  coordfile = opencoord(&filename,TRUE);
  if (NULL == coordfile) {
    fprintf(stderr, "Error opening grid file '%s'.\n", filename.name);
    return EXIT_FAILURE;
  }
  ngrid = numcoord(coordfile);
#if LPJGRID_VERSION < 3
  res.lon = res.lat = getcellsizecoord(coordfile);
#else
  getcellsizecoord(&delta_lon, &delta_lat, coordfile);
  res.lon = delta_lon; res.lat = delta_lat;
#endif
  printf("grid has %d cells, resolution %gx%g\n", ngrid, res.lon, res.lat);
  grid = newvec(Coord,ngrid);
  if (NULL == grid) {
    printallocerr("grid");
    return EXIT_FAILURE;
  }
  for (i = 0; i < ngrid; i++) {
#if LPJGRID_VERSION < 3
    readcoord(coordfile, grid+i, res);
#else
    readcoord(coordfile, grid+i, &res);
#endif
  }
  closecoord(coordfile);

  drainfile = fopen(argv[2], "rb");
  if (NULL == drainfile) {
    fprintf(stderr, "Error opening '%s': %s.\n", argv[2], strerror(errno));
    return EXIT_FAILURE;
  }
  version=READ_VERSION;
  if (freadanyheader(drainfile,&header,&swap,headername,&version,TRUE)) {
    fprintf(stderr,"Error reading header of '%s'.\n",argv[2]);
    fclose(drainfile);
    return EXIT_FAILURE;
  }
#if LPJDRAIN_VERSION < 3
  printf("found drainfile %s version %d swap %d headername %s and in header: ncell %d scalar %g cellsize %g nbands %d nyear %d\n",
         argv[2], version, swap, headername, header.ncell, header.scalar, header.cellsize, header.nbands, header.nyear);
  delta_lon = delta_lat = header.cellsize;
#else
  printf("found drainfile %s version %d swap %d headername %s and in header: ncell %d scalar %g cellsize %g/%g nbands %d nyear %d datatype %s\n",
         argv[2], version, swap, headername, header.ncell, header.scalar, header.cellsize_lon, header.cellsize_lat, header.nbands, header.nyear,
         (header.datatype==LPJ_BYTE?"byte"
          :(header.datatype==LPJ_SHORT?"short"
            :(header.datatype==LPJ_INT?"int"
              :(header.datatype==LPJ_FLOAT?"float"
                :(header.datatype==LPJ_DOUBLE?"double"
                  :"UNKNOWN TYPE"))))));
  delta_lon = header.cellsize_lon;
  delta_lat = header.cellsize_lat;
#endif

  //if(version==1)
  //  header.scalar=scale;
  if (header.ncell!=ngrid) {
    fprintf(stderr,"Number of cells in '%s' is different from %d in '%s'.\n",
            argv[2],ngrid,argv[1]);
    fclose(drainfile);
    return EXIT_FAILURE;
  }
  if (delta_lon!=res.lon || delta_lat!=res.lat) {
    fprintf(stderr,"Cell size in '%s' differs from '%s'.\n",
            argv[2],argv[1]);
    fclose(drainfile);
    return EXIT_FAILURE;
  }
  index=createindex(grid,ngrid,res,TRUE);
  if (NULL == index)
    return EXIT_FAILURE;

  printf("created index lon_min %g lat_min %g nlon %d nlat %d\n",
         index->lon_min, index->lat_min, index->nlon, index->nlat);

  /* sigh. here we want a map of the entire world.
   * but createindex() considers only the bounding box of the
   * actually populated cells.
   * Thus we need to override the results of createindex()
   */
  index->lon_min = -180.0+(res.lon/2.0);
  index->lat_min =  -90.0+(res.lat/2.0);
  index->nlon = (int)(360.0/res.lon);
  index->nlat = (int)(180.0/res.lat);
  printf("patched index lon_min %g lat_min %g nlon %d nlat %d\n",
         index->lon_min, index->lat_min, index->nlon, index->nlat);
  free(index->index); index->index = NULL; // should not be used subsequently. Else let it crash.
  // some lines copied from lpj_trunk/src/netcdf/createindex.c
  index->index=newvec(int,ngrid);
  if (NULL == index->index) { printallocerr("index"); free(index); return EXIT_FAILURE; }
  for(i=0; i<ngrid; i++) {
    index->index[i] = (int)((grid[i].lon-index->lon_min)/res.lon+0.5)+
                      (int)((grid[i].lat-index->lat_min)/res.lat+0.5)*index->nlon;
#ifndef NDEBUG
    if (index->index[i]<0 || index->index[i] >= index->nlon*index->nlat)
      fprintf(stderr,"Invalid index %d.\n", index->index[i]);
#endif
  }

  //free(grid);

  destcell = newvec(int, index->nlon*index->nlat);
  if (destcell==NULL) { printallocerr("destcell"); return EXIT_FAILURE; }
  dirx = newvec(short, index->nlon*index->nlat);
  if (dirx==NULL) { printallocerr("dirx"); return EXIT_FAILURE; }
  diry = newvec(short, index->nlon*index->nlat);
  if (diry==NULL) { printallocerr("diry"); return EXIT_FAILURE; }
  dcount = newvec(int, index->nlon*index->nlat);
  if (dcount==NULL) { printallocerr("dcount"); return EXIT_FAILURE; }
  basin = newvec(int, index->nlon*index->nlat);
  if (basin==NULL) { printallocerr("basin"); return EXIT_FAILURE; }
  for (i=0; i<index->nlon*index->nlat; i++) {
    destcell[i] = DESTCELL_OCEAN;
    dirx[i] = diry[i] = NC_FILL_SHORT;
    dcount[i] = basin[i] = NC_FILL_INT;
  }

  // read drainage info for each cell
  // and count number of outflow routes for creation of NetCDF dimension variables
  for (i=0; i<ngrid; i++) {
    float x = (float)grid[i].lon;
    float y = (float)grid[i].lat;
    int mapindex = index->index[i]; // position in output arrays
    if (mapindex < 0 || mapindex >= index->nlon*index->nlat) {
      printf("invalid mapindex %d %d\n", i, mapindex);
      exit(EXIT_FAILURE);
    }
    int len = getdrain(drainfile, destcell+mapindex, i, headername, version, swap, &iserror);
    if (iserror) {
      fprintf(stderr,"Error reading river route at %d.\n",i); 
      return EXIT_FAILURE;
    }
    printf("cell %5d %7.2f %6.2f dest %d ", i, x, y, destcell[mapindex]);
    if (-10 == destcell[mapindex]) {
      printf("FMS ocean cell\n");
      //dirx[mapindex] = diry[mapindex] = NC_FILL_SHORT; // already pre-set above
    } else if (-9 == destcell[mapindex]) {
      printf("DDM30 ocean cell inside LPJ/FMS land\n");
      //dirx[mapindex] = diry[mapindex] = NC_FILL_SHORT; // already pre-set above
    } else if (-88 == destcell[mapindex]) {
      printf("LPJ land cell not covered by DDM30\n");
      dirx[mapindex] = diry[mapindex] = -88;
    } else if (-1 == destcell[mapindex]) {
      printf("DDM30 coast or inland sink\n");
      dirx[mapindex] = diry[mapindex] = 0;
    } else if (0 <= destcell[mapindex] && destcell[mapindex] < ngrid) {
      float destx = (float)grid[destcell[mapindex]].lon;
      float desty = (float)grid[destcell[mapindex]].lat;
      printf("%7.2f %6.2f\n", destx, desty);
      // if dest cell has dest value -10 it is FMS ocean :-)
      // but its value might not be set yet, can check this only in a second iteration
      if      (x-res.lon == destx) { dirx[mapindex] = -1; }
      else if (x         == destx) { dirx[mapindex] = 0; }
      else if (x+res.lon == destx) { dirx[mapindex] = +1; }
      else { printf("Error: dest is not x-adjacent\n"); }
      if      (y-res.lat == desty) { diry[mapindex] = -1; }
      else if (y         == desty) { diry[mapindex] = 0; }
      else if (y+res.lat == desty) { diry[mapindex] = +1; }
      else { printf("Error: dest is not y-adjacent!\n"); }
      routeindex[0]++;
    } else printf("ERROR invalid dest!\n");
  } // loop over cells
  printf("found %d outflow routes\n", (int)(routeindex[0]));
  if (0 == (int)(routeindex[0])) {
    fprintf(stderr, "Error: There are no/keine/nada/zero cells from which water flows out in this map.!\nPlease check your input file.\n");
    exit(1);
  }

  // Another pass over cells:
  // Increase count of cells draining into the destination cell.
  // We must trace the drainage route from the current cell to the final sink.
  int pathmax = 0;
  int pathlen = 0; // total length of path, to avoid loops
  int dcountmax = 0; // just for curiosity
  for (i=0; i<ngrid; i++) {
    int mapindex = index->index[i]; // position in output arrays
    if (mapindex < 0 || mapindex >= index->nlon*index->nlat) {
      printf("invalid mapindex %d %d\n", i, mapindex);
      exit(EXIT_FAILURE);
    }
    if (dcount[mapindex] != NC_FILL_INT)
      continue; // We have been here before. No sense to add 0 all along downstream path
    dcount[mapindex] = 0; // start a new path
    int pathadd = 1; // how much to add to new cells in the path
    int pathincr = 1; // increment from cell cell, might be set to 0 below
    pathlen = 1;
    int desti = destcell[mapindex];
    while (0 <= desti && desti < ngrid) {
      int dstmapindex = index->index[desti];
      if (dcount[dstmapindex] == NC_FILL_INT) dcount[dstmapindex] = pathadd;
      else {
        dcount[dstmapindex] += pathadd;
        // We have been here before. Stop increasing pathadd,
        // in order to not count downstream cells multiple times.
        pathincr = 0;
      }
      if (dcount[dstmapindex] > dcountmax) dcountmax = dcount[dstmapindex]; // just for curiosity
      mapindex = dstmapindex;
      desti = destcell[mapindex];
      pathadd += pathincr;
      if (++pathlen > 1000) { printf("endless loop?\n"); break; }
    }
    if (pathlen > pathmax) pathmax = pathlen;
    if (dcount[mapindex] > dcountmax) dcountmax = dcount[mapindex];
  }
  printf("longest flow path %d cells, max dcount %d\n", pathmax, dcountmax);

  // Third pass over cells:
  // Assign basin numbers.
  // Basins with size 0, i.e. cells without outflow and without inflow get 0.
  // All non-trivial basins get a unique number.
  // TODO: possible improvement to reduce the number of needed iterations of the while loop:
  //       use recursive search along the _inflow_ paths of each cell.
  int changed = 0; // Termination flag. If anything was changed, we need another loop over the cells.
  do {
    changed = 0;
    for (i=0; i<ngrid; i++) {
      int mapindex = index->index[i]; // position in output arrays
      //if (basin[mapindex] != NC_FILL_INT)
      //  continue; // this cell is part of an already-seen basin
      // However, we might find that it is actually part of another basin,
      // thus we need to re-examine it.

      int desti = destcell[mapindex];
      if (basin[mapindex] == NC_FILL_INT && (desti < 0 || ngrid <= desti)) { // no outflow, might be a basin of size 0
        basin[mapindex] = 0;
        //printf("[%d] = 0\n", mapindex);
        changed++;
        continue;
      }
      if (basin[mapindex] == NC_FILL_INT)
        basin[mapindex] = ++maxbasin; // found a new cell with an outflow, i.e. a new basin
      pathlen = 0; // total length of path, to avoid loops
      while (0 <= desti && desti < ngrid) {
        int dstmapindex = index->index[desti];
        if (basin[dstmapindex] == NC_FILL_INT  // new cell found
            || basin[dstmapindex] == 0) {      // cell without outflow, but now we found an inflow
          basin[dstmapindex] = basin[mapindex];
          //printf("[%d] = [%d] = %d newcell\n", dstmapindex, mapindex, basin[mapindex]);
          changed++;
        } else if (basin[dstmapindex] == basin[mapindex]) {
          break; // apparently we have been along this path before, no more changes to do downstream
        } else {
          // Found inflow into a previously found different basin.
          // Propagate the lower basin number.
          if (basin[dstmapindex] < basin[mapindex])
            basin[mapindex] = basin[dstmapindex];
          else
            basin[dstmapindex] = basin[mapindex];
          //printf("[%d] = [%d] = %d propagate\n", dstmapindex, mapindex, basin[mapindex]);
          changed++;
        }
        mapindex = dstmapindex;
        desti = destcell[mapindex];
        if (pathlen++ > pathmax) { printf("endless loop?\n"); break; }
      } // downstream while loop
    } // loop over cells
    printf("basin loop: %d changed\n", changed);
  } while (changed > 0);
  printf("max basin number %d\n", maxbasin);


  for (j = 0; j < index->nlat-1; j++) {
    for (i = 0; i < index->nlon-1; i++) {
      if (basin[i+j*index->nlon] > 0 && basin[i+1+j*index->nlon] > 0 && basin[i+j*index->nlon] != basin[i+1+j*index->nlon]) {
        // found longitudinal border
        basinboundscount++;
      }
      if (basin[i+j*index->nlon] > 0 && basin[i+(j+1)*index->nlon] > 0 && basin[i+j*index->nlon] != basin[i+(j+1)*index->nlon]) {
        // found meridional border
        basinboundscount++;
      }
    }
  }
  printf("found %d basin bounds\n", basinboundscount);

  // now we have all the information to create NetCDF file
  cdf = create_cdf(argv[3],argv[2], (float)res.lon, (float)res.lat, index, routeindex[0], basinboundscount);
  if (NULL == cdf) return EXIT_FAILURE;


  rc = nc_put_var_int(cdf->ncid, cdf->varid, destcell);
  ncerror(rc);
  rc = nc_put_var_short(cdf->ncid, cdf->vardirxid, dirx);
  ncerror(rc);
  rc = nc_put_var_short(cdf->ncid, cdf->vardiryid, diry);
  ncerror(rc);
  rc = nc_put_var_int(cdf->ncid, cdf->dcount_var_id, dcount);
  ncerror(rc);
  rc = nc_put_var_int(cdf->ncid, cdf->basin_var_id, basin);
  ncerror(rc);

  // loop over cells and actually write out the outflow routes
  routeindex[0] = 0;
  for (i=0; i<ngrid; i++) {
    float x = (float)grid[i].lon;
    float y = (float)grid[i].lat;
    int mapindex = cdf->index->index[i]; // position in output arrays     
    if (0 <= destcell[mapindex] && destcell[mapindex] < ngrid) {
      float destx = (float)grid[destcell[mapindex]].lon;
      float desty = (float)grid[destcell[mapindex]].lat;
      routeindex[1] = 0;
      rc = nc_put_var1_float(cdf->ncid, cdf->varroutexid, routeindex, &x);
      ncerror(rc);
      rc = nc_put_var1_float(cdf->ncid, cdf->varrouteyid, routeindex, &y);
      ncerror(rc);
      routeindex[1] = 1;
      rc = nc_put_var1_float(cdf->ncid, cdf->varroutexid, routeindex, &destx);
      ncerror(rc);
      rc = nc_put_var1_float(cdf->ncid, cdf->varrouteyid, routeindex, &desty);
      ncerror(rc);
      // leave 3rd entry as missing_value
      //routeindex[1] = 2;
      //rc = nc_put_var1_float(cdf->ncid, cdf->varroutexid, routeindex, &missfloat);
      //error(rc);
      //rc = nc_put_var1_float(cdf->ncid, cdf->varrouteyid, routeindex, &missfloat);
      //error(rc);
      routeindex[0]++;
    }
  } // loop over cells

  routeindex[0] = 0; // recycle routeindex for the basin borders
  for (j = 0; j < index->nlat-1; j++) {
    for (i=0; i < index->nlon-1; i++) {
      //int mapindex = i+j*index->nlon;
      if (basin[i+j*index->nlon] > 0 && basin[i+1+j*index->nlon] > 0 && basin[i+j*index->nlon] != basin[i+1+j*index->nlon]) {
        float x, y1, y2;
        x = (float)(index->lon_min + i*res.lon + 0.5*res.lon); // E edge
        y1 = (float)(index->lat_min + j*res.lat + 0.5*res.lat); // N edge
        y2 = (float)(index->lat_min + j*res.lat - 0.5*res.lat); // S edge
        routeindex[1] = 0;
        rc = nc_put_var1_float(cdf->ncid, cdf->basinboundx_var_id, routeindex, &x);
        ncerror(rc);
        rc = nc_put_var1_float(cdf->ncid, cdf->basinboundy_var_id, routeindex, &y1);
        ncerror(rc);
        routeindex[1] = 1;
        rc = nc_put_var1_float(cdf->ncid, cdf->basinboundx_var_id, routeindex, &x);
        ncerror(rc);
        rc = nc_put_var1_float(cdf->ncid, cdf->basinboundy_var_id, routeindex, &y2);
        ncerror(rc);
        // leave 3rd entry as missing_value
        //routeindex[1] = 2;
        //rc = nc_put_var1_float(cdf->ncid, cdf->basinboundx_var_id, routeindex, &missfloat);
        //error(rc);
        //rc = nc_put_var1_float(cdf->ncid, cdf->basinboundy_var_id, routeindex, &missfloat);
        //error(rc);
        routeindex[0]++;
      }
      if (basin[i+j*index->nlon] > 0 && basin[i+(j+1)*index->nlon] > 0 && basin[i+j*index->nlon] != basin[i+(j+1)*index->nlon]) {
        // found meridional border
        float x1, x2, y;
        x1 = (float)(index->lon_min + i*res.lon + 0.5*res.lon); // E edge
        x2 = (float)(index->lon_min + i*res.lon - 0.5*res.lon); // W edge
        y = (float)(index->lat_min + j*res.lat + 0.5*res.lat); // S edge
        routeindex[1] = 0;
        rc = nc_put_var1_float(cdf->ncid, cdf->basinboundx_var_id, routeindex, &x1);
        ncerror(rc);
        rc = nc_put_var1_float(cdf->ncid, cdf->basinboundy_var_id, routeindex, &y);
        ncerror(rc);
        routeindex[1] = 1;
        rc = nc_put_var1_float(cdf->ncid, cdf->basinboundx_var_id, routeindex, &x2);
        ncerror(rc);
        rc = nc_put_var1_float(cdf->ncid, cdf->basinboundy_var_id, routeindex, &y);
        ncerror(rc);
        // leave 3rd entry as missing_value
        //routeindex[1] = 2;
        //rc = nc_put_var1_float(cdf->ncid, cdf->basinboundx_var_id, routeindex, &missfloat);
        //error(rc);
        //rc = nc_put_var1_float(cdf->ncid, cdf->basinboundy_var_id, routeindex, &missfloat);
        //error(rc);
        routeindex[0]++;
      }
    }
  }

  close_cdf(cdf);
  fclose(drainfile);
  // the main purpose of these free() invocations is to catch out-of-bound errors in the preceding code
  free(grid);
  free(destcell);
  free(dcount);
  free(basin);
  free(dirx);
  free(diry);
  free(index->index);
  free(index);
  return EXIT_SUCCESS;
} /* of 'main' */
