/*
 * watershed-to-lad.c
 *
 * PIK/S. Petri/April 2013
 *
 * Convert output from GRASS r.watershed into a format that can be used by
 * land_lad/soil/rivers.F90
 */


/* drainage
 * Output map: drainage direction. Provides the "aspect" for each cell
 * measured CCW from East. Multiplying positive values by 45 will give
 * the direction in degrees that the surface runoff will travel from that
 * cell. The value 0 (zero) indicates that the cell is a depression area
 * (defined by the depression input map). Negative values indicate that
 * surface runoff is leaving the boundaries of the current geographic
 * region. The absolute value of these negative cells indicates the
 * direction of flow.
 *
 *
 *  That gives:
 *  0  1  2  3  4  5  6  7  8
 *  -  NE N  NW W  SW S  SE E 
 *
 * This now needs to be converted into something that land_lad/soil/rivers.F90 can read:
 *
 * Read the GRASS-generated drainage map and the original land/sea mask.
 * For each land cell c, trace its outflow, until it reaches an ocean cell o.
 * Record that o as discharge destination cell for c.
 *
 * Possible problems: along the map edges, r.watershed places negative
 * drainage direction values to indicate that the drainage leaves the
 * region. Apparently, that means that r.watershed does not ``wrap'' the
 * world around the longitude?
 * Yes. And, more important, not across the poles.
 * That hits us with a bone crushing sound in Antarctica.
 *
 * Above suspicion turns out to be true. Moreover, tracing drainage across
 * the map edges results in circular flow in several places, especially in antarctica,
 * but also across the east/west boundary in some places.
 *
 * Approach:
 * Extend the original map, by appending the western half to the eastern edge,
 * the eastern half to the western edge,
 * south half rotated and mirrored to southern edge,
 * northern half rotated and mirrored to northern edge.
 * Ignore lat / lon axis labels.
 * Then import it into GRASS, run r.watershed on it, export, and run
 * the inverse procedure, which just cuts out the original area.
 *
 * In that cut-out area, the edges should now contain only positive values,
 * because r.watershed did not perceive them as edges in its work.
 * Thus we must check for over-the-edge wrap-around here.
 * 
 */

/* If projection and region were set correctly, the generated NetCDF file has
 * 256 columns longitudes 0.. < 360, and 128 rows.
 * If there are 257, then most probably the projection and/or region was not set
 * correctly, and the last column is either all-zeros or a duplicate of the first one.
 * In that case, it is just ignored here.
 * If there occurs any other number of rows, an error is raised.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <netcdf.h>

static char *myname = NULL;
/*                        -  NE   N  NW   W  SW   S  SE   E */
static char *dname[] =  { "-","NE", "N","NW", "W","SW", "S","SE", "E"};
static int nextdrow[] = {   0,  +1,  +1,  +1,   0,  -1,  -1,  -1,  0 }; /* next drain j-row */
static int nextdcol[] = {   0,  +1,   0,  -1,  -1,  -1,   0,  +1, +1 }; /* next drain i-column */

static void
usage(char *myname, char *msg) {
  if (msg) fprintf(stderr, "%s\n", msg);
  fprintf(stderr, "usage: %s drainage-file drainage-var mask-file mask-var coord-file coord-var output-file\n",
          myname);
  exit(1);
}

static void
handle(int err) {
  if (err != NC_NOERR) {
    fprintf(stderr, "%s\n", nc_strerror(err));
    exit(1);
  }
}


int main(int argc, char **argv) {

  int i, j, err;
  int ncid, varid, londimid, latdimid, lonvarid, latvarid, ndims, *dimids = NULL;
  size_t nlons, nlats;
  char buf[NC_MAX_NAME];

  int *drainage_direction_raw = NULL; /* expanded map as read from file */
  int *drainage_direction = NULL; /* cut-out center, the original map region, on same coordinates as mask */
  double *lons = NULL, *lats = NULL;
  int *dest_map = NULL; /* for debugging: number of source points for this destination point */
  int *flow_count = NULL; /* for debugging: number of cells from which water flows through this cell */
  int *destrow = NULL, *destcol = NULL;

  int maskncid, maskvarid;
  size_t masknlons, masknlats;
  int *landmask = NULL;

  int coordncid, coordvarid;

  int samesize = 0;

  /* to avoid endless flow in circles */
  int maxpathlen = 0;

  FILE *outfile;

  myname = *argv;

  if (argc != 8) usage(myname, "need 7 parameters");

  err = nc_open(argv[1], NC_NOWRITE, &ncid); handle(err);
  err = nc_inq_varid (ncid, argv[2], &varid); handle(err);
  err = nc_inq_varndims(ncid, varid, &ndims); handle(err);
  printf("%s: variable %s has %d dimensions\n", argv[1], argv[2], ndims);
  if (2 != ndims) {
    fprintf(stderr, "Error: %s: variable %s: expected exact 2 dimensions, but got %d\n",
            argv[1], argv[2], ndims);
    exit(1);
  }
  dimids = (int *)malloc(ndims*sizeof(*dimids));
  err = nc_inq_vardimid(ncid, varid, dimids); handle(err);
  latdimid = dimids[0];
  londimid = dimids[1];
  free(dimids);
  dimids = NULL;

  err = nc_inq_dim(ncid, latdimid, buf, &nlats); handle(err);
  printf("dimension 0 name %s len %lu\n", buf, nlats);
  err = nc_inq_varid(ncid, buf, &latvarid); handle(err);
  lats = (double *)malloc(nlats*sizeof(*lats));
  err = nc_get_var_double(ncid, latvarid, lats); handle(err);
  for (i = 0; i < nlats; i++) printf("%g ", lats[i]); printf("\n");

  err = nc_inq_dim(ncid, londimid, buf, &nlons); handle(err);
  printf("dimension 1 name %s len %lu\n", buf, nlons);
  err = nc_inq_varid(ncid, buf, &lonvarid); handle(err);
  lons = (double *)malloc(nlons*sizeof(*lons));
  err = nc_get_var_double(ncid, lonvarid, lons); handle(err);
  for (i = 0; i < nlons; i++) printf("%g ", lons[i]); printf("\n");

  drainage_direction_raw = (int *)malloc(nlats*nlons*sizeof(*drainage_direction_raw));
  err = nc_get_var_int(ncid, varid, drainage_direction_raw); handle(err);

  for (j = 0; j < nlats; j++) {
    for (i = 0; i < nlons; i++) {
      int d = drainage_direction_raw[j*nlons+i];
      if (d != (int)d || d < -8 || d > 8) {
        fprintf(stderr, "Error: drainage_direction must consist of integers -8 to 8 only, but has %d at position %d, %d\n",
                d, i, j);
        exit(1);
      }
    }
  }

  
  err = nc_open(argv[3], NC_NOWRITE, &maskncid); handle(err);
  err = nc_inq_varid (maskncid, argv[4], &maskvarid); handle(err);
  err = nc_inq_varndims(maskncid, maskvarid, &ndims); handle(err);
  printf("%s: variable %s has %d dimensions\n", argv[3], argv[4], ndims);
  if (2 != ndims) {
    fprintf(stderr, "Error: %s: variable %s: expected exact 2 dimensions, but got %d\n",
            argv[3], argv[4], ndims);
    exit(1);
  }
  dimids = (int *)malloc(ndims*sizeof(*dimids));
  err = nc_inq_vardimid(maskncid, maskvarid, dimids); handle(err);


  err = nc_inq_dim(maskncid, dimids[0], buf, &masknlats); handle(err);
  printf("dimension 0 name %s len %lu\n", buf, masknlats);
  err = nc_inq_dim(maskncid, dimids[1], buf, &masknlons); handle(err);
  printf("dimension 1 name %s len %lu\n", buf, masknlons);

  maxpathlen = masknlons+masknlats; /* arbitrary value, should be sufficient for non-artificial worlds */

  if (nlats == masknlats && nlons == masknlons) {
    /* both are expanded maps */
    samesize = 1;
  } else {
    if (nlats != 2*masknlats) {
      fprintf(stderr, "number of latitudes of drainage_direction and land mask do not match: %lu != 2*%lu\n",
              nlats, masknlats);
      exit(1);
    }

    if (nlons == 2*masknlons+1) {
      /* examine last column. should be all zeros */
      i = nlons-1;
      for (j = 0; j < nlats; j++) {
        if (drainage_direction_raw[j*nlons+i] != 0) {
          fprintf(stderr, "drainage_direction[%d, %d] = %d != 0\n", i, j, drainage_direction_raw[i*nlons+j]);
          exit(1);
        }
      }
    } else if (nlons != 2*masknlons) {
      fprintf(stderr, "number of longitudes of drainage_direction and land mask do not match: %lu != 2*%lu\n",
              nlons, masknlons);
      exit(1);
    }
  }

  landmask = (int *)malloc(masknlats*masknlons*sizeof(*landmask));
  err = nc_get_var_int(maskncid, maskvarid, landmask); handle(err);

  for (j = 0; j < masknlats; j++) {
    for (i = 0; i < masknlons; i++) {
      if (landmask[j*masknlons+i] != 0 && landmask[j*masknlons+i] != 1) {
        fprintf(stderr, "Error: land mask must consist of 0 and 1 values only, but has %d at position %d, %d\n",
                landmask[j*masknlons+i], i, j);
        exit(1);
      }
    }
  }

  err = nc_open(argv[5], NC_NOWRITE, &coordncid); handle(err);
  err = nc_inq_varid (coordncid, argv[6], &coordvarid); handle(err);
  err = nc_inq_varndims(coordncid, coordvarid, &ndims); handle(err);
  printf("%s: variable %s has %d dimensions\n", argv[5], argv[6], ndims);



  /* so far, everything looks OK, now lets start the real work */

  if (samesize) {
    printf("drainage map and mask have same size\n");
    drainage_direction = drainage_direction_raw;
  } else {
    /* cut out original region from drainage_direction_raw */
    printf("cutting out original region from drainage map\n");
    drainage_direction = (int *)malloc(masknlats*masknlons*sizeof(*drainage_direction));
    for (j = 0; j < masknlats; j++) {
      for (i = 0; i < masknlons; i++) {
        drainage_direction[j*masknlons+i] = drainage_direction_raw[(j+masknlons/2)*nlons+i+masknlons/2];
      }
    }
    free(drainage_direction_raw);
  }
  drainage_direction_raw = NULL;
  nlons = -424242;
  nlats = -434343;

  /* scan edges of cut-out drainage-direction. off-map directions are most probably positive now */
  for (i = 0; i < masknlons; i++) {
    int d;
    d = drainage_direction[0*masknlons+i]; /* south edge */
    if (5 == d || 6 == d || 7 == d) { /* SW, S, SE */ drainage_direction[0*masknlons+i] = -d; }
    d = drainage_direction[(masknlats-1)*masknlons+i]; /* north edge */
    if (1 == d || 2 == d || 3 == d) { /* NE, N, NW */ drainage_direction[(masknlats-1)*masknlons+i] = -d; }
  }
  
  for (j = 0; j < masknlats; j++) {
    int d;
    d = drainage_direction[j*masknlons+0]; /* west edge */
    if (3 == d || 4 == d || 5 == d) { /* NW, W, SW */ drainage_direction[j*masknlons+0] = -d; }
    d = drainage_direction[j*masknlons+(masknlons-1)]; /* east edge */
    if (1 == d || 7 == d || 8 == d) { /* NE, SE, E */ drainage_direction[j*masknlons+(masknlons-1)] = -d; }
  }

  /* now lets report the problematic cells */
  for (j = 0; j < masknlats; j++) {
    for (i = 0; i < masknlons; i++) {
      int d = drainage_direction[j*masknlons+i];
      if (d < 0 && landmask[j*masknlons+i] > 0) {
        printf("drainage_direction off-map %d at position %d, %d\n", d, i, j);
      }
    }
  }

  dest_map = (int *)malloc(masknlats*masknlons*sizeof(*dest_map));
  memset(dest_map, '\0', masknlats*masknlons*sizeof(*dest_map));
  flow_count = (int *)malloc(masknlats*masknlons*sizeof(*flow_count));
  memset(flow_count, '\0', masknlats*masknlons*sizeof(*flow_count));
  destrow = (int *)malloc(masknlats*masknlons*sizeof(*destrow));
  destcol = (int *)malloc(masknlats*masknlons*sizeof(*destcol));

  for (j = 0; j < masknlats; j++) {
    for (i = 0; i < masknlons; i++) {
      int pathlen = 0;
      int in = i, jn = j;
      if (landmask[j*masknlons+i] == 0) continue; /* ocean cell - skip it */
      printf("%d,%d", i, j);
      while (landmask[jn*masknlons+in] > 0 && pathlen < maxpathlen) {
        int d = (int)drainage_direction[jn*masknlons+in];
        if (d >0) {
          int i2 = in+nextdcol[d];
          int j2 = jn+nextdrow[d];
          if (i2 < 0 || i2 >= masknlons || j2 < 0 || j2 >= masknlats) {
            fflush(stdout);
            fprintf(stderr, "Error: direction %d at location %d,%d points outside map to cell %d,%d\n",
                    d, in, jn, i2, j2);
            exit(1);
          }
          in = i2;
          jn = j2;
        } else if (d < 0) {
          in = in+nextdcol[-d];
          jn = jn+nextdrow[-d];
          /* wrap around map edges */
          if (in < 0) in = masknlons-1; /* over west edge */
          if (in >= masknlons) in = 0;  /* over east edge */
          if (jn < 0) { /* across south pole */
            in = (in + (masknlons/2)) % masknlons; /* find opposite cell in circle */
            jn = 0;                                /* but still at south pole */
          }
          if (jn >= masknlats) { /* across north pole */
            in = (in + (masknlons/2)) % masknlons; /* find opposite cell in circle */
            jn = masknlats-1;                      /* but still at north pole */
          }
          /* Here, we could check that the newly-found target cell does not point back
           * to where we are now.
           * However, such a check could not detect loops with over more than 2 cells.
           * Thus, we rely on the maxpathlen limit check.
           */
        } else { /* d == 0 - in-land sink cell */
          printf(" in-land sink\n");
          break;
        }
        flow_count[jn*masknlons+in]++;
        printf(" %c%s -> %d,%d", d<0?'-':'+', dname[abs(d)], in, jn);
        pathlen++;
      } /* while (landmask && pathlen) */
      if (pathlen == maxpathlen) {
        printf(" - endless loop?\n");
        fflush(stdout);
        fprintf(stderr, "Error: max pathlen exceeded for source cell at %d,%d\n", i, j);
        /*exit(1);*/
      }
      printf("\n");
      /* found target cell, either in-land or at ocean coast */
      dest_map[jn*masknlons+in]++;
      destrow[j*masknlons+i] = jn;
      destcol[j*masknlons+i] = in;
    }
  }

  outfile = fopen(argv[7], "w");
  if (NULL == outfile) perror(argv[5]);

  if (samesize) {
    /* now we need to cut out the map center to get back the original map */
    /* n_lon, n_lat, western boundary in degrees */
    printf("cut out at samesize\n");
    fprintf(outfile, "%3d, %3d, %6.3f\n", (int)masknlons/2, (int)masknlats/2, -360.0/(masknlons/2)/2.0);
    fprintf(outfile, "(20i4)\n");
    for (j = masknlats/4; j < masknlats*3/4; j++) {
      for (i = masknlons/4; i < masknlons*3/4; i++) {
        int di = destcol[j*masknlons+i] - masknlons/4;
        int dj = destrow[j*masknlons+i] - masknlats/4;
        printf("%3d,%3d -> %3d,%3d / %3d,%3d -> %3d,%3d",
               i, j, destcol[j*masknlons+i], destrow[j*masknlons+i],
               i-(int)masknlons/4, j-(int)masknlats/4, di, dj);
        if        (di < 0           && dj < 0)           { // beyond lower left corner
          di += masknlons/2;
          dj = -dj;
        } else if (di < 0           && dj > masknlats/2) { // beyond upper left corner
          di += masknlons/2;
          dj = masknlats/2 - dj;
        } else if (di > masknlons/2 && dj < 0)           { // beyond lower right corner
        } else if (di > masknlons/2 && dj > masknlats/2) { // beyond upper right corner
        } else if (di < 0)                               { // beyond left edge
          di += masknlons/2;
        } else if (di > masknlons/2)                     { // beyond right edge
          di -= masknlons/2;
        } else if (dj < 0)                               { // beyond lower edge
          dj = -dj;
        } else if (dj > masknlats/2)                     { // beyond upper edge
          dj = masknlats/2 - dj;
        } else                                           { // already in center
          // NOP
        }
        printf(" -> %3d,%3d\n", di, dj);
        destcol[j*masknlons+i] = di;
        destrow[j*masknlons+i] = dj;
      }
    }
    
    for (j = masknlats/4; j < masknlats*3/4; j++) {
      int outcol = 0;
      for (i = masknlons/4; i < masknlons*3/4; i++) {
        fprintf(outfile, "%4d", destcol[j*masknlons+i]+1); /* plus one because rivers.F90 expects Fortran array indices */
        if (20 == ++outcol) { fprintf(outfile, "\n"); outcol = 0; }
      }
      fprintf(outfile, "\n");
    }
    for (j = masknlats/4; j < masknlats*3/4; j++) {
      int outcol = 0;
      for (i = masknlons/4; i < masknlons*3/4; i++) {
        fprintf(outfile, "%4d", destrow[j*masknlons+i]+1); /* plus one because rivers.F90 expects Fortran array indices */
        if (20 == ++outcol) { fprintf(outfile, "\n"); outcol = 0; }
      }
      fprintf(outfile, "\n");
    }
    
  } else {
    /* n_lon, n_lat, western boundary in degrees */
    fprintf(outfile, "%3d, %3d, %6.3f\n", (int)masknlons, (int)masknlats, -360.0/masknlons/2.0);
    fprintf(outfile, "(20i4)\n");
    for (j = 0; j < masknlats; j++) {
      int outcol = 0;
      for (i = 0; i < masknlons; i++) {
        fprintf(outfile, "%4d", destcol[j*masknlons+i]);
        if (20 == ++outcol) { fprintf(outfile, "\n"); outcol = 0; }
      }
      fprintf(outfile, "\n");
    }
    for (j = 0; j < masknlats; j++) {
      int outcol = 0;
      for (i = 0; i < masknlons; i++) {
        fprintf(outfile, "%4d", destrow[j*masknlons+i]);
        if (20 == ++outcol) { fprintf(outfile, "\n"); outcol = 0; }
      }
      fprintf(outfile, "\n");
    }
  }


  printf("done\n");

  return 0;
}
