/**
 * \file lpjbin2netcdf.c
 *
 * $Id$
 *
 * Convert a binary LPJ data file to netcdf format.
 * Needs an LPJ grid description file that matches the data file.
 * Currently handles soil description files and lake fraction files.
 * It is assumed here that the soil description / lake fraction file has less than 255 soil types,
 * i.e. the data for each cell is stored in exactly one byte.
 * In that case, LPJ uses soil description / lake fraction files without header.
 *
 * TODO: handle more LPJ data types.
 */

#include "lpj.h"
#include "netcdf.h"


static char *myname;

static void myusage(char *m) {
  if (m) fprintf(stderr, "%s\n", m);
  fprintf(stderr, "usage: %s [-r dlonxdlat] [-t {soil|lake}] lpj_grid_file lpj_bin_file netcdf_file\n"
          "reads an LPJ grid description and an LPJ binary data file, and writes the data in NetCDF format (CF-1.0, regular lat-lon grid)\n",
          myname);
  exit(EXIT_FAILURE);
}


#define ERR(e) { if (e) {printf("Error: %s\n", nc_strerror(e)); return 2;} }



int main(int argc, char **argv)
{
  Filename filename;
  Coord *grid;
  Coord resolution;
  Coordfile file_coord;
  int i, j, err;
  Bool lpjerr;
  size_t ferr;
  unsigned char *bytedata, *griddedbytedata;

  int nlons, nlats, ngrid;
  double *lons, *lats, /* cell centers */
    *blons, *blats; /* cell boundaries */
  double *celllons, *celllats; /* lons and lats for each cell extracted from Coord */

  int *lpj_to_regular_indices;
  int *regular_to_lpj_indices;
  enum filetypes {
    FT_UNKNOWN = 0,
    FT_SOILCODE = 1,
    FT_LAKEFRAC = 2
  } filetype = FT_UNKNOWN;
  
  struct filetypename {
    char *name;
    enum filetypes type;
  } *filetypep, filetypenames[] = {
    { "soil",     FT_SOILCODE },
    { "soilcode", FT_SOILCODE },
    { "lake",     FT_LAKEFRAC },
    { "lakefrac", FT_LAKEFRAC },
    { NULL,       FT_UNKNOWN }
  };

  resolution.lat = 0.5; /* hardcoded here for 720x360 grid */
  resolution.lon = 0.5;

  myname = argv[0];

  /* parse options */
  for (argv++, argc--; argc > 1; argc--, argv++) {
    if ('-' == argv[0][0]) { /* found an option */
      switch(argv[0][1]) {
      case 'r': /* -r dlatxdlon */
        if (argc < 2) { myusage("not enough parameters, option -r needs dlonxdlat"); }
        if (sscanf(argv[1], "%lfx%lf", &(resolution.lon), &(resolution.lat)) != 2) { myusage("option -r needs dlonxdlat"); }
        argc--, argv++;
        break;
      case 't': /* -t type ; type = {soil,lakes} [TODO: more types] */
        if (argc < 2) { myusage("not enough parameters, option -t needs type (soilcode or lakefrac)"); }
        for (filetypep = filetypenames; filetypep->name; filetypep++) {
          if (!strcmp(filetypep->name, argv[1])) { filetype = filetypep->type; break; }
        }
        if (FT_UNKNOWN == filetype) { myusage("unknown type specification for option -r (need soilcode or lakefrac)"); }
        argc--, argv++;
        break;
      default:
        myusage("unknown option");
        break;
      }
    }  else {
      break;
    }
  }
  
  if (FT_UNKNOWN == filetype) { myusage("unknown type specification - need option -t"); }

  if (argc != 3) myusage("need three file names");

  filename.fmt = CLM;
  filename.name = argv[1];
  file_coord = opencoord(&filename);
  if (NULL == file_coord) { perror(argv[1]); myusage("cannot open LPJ grid description file"); }
  ngrid = numcoord(file_coord);
  printf("LPJ grid description has %d cells\n", ngrid);

  grid = newvec(Coord, ngrid);
  if (NULL == grid) { perror("cannot allocate coordinates array"); exit(1); }

  celllons = newvec(double, ngrid);
  celllats = newvec(double, ngrid);
  for (i = 0; i < ngrid; i++) {
    lpjerr = readcoord(file_coord, grid+i, resolution);
    if (lpjerr) { perror(argv[0]); fprintf(stderr, "cannot read coordinates nr %d from %s\n", i, argv[0]); exit(1); }
    celllons[i] = grid[i].lon;
    celllats[i] = grid[i].lat;
  }

  /* now compute axis information for regular NetCDF grid */
  nlons = 360.0 / resolution.lat; /* implicit truncation to int here */
  nlats = 180.0 / resolution.lon; /* implicit truncation to int here */

  lons = newvec(double, nlons);
  lats = newvec(double, nlats);
  blons = newvec(double, nlons+1);
  blats = newvec(double, nlats+1);
  /* LPJ ususally uses longitudes -180..180 */
  for (i = 0; i < nlons; i++) { lons[i] = i*resolution.lon - 180.0 + (resolution.lon/2.0); }
  for (i = 0; i < nlons+1; i++) { blons[i] = i*resolution.lon - 180.0; }
  for (j = 0; j < nlats; j++) { lats[j] = j*resolution.lat - 90.0 + (resolution.lat/2.0); }
  for (j = 0; j < nlats+1; j++) { blats[j] = j*resolution.lat - 180.0; }

  /* set up LPJ to regular grid mapping */
  lpj_to_regular_indices = newvec(int, ngrid);
  regular_to_lpj_indices = newvec(int, nlons*nlats);
  for (i = 0; i < nlons*nlats; i++) regular_to_lpj_indices[i] = -1;
  for (i = 0; i < ngrid; i++) {
    lpj_to_regular_indices[i] = (int)((grid[i].lon - lons[0]) / resolution.lon + 0.5)
      + (int)((grid[i].lat - lats[0]) / resolution.lat + 0.5) * nlons;
    if (lpj_to_regular_indices[i] < 0 || lpj_to_regular_indices[i] >= nlons*nlats) {
      fprintf(stderr, "Error: Invalid LPJCell-to-regular index %d for Cell %d at %g,%g\n",
              lpj_to_regular_indices[i], i, grid[i].lon, grid[i].lat);
      fflush(stdout); fflush(stderr);
      abort();
    }
    // check that each entry in lpj_to_regular_indices[] is unique
    if (-1 != regular_to_lpj_indices[lpj_to_regular_indices[i]]) {
      fprintf(stderr, "Error: duplicate LPJCell-to-regular index %d for Cell %d at %g,%g\n",
              lpj_to_regular_indices[i], i, grid[i].lon, grid[i].lat);
      fflush(stdout); fflush(stderr);
      abort();
    }
    regular_to_lpj_indices[lpj_to_regular_indices[i]] = i;
  }

  switch(filetype) {
  case FT_SOILCODE:
  case FT_LAKEFRAC:
    {
      /*Header header*/
      FILE * datafile;
      /*Bool swap_data;*/
      /*int nsoil = 14;*/
      long datafilelen;

      datafile = fopen(argv[1], "r");
      if (NULL == datafile) { perror(argv[1]); myusage("cannot open LPJ data description file"); }
      fseek(datafile, 0, SEEK_END);
      datafilelen = ftell(datafile);
      if (datafilelen != ngrid) {
        /* TODO: handle data files with 2-byte ints */
        fprintf(stderr, "LPJ grid description has %d cells, but data file %s has %ld bytes\n",
                ngrid, argv[1], datafilelen);
        fflush(stdout); fflush(stderr);
        exit(1);
      }

      fseek(datafile, 0, SEEK_SET);
      bytedata = (unsigned char *)malloc(sizeof(unsigned char) * (ngrid));
      ferr = fread(bytedata, 1, ngrid, datafile);
      if (ferr != ngrid) { perror("error reading LPJ data"); exit(1); }
    
      griddedbytedata = (unsigned char *)malloc(sizeof(unsigned char) * (nlons*nlats));
      for (i = 0; i < nlons*nlats; i++) griddedbytedata[i] = 255;  /* missing value */
      for (i = 0; i < ngrid; i++) griddedbytedata[lpj_to_regular_indices[i]] = bytedata[i];
      break;
    }
  }

  {
    int ncid;
    int xdimid, xbdimid, ydimid, ybdimid;
    int xvarid, xbvarid, yvarid, ybvarid, datavarid;
#ifdef DO_COMPRESSED_GRID
    /* storing the cell data in compressed scheme, according to CF-1.0 section 5, using attribute coordinates */
    /* this yields a nice netcdf file, but neither ferret nor ncview make use of the coordinates attribute */
    /* also, compression by gathering is not supported in those tools */
    int celldimid, celllondimid, celllatdimid;
    int celllonvarid, celllatvarid, datacellvarid;
#endif
    unsigned char fillbyte = 255;
    int dimids[4];
    float scale_factor, add_offset;

    err = nc_create(argv[2], NC_CLOBBER, &ncid);
    if (err) { fprintf(stderr, "nc_open(%s): %s\n", argv[2], nc_strerror(err)); exit(EXIT_FAILURE); }

    err = nc_put_att_text(ncid, NC_GLOBAL, "Conventions", strlen("CF-1.0"), "CF-1.0");

    err = nc_def_dim(ncid, "lon", nlons, &xdimid);
    err = nc_def_dim(ncid, "lat", nlats, &ydimid);
    err = nc_def_dim(ncid, "lonb", nlons+1, &xbdimid);
    err = nc_def_dim(ncid, "latb", nlats+1, &ybdimid);
#ifdef DO_COMPRESSED_GRID
    err = nc_def_dim(ncid, "cell", file_coord->n, &celldimid);
    err = nc_def_dim(ncid, "celllon", file_coord->n, &celllondimid);
    err = nc_def_dim(ncid, "celllat", file_coord->n, &celllatdimid);
#endif
    err = nc_def_var(ncid, "lon", NC_FLOAT, 1, &xdimid, &xvarid);
    err = nc_put_att_text(ncid, xvarid, "units", strlen("degrees_east"), "degrees_east");
    err = nc_put_att_text(ncid, xvarid, "edges", strlen("lonb"), "lonb");
    err = nc_put_att_text(ncid, xvarid, "cartesian_axis", strlen("X"), "X");

    err = nc_def_var(ncid, "lat", NC_FLOAT, 1, &ydimid, &yvarid);
    err = nc_put_att_text(ncid, yvarid, "edges", strlen("latb"), "latb");
    err = nc_put_att_text(ncid, yvarid, "units", strlen("degrees_north"), "degrees_north");
    err = nc_put_att_text(ncid, yvarid, "cartesian_axis", strlen("Y"), "Y");

    err = nc_def_var(ncid, "lonb", NC_FLOAT, 1, &xbdimid, &xbvarid);
    err = nc_put_att_text(ncid, xbvarid, "units", strlen("degrees_east"), "degrees_east");
    err = nc_put_att_text(ncid, xbvarid, "cartesian_axis", strlen("X"), "X");

    err = nc_def_var(ncid, "latb", NC_FLOAT, 1, &ybdimid, &ybvarid);
    err = nc_put_att_text(ncid, ybvarid, "units", strlen("degrees_north"), "degrees_north");
    err = nc_put_att_text(ncid, ybvarid, "cartesian_axis", strlen("Y"), "Y");
#ifdef DO_COMPRESSED_GRID
    err = nc_def_var(ncid, "celllon", NC_FLOAT, 1, &celldimid, &celllonvarid);
    err = nc_put_att_text(ncid, celllonvarid, "units", strlen("degrees_east"), "degrees_east");
    err = nc_put_att_text(ncid, celllonvarid, "cartesian_axis", strlen("X"), "X");
    err = nc_put_att_text(ncid, celllonvarid, "long_name", strlen("cell longitude"), "cell longitude");

    err = nc_def_var(ncid, "celllat", NC_FLOAT, 1, &celldimid, &celllatvarid);
    err = nc_put_att_text(ncid, celllatvarid, "units", strlen("degrees_north"), "degrees_north");
    err = nc_put_att_text(ncid, celllatvarid, "cartesian_axis", strlen("Y"), "Y");
    err = nc_put_att_text(ncid, celllatvarid, "long_name", strlen("cell latitude"), "cell latitude");
#endif
    dimids[0] = ydimid; dimids[1] = xdimid;
    switch(filetype) {
    case FT_SOILCODE:
      err = nc_def_var(ncid, "soilcode", NC_BYTE, 2, dimids, &datavarid);
      scale_factor = 1.0; add_offset = 0.0;
      err = nc_put_att_float(ncid, datavarid, "scale_factor", NC_BYTE, 1, &scale_factor);
      err = nc_put_att_float(ncid, datavarid, "add_offset", NC_BYTE, 1, &add_offset);
      err = nc_put_att_text(ncid, datavarid, "units", strlen(""), "");
      err = nc_put_att_text(ncid, datavarid, "long_name", strlen("LPJ soil code [unitless number]"), "LPJ soil code [unitless number]");
      err = nc_put_att_uchar(ncid, datavarid, "_FillValue", NC_BYTE, 1, &fillbyte);
      ERR(err);
      err = nc_put_att_uchar(ncid, datavarid, "missing_value", NC_BYTE, 1, &fillbyte);
      ERR(err);
#ifdef DO_COMPRESSED_GRID
      dimids[0] = celldimid;
      err = nc_def_var(ncid, "soilcode_comp", NC_BYTE, 1, dimids, &datacellvarid);
      err = nc_put_att_float(ncid, datacellvarid, "scale_factor", NC_BYTE, 1, &scale_factor);
      err = nc_put_att_float(ncid, datacellvarid, "add_offset", NC_BYTE, 1, &add_offset);
      err = nc_put_att_text(ncid, datacellvarid, "units", strlen(""), "");
      err = nc_put_att_text(ncid, datacellvarid, "long_name", strlen("LPJ soil code [unitless number] compacted storage"), "LPJ soil code [unitless number] compacted storage");
      err = nc_put_att_text(ncid, datacellvarid, "coordinates", strlen("celllat celllon"), "celllat celllon");
      err = nc_put_att_uchar(ncid, datacellvarid, "_FillValue", NC_BYTE, 1, &fillbyte);
      ERR(err);
      err = nc_put_att_uchar(ncid, datacellvarid, "missing_value", NC_BYTE, 1, &fillbyte);
      ERR(err);
#endif
      break;
    case FT_LAKEFRAC:
      err = nc_def_var(ncid, "lakefrac", NC_BYTE, 2, dimids, &datavarid);
      scale_factor = 0.01; add_offset = 0.0;
      err = nc_put_att_float(ncid, datavarid, "scale_factor", NC_FLOAT, 1, &scale_factor);
      err = nc_put_att_float(ncid, datavarid, "add_offset", NC_FLOAT, 1, &add_offset);
      err = nc_put_att_text(ncid, datavarid, "units", strlen(""), "");
      err = nc_put_att_text(ncid, datavarid, "long_name", strlen("lake fraction [unitless number 0..1]"), "lake fraction [unitless number 0..1]");
      err = nc_put_att_uchar(ncid, datavarid, "_FillValue", NC_BYTE, 1, &fillbyte);
      ERR(err);
      err = nc_put_att_uchar(ncid, datavarid, "missing_value", NC_BYTE, 1, &fillbyte);
      ERR(err);
#ifdef DO_COMPRESSED_GRID
      dimids[0] = celldimid;
      err = nc_def_var(ncid, "soilcode_comp", NC_BYTE, 1, dimids, &datacellvarid);
      err = nc_put_att_float(ncid, datacellvarid, "scale_factor", NC_FLOAT, 1, &scale_factor);
      err = nc_put_att_float(ncid, datacellvarid, "add_offset", NC_FLOAT, 1, &add_offset);
      err = nc_put_att_text(ncid, datacellvarid, "units", strlen(""), "");
      err = nc_put_att_text(ncid, datacellvarid, "long_name", strlen("lake fraction [unitless number 0..1]"), "lake fraction [unitless number 0..1]");
      err = nc_put_att_text(ncid, datacellvarid, "coordinates", strlen("celllat celllon"), "celllat celllon");
      err = nc_put_att_uchar(ncid, datacellvarid, "_FillValue", NC_BYTE, 1, &fillbyte);
      ERR(err);
      err = nc_put_att_uchar(ncid, datacellvarid, "missing_value", NC_BYTE, 1, &fillbyte);
      ERR(err);
#endif
      break;
    default:
      myusage("dont know how to write NetCDF metadata for this LPJ data file type");
      break;
    }
    err = nc_enddef(ncid);

    err = nc_put_var_double(ncid, xvarid, lons);
    ERR(err);
    err = nc_put_var_double(ncid, yvarid, lats);
    ERR(err);
    err = nc_put_var_double(ncid, xbvarid, blons);
    ERR(err);
    err = nc_put_var_double(ncid, ybvarid, blats);
    ERR(err);
#ifdef DO_COMPRESSED_GRID
    err = nc_put_var_double(ncid, celllonvarid, celllons);
    ERR(err);
    err = nc_put_var_double(ncid, celllatvarid, celllats);
    ERR(err);
#endif
    switch(filetype) {
    case FT_SOILCODE:
    case FT_LAKEFRAC:
      err = nc_put_var_ubyte(ncid, datavarid, griddedbytedata);
      ERR(err);
#ifdef DO_COMPRESSED_GRID
      err = nc_put_var_ubyte(ncid, datacellvarid, bytedata);
      ERR(err);
#endif
      break;
    default:
      myusage("dont know how to write NetCDF data for this LPJ data file type");
      break;
    }
    err = nc_close(ncid);
    ERR(err);
  }


  return 0;
}
