Source code for pyunicorn.core.netcdf_dictionary

# This file is part of pyunicorn.
# Copyright (C) 2008--2024 Jonathan F. Donges and pyunicorn authors
# URL: <https://www.pik-potsdam.de/members/donges/software-2/software>
# License: BSD (3-clause)
#
# Please acknowledge and cite the use of this software and its authors
# when results are used in publications or published elsewhere.
#
# You can use the following reference:
# J.F. Donges, J. Heitzig, B. Beronov, M. Wiedermann, J. Runge, Q.-Y. Feng,
# L. Tupikina, V. Stolbova, R.V. Donner, N. Marwan, H.A. Dijkstra,
# and J. Kurths, "Unified functional network and nonlinear time series analysis
# for complex systems science: The pyunicorn package"

"""
Provides classes for saving and loading NetCDF files from and to
appropriate Python dictionaries, allowing NetCDF4 compression methods.
"""

#
#  Imports
#

import numpy as np

try:
    from h5netcdf.legacyapi import Dataset
except ImportError:
    try:
        from netCDF4 import Dataset
    except ImportError:
        print("pyunicorn: Packages netCDF4 or h5netcdf could not be loaded. "
              "Some functionality in class NetCDFDictionary might not be "
              "available!")


#
#  Define class NetCDFDictionary
#

class NetCDFDictionary:

    """
    Encapsulates appropriate dictionary following NetCDF conventions.

    Also contains methods to load data from NetCDF and NetCDF4 files.
    """

    # TODO: implement silence_level consistently

    def __init__(self, data_dict=None, silence_level=0):
        """
        Return a NetCDF object containing an appropriately structured
        dictionary.

        If no data_dict is given, a default quasi-empty dictionary is
        created.

        :type data_dict: dictionary
        :arg data_dict: Contains data in a structure following NetCDF
            conventions:
            {"global_attributes": {}, "dimensions": {},
            "variables": {"obs": {"array": (), "dims": (),
            "attributes": ()}}}
        :type silence_level: int >= 0
        :arg silence_level: The higher, the less progress info is output.
        """
        if data_dict is None:
            # A fresh default is built on every call, so instances never
            # share mutable state.
            data_dict = {
                "global_attributes": {
                    "title": "Quasi-empty default dictionary"},
                "dimensions": {"x": 1},
                "variables": {
                    "obs": {"array": np.array((1,)), "dims": ('x',),
                            "attributes": {"long_name": "observable"}}}}

        self.dict = data_dict
        self.silence_level = silence_level
        """(int >= 0) The higher, the less progress info is output."""

    def __str__(self):
        """
        Return a string representation of the object.

        Variables whose array is not a loaded array (for example the ``{}``
        placeholder left by :meth:`from_file` for arrays that were not
        requested) are listed as "array not loaded" instead of raising.
        """
        parts = [f'NetCDFDictionary:\nGlobal attributes:\n'
                 f'{self.dict["global_attributes"]}\nVariables:']
        for key, entry in self.dict["variables"].items():
            shape = getattr(entry["array"], "shape", None)
            if shape is None:
                parts.append(f'\n\t{key}\t-> array not loaded')
            else:
                parts.append(f'\n\t{key}\t-> array shape{shape}')
        return "".join(parts)

    #
    #  Private helpers
    #

    @staticmethod
    def _is_empty(value):
        """
        Return True if ``value`` holds no data.

        NumPy arrays are judged by their size, because the truth value of
        an array with more than one element is ambiguous and raises
        ``ValueError``. Everything else uses ordinary truthiness.
        """
        if isinstance(value, np.ndarray):
            return value.size == 0
        return not value

    @staticmethod
    def _actual_range(array):
        """Return ``np.array([min, max])`` of the given array."""
        return np.array([array.min(), array.max()])

    #
    #  Define methods for NetCDF4 files via NetCDF4 module
    #

    @staticmethod
    def from_file(file_name, with_array='all'):
        """
        Load NetCDF4 file into a dictionary.

        For every variable whose array is loaded, the array is converted to
        'float32', rescaled by the variable's ``scale_factor`` and
        ``add_offset`` attributes if present (these attributes are then
        removed from the dictionary), and an ``actual_range`` attribute is
        recomputed from the resulting data.

        :arg str file_name: The name of the data file.
        :arg [str] with_array: Names of data arrays to be loaded
            completely, or the string 'all' to load every array.
        :rtype: NetCDFDictionary instance, or None if the file could not
            be opened.
        """
        #  Open NetCDF4 file. A missing or unreadable file may surface as
        #  OSError (incl. FileNotFoundError) rather than RuntimeError.
        try:
            cdf = Dataset(file_name, "r")
        except (RuntimeError, OSError):
            print(f"MODULE: File {file_name} couldn't be opened.")
            return None
        print(f"MODULE: File {file_name} opened.")

        #  Always release the file handle, even if loading fails midway
        try:
            #  Create dictionary structure, copying all global attributes
            content = {"global_attributes": cdf.__dict__,
                       "dimensions": {},
                       "variables": {}}

            #  Copy all dimensions with their lengths
            for dim_name, dim_obj in cdf.dimensions.items():
                content["dimensions"][dim_name] = len(dim_obj)

            #  Loop over variables
            for var, var_obj in cdf.variables.items():
                #  Copy type, dimensions and variable attributes; the array
                #  stays an empty placeholder until it is actually loaded.
                attributes = var_obj.__dict__
                entry = {"array": {},
                         "type": var_obj.dtype.char,
                         "dims": var_obj.dimensions,
                         "attributes": attributes}
                content["variables"][var] = entry

                #  Load data only if wanted
                if not (with_array == 'all' or var in with_array):
                    continue

                try:
                    entry["array"] = var_obj[:]
                except MemoryError:
                    print(f"Memory Error during loading of array {var}")
                    continue
                except RuntimeError:
                    print(f"Other Error during loading of array {var}")
                    continue
                print(f"MODULE: Array {var} loaded to dictionary.")

                try:
                    entry["array"] = entry["array"].astype('float32')
                    print(f"MODULE: Array {var} converted to 'float32'.")
                except MemoryError:
                    print("MODULE: Memory Error during conversion of "
                          f"array {var}.")
                except RuntimeError:
                    print("MODULE: Other Error during conversion of "
                          f"array {var}.")

                #  If a scale_factor is given in the variable, rescale array
                #  NOTE(review): if the backend already applies
                #  scale_factor/add_offset when reading, this would rescale
                #  twice -- confirm against the library in use.
                if "scale_factor" in attributes:
                    entry["array"] *= var_obj.scale_factor
                    del attributes["scale_factor"]
                if "add_offset" in attributes:
                    entry["array"] += var_obj.add_offset
                    del attributes["add_offset"]

                #  Recalculate actual_range
                attributes["actual_range"] = \
                    NetCDFDictionary._actual_range(entry["array"])

            print("MODULE: Dictionary loaded from NetCDF file.")
        finally:
            cdf.close()

        return NetCDFDictionary(content)

    # FIXME: createDimension - length of time variable should be "unlimited"
    # TODO: Change file_name automatically if file already exists
    def to_file(self, file_name, compress=False, comp_level=6,
                least_significant_digit=10):
        """
        Write NetCDF4 file by using appropriate dictionary.

        As a side effect, the ``actual_range`` attribute of every written
        variable in ``self.dict`` is recomputed from its array. A variable
        that cannot be created in the file is reported and skipped.

        :arg str file_name: The name of the data file.
        :arg bool compress: Determines whether the data should be
            compressed.
        :arg int comp_level: Level of compression, between 0
            (no compression, fastest) and 9 (strongest compression,
            slowest).
        :arg int least_significant_digit: Last precise digit.
        """
        #  Check dictionary for empty entries
        for key, value in self.dict.items():
            if self._is_empty(value):
                print(f"MODULE: Entry {key} is empty.")

        print(f"MODULE: If {file_name} already existed, old file will be "
              "overwritten.")

        #  Format can be:
        #  NETCDF3_CLASSIC, NETCDF3_64BIT, NETCDF4_CLASSIC, NETCDF4
        cdf = Dataset(file_name, "w", format="NETCDF4")

        #  Always release the file handle, even if writing fails midway
        try:
            #  Write global attributes
            for name, value in self.dict["global_attributes"].items():
                setattr(cdf, name, value)

            #  Write dimensions with given lengths
            for name, length in self.dict["dimensions"].items():
                cdf.createDimension(name, length)

            #  Write variables
            for var, entry in self.dict["variables"].items():
                #  Check variable dictionary for empty entries
                for key, value in entry.items():
                    if key != "type" and self._is_empty(value):
                        print(f"MODULE: Entry {key} in variable {var} "
                              "is empty.")

                array = entry["array"]
                var_type = array.dtype.char

                try:
                    var_ = cdf.createVariable(
                        var, var_type, entry["dims"],
                        zlib=compress, complevel=comp_level,
                        least_significant_digit=least_significant_digit)
                except RuntimeError:
                    print(f"MODULE: Couldn't create variable {var} "
                          "in NetCDF file.")
                    #  Nothing to write into; move on to the next variable
                    continue

                #  Copy the array
                var_[:] = array

                #  Calculate actual_range for variables
                entry["attributes"]["actual_range"] = \
                    self._actual_range(array)

                #  Write all variable attributes to the file
                for att, value in entry["attributes"].items():
                    setattr(var_, att, value)
        finally:
            cdf.close()

        print(f"MODULE: Dictionary saved as NetCDF file {file_name}.")