
Feature/cii #9

Open · wants to merge 8 commits into base: main
13 changes: 8 additions & 5 deletions README.md
@@ -1,11 +1,14 @@
# read-netcdf
This is a parser for netcdf-files storing climate data for Europe and Germany. It currently supports reading files from 5 datasets (4 are actually netCDF):
This is a parser for files storing climate data for Europe and Germany. It currently supports reading files from 6 datasets (5 are netCDF, one is a fixed-width text file). Data sources are the Copernicus CDS (Climate Data Store) and the DWD (Deutscher Wetterdienst).

Copernicus CDS:
* Regional Climate Projections from **CORDEX** climate models regarding the variable 'tas' (average daily temperature) for Europe
* a **Heatwaves** and Cold Spells dataset for Europe (climate projections)
* a **Temperature Statistics** dataset for Europe (climate projections)
* a **Climate Indicators** dataset for Europe (reanalysis and climate projections) regarding precipitation variables
DWD:
* **HOSTRADA**, historical data for Germany (up to the present, still updated)
* TRY for Germany with a present and future representative year
* TRY for Germany with a present and a future representative year (fixed-width text files)

The scripts named `read_...` read from netCDF or other data files, which you have to download from climate data stores. See the section on [prerequisites](#Prerequisites) below for datasets from the Copernicus Climate Data Store. Note that they write to outfiles and usually append to them! This is useful for collecting data from monthly datafiles into one outfile containing the data for a whole year.
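The append-on-rerun behaviour can be sketched in isolation (a minimal sketch, independent of any dataset; file name and values are made up):

```python
import os

path = "out/demo.csv"
os.makedirs(os.path.dirname(path), exist_ok=True)
if os.path.exists(path):
    os.remove(path)  # start clean for the demo

# two "monthly" runs appending to the same outfile
for month_values in (["1951;  800.0"], ["1951;  712.4"]):
    is_newfile = not os.path.exists(path)
    with open(path, "a+") as outfile:
        if is_newfile:
            outfile.write("time; pr\n")  # header is written once only
        for line in month_values:
            outfile.write(line + "\n")

print(open(path).read().splitlines())
```

The header appears once, and data from both runs accumulates in the same file.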

@@ -29,7 +32,7 @@ The "Deutscher Wetterdienst" has some good [advice about using climate projectio
3. Climate Projection computations are not forecasts
4. ...

## How to Run the Script
## How to Run the Script 'read_cordex_tas.py'
### Prerequisites:
* you need an .nc file from CORDEX. You can download one at Copernicus' Climate Data Store
* create a free account. Logged in, make selections in the form below (link)
@@ -49,7 +52,7 @@ The "Deutscher Wetterdienst" has some good [advice about using climate projectio
## What's up with that File 'variables_values_info.md'?
It contains some information about the structure of the data in the .nc file.
You can reproduce the outputs (or get different ones) with the steps below; alternatively, check out
'explore_netCDF_dataset.py'.
'explore_netCDF_dataset.py' for an interactive session.

Since CORDEX-data have a standardized structure, you might get the same responses from your .nc-file.

@@ -77,7 +80,7 @@ python
```

## Contributing
We appreciate your contributions! In fact, we decided to open-source this simple script mainly to connect with others working on similar topics. Leave us a note in Discussions!
We appreciate your contributions! In fact, we decided to open-source these simple scripts mainly to connect with others working on similar topics. Leave us a note in Discussions!

### How to Contribute to the Code
Just open a PR or an Issue.
80 changes: 80 additions & 0 deletions netcdf_dataset_commons.py
@@ -0,0 +1,80 @@
import netCDF4


def get_index_of_closest_entry(entries, value):
    # squared differences have the same argmin as absolute distances
    distances = (entries - value) ** 2
    # return the index of the smallest distance
return distances.argmin()


class DerivedDataset:

def __init__(self, path_to_file, main_variable):
# PREPARE THE DATASET
dataset = netCDF4.Dataset(path_to_file, 'r')

# CHECK the main variable
dataset_variables = dataset.variables.keys()
if main_variable not in dataset_variables:
print("Your main variable must match the dataset variables: ",
dataset_variables)
print("Your current main variable:", main_variable)
            print("ABORTING... Please run again with the correct variable.")
            raise SystemExit(1)

# reading locations from the dataset
self.lats = dataset.variables['lat'][:]
self.lons = dataset.variables['lon'][:]

self.dataset_times = dataset.variables['time']
self.dataset_variable = dataset.variables[main_variable][:]
self.dataset_main_unit = dataset.variables[main_variable].units


# a method to find the index of the grid point closest to the desired location
def get_closest_gridpoints_indices(self, desired_lat, desired_lon):

lat_index = get_index_of_closest_entry(self.lats, desired_lat)
lon_index = get_index_of_closest_entry(self.lons, desired_lon)

return lat_index, lon_index


def get_coordinates_from_indices(self, location_coordinates):
return self.lats[location_coordinates[0]], self.lons[location_coordinates[1]]


# wrapper method to use for more than one location at once
def find_closests_grid_points_indices(self, desired_locations):

# collecting the indices on the grid closest to each desired location
closest_grid_points_indices = []

for location in desired_locations:
grid_point_for_location = self.get_closest_gridpoints_indices(location[0], location[1])
closest_grid_points_indices.append(grid_point_for_location)

return closest_grid_points_indices

    # wrapper method to use for more than one tuple of indices at once
def get_many_pretty_coordinates(self, index_tuple_list, decimal_places=1):
pretty_coordinates = []
for indexlat_indexlon in index_tuple_list:
lat, lon = self.get_coordinates_from_indices(indexlat_indexlon)
rounded_coordinates = lat.round(decimal_places), lon.round(decimal_places)
pretty_coordinates.append(rounded_coordinates)
return pretty_coordinates

def get_pretty_times(self):
ds_times = self.dataset_times
times_as_dates = netCDF4.num2date(ds_times[:], ds_times.units, ds_times.calendar)
return times_as_dates

def get_pretty_data(self, grid_index):
values = self.dataset_variable
values_for_location_many_decimal_places = values[:, grid_index[0], grid_index[1]].compressed()
return values_for_location_many_decimal_places.round(1)

def get_dataset_main_unit(self):
return self.dataset_main_unit
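The nearest-index helper above can be exercised with a synthetic coordinate grid (numpy only; the grid values here are made up, not from any dataset):

```python
import numpy as np

def get_index_of_closest_entry(entries, value):
    # squared differences have the same argmin as absolute distances
    distances = (entries - value) ** 2
    return distances.argmin()

lats = np.arange(47.0, 55.0, 0.25)  # synthetic 0.25-degree latitude grid
idx = get_index_of_closest_entry(lats, 50.1)
print(idx, lats[idx])  # → 12 50.0
```

Because latitudes and longitudes are looked up independently, this works on 1-D coordinate arrays as stored in regular-grid netCDF files.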
145 changes: 145 additions & 0 deletions read_cds_cii_netcdf_res_point25.py
@@ -0,0 +1,145 @@
# READ PRECIPITATION FROM CII (Climate Indicators for Europe, derived from reanalysis and projections) NETCDF ##################
#
# Works for one given location and the hardcoded variable "prAdjust". If your datafile has another variable, please change it.
#
# Summary:
# * reads a netCDF file into a netCDF4.Dataset, reads the location from the command line
# * finds the grid point closest to the given lat-lon coordinates
# * uses the hardcoded variable name "prAdjust"
# * converts the time points from the file to Python datetimes
# * collects the values into a file and
# * prints some output to stdout
#
# Dataset:
# Climate indicators for Europe from 1940 to 2100 derived from reanalysis and climate projections
# https://cds.climate.copernicus.eu/datasets/sis-ecde-climate-indicators/?tab=download
#
# Output:
# Converting file to dataset...
# Searching grid point for location: 50.0 11.0
# Closest grid point found is: 50.0 11.0
# prAdjust
# reading times...
# Writing to file out/12_total_precipitation-projections-yearly-rcp_8_5-cclm4_8_17-mpi_esm_lr-r1i1p1-grid-v1.csv
# 1955 880.7
# 1965 707.1
# 1975 835.7
# 1985 880.0
# ...
# 2095 1293.1

#
# Prerequisites:
# * install the libraries you do not have yet (netCDF4, numpy, ...), e.g. by
# * pip install -r requirements.txt
# * you need an .nc file from the dataset mentioned above. You can download one at Copernicus' Climate Data Store
# * create a free account. Logged in, make selections in the form (tab=download)
# * Submit the form, download the file and unzip it
# * if you wish, change the years to be printed out
#
# Usage:
# python read_cds_cii_netcdf_res_point25.py /path/to/filename.nc "50.,11.799"
#
#############################################################
import os
import sys

import netCDF4
import numpy as np

# GET THE FILE

path_to_file = sys.argv[1]
print("Converting file to dataset...")
dataset = netCDF4.Dataset(path_to_file, 'r')

# GEO LOCATION

coordinates = sys.argv[2].split(',')
location_lat = float(coordinates[0])
location_lon = float(coordinates[1])

# print("Reading lat and lon values from dataset...")
lat, lon = dataset.variables['lat'], dataset.variables['lon']

# extract lat/lon values (in degrees) to numpy arrays
latvals = lat[:]
lonvals = lon[:]



# a function to find the index of the grid point closest to the desired location
def get_index_of_min(distances):
return distances.argmin()
    # if distances is multidimensional, e.g. containing both lat and lon, use np.unravel_index, too:
# min_index = np.unravel_index(np.argmin(distances), distances.shape)
# return min_index


def get_closest_gridpoints_indices(lats, lons, desired_lat, desired_lon):
    # compute the squared differences (same argmin as absolute distances)
lat_diffs = (lats - desired_lat) ** 2
lon_diffs = (lons - desired_lon) ** 2

# min and index of
lat_index = get_index_of_min(lat_diffs)
lon_index = get_index_of_min(lon_diffs)

return lat_index, lon_index


print("Searching grid point for location: ", location_lat, location_lon)
grid_lat_index, grid_lon_index = get_closest_gridpoints_indices(latvals, lonvals, location_lat, location_lon)
print("Closest grid point found is:", lat[grid_lat_index].round(2), lon[grid_lon_index].round(2))

variable_name = "prAdjust"
print(variable_name)

def filename_from_path(path_to_nc_file):
    # take the basename and strip everything after the first dot
    filename_with_extension = os.path.basename(path_to_nc_file)
    filename = filename_with_extension.split(".")[0]
    return filename

infile_name = filename_from_path(path_to_file)

# path_to_outfile = "out/" + variable_name + "_" + infile_name + ".csv"
path_to_outfile = "out/" + infile_name + ".csv"

# prAdjust(time, lat, lon)

values = dataset.variables[variable_name][:]
values_for_location_many_decimal_places = values[:, grid_lat_index, grid_lon_index].compressed()
values_for_location_rounded = values_for_location_many_decimal_places.round(1)
# print(values_for_location_rounded)

# DATE
print("reading times...")
ds_times = dataset.variables['time']
times_as_dates = netCDF4.num2date(ds_times[:], ds_times.units, ds_times.calendar)
# print(times_as_dates[2].year)

# WRITE EACH DATE WITH ITS CLIMATE VALUE (here: precipitation) TO FILE, AND PRINT A SAMPLE
# write the header only if the outfile is new, therefore check first:
exists = os.path.exists(path_to_outfile)
is_newfile = not exists

os.makedirs(os.path.dirname(path_to_outfile), exist_ok=True)

with open(path_to_outfile, 'a+') as outfile:
print("Writing to file ", path_to_outfile)
if is_newfile:
# write column headers
outfile.write(str(location_lat) + "," + str(location_lon) + "\n")
outfile.write("time; " + variable_name + "\n")

    for i, value in enumerate(values_for_location_rounded):
        year = times_as_dates[i].year

        value_pretty = '%7.1f' % value

        line = f"{year};{value_pretty}\n"
        outfile.write(line)

        # echo every year ending in 5 as a progress sample
        if year % 5 == 0 and year % 10 != 0:
            print(year, "\t", value)
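The write loop and its year-echo filter can be sketched with synthetic values (a minimal sketch, no .nc file needed; the precipitation numbers are made up):

```python
years = range(1951, 1971)
values = [round(800 + 0.5 * i, 1) for i in range(20)]  # made-up precipitation values

lines, echoed = [], []
for year, value in zip(years, values):
    # same fixed-width formatting as the script's outfile lines
    lines.append(f"{year};{'%7.1f' % value}")
    # same condition as in the script: echo only years ending in 5
    if year % 5 == 0 and year % 10 != 0:
        echoed.append(year)

print(lines[0])  # → 1951;  800.0
print(echoed)    # → [1955, 1965]
```

This matches the sample output in the header comment (1955, 1965, 1975, ...).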