-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbase.py
130 lines (102 loc) · 4.69 KB
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import abc
import logging
from dataclasses import asdict, dataclass
import xarray as xr
from dscreator.cfarray.attributes import DatasetAttrsDiscrete
from dscreator.cfarray.time_series import timeseriescoords
from dscreator.cfarray.trajectory import trajectorycoords
from dscreator.cfarray.base import idarray
from dscreator.cfarray.dims import TIME
@dataclass
class DatasetBuilder(abc.ABC):
    """Abstract base class for builders that create xarray datasets.

    Concrete builders implement ``create``, ``variable_attributes`` and
    ``dataset_attributes``. The shared ``add_acdd`` helper copies the fields
    returned by ``dataset_attributes`` into the global attributes of a dataset.
    """

    uuid: str
    """Identifier of the dataset"""
    dataset_name: str
    """The name of the dataset"""
    station_name: str
    """The name of the station, added to cf_role"""
    grouping: str
    """Grouping, for example project, lower cases and underscores used to construct the storage path"""
    is_acdd: bool
    """If the dataset should have ACDD attributes"""

    @abc.abstractmethod
    def create(self, data_dict: dict[str, list]) -> xr.Dataset:
        """Entrypoint for creating a xarray dataset

        Args:
            data_dict: Mapping from variable or coordinate name to its values.

        Returns:
            The assembled dataset.
        """

    @abc.abstractmethod
    def variable_attributes(self, variable_name: str) -> dict:
        """Match variable name to C&F

        Match variable name to the climate and forecast convention based on the given variable code.
        Standard names are found at http://vocab.nerc.ac.uk/collection/P07/current/
        online unit list on https://ncics.org/portfolio/other-resources/udunits2/

        Args:
            variable_name: Name of the data variable to look up.

        Returns:
            The attributes to attach to the variable.
        """

    def add_acdd(self, ds: xr.Dataset) -> None:
        """Add ACDD attributes to a xarray dataset

        Add attributes following the Attribute Convention for Data Discovery to a dataset.
        The dataset is modified in place: every field of the object returned by
        ``dataset_attributes`` is written to ``ds.attrs``, overwriting existing
        attributes with the same name.

        Args:
            ds: The dataset that receives the attributes.
        """
        logging.info("Adding ACDD attributes")
        # dataset_attributes returns a dataclass instance; asdict flattens it
        # into the plain mapping that attrs.update expects.
        ds.attrs.update(asdict(self.dataset_attributes(ds)))

    @abc.abstractmethod
    def dataset_attributes(self, ds: xr.Dataset) -> DatasetAttrsDiscrete:
        """Return the dataset level attributes for ``ds``

        The returned object is converted with ``dataclasses.asdict`` in
        ``add_acdd``, so it has to be a dataclass instance.

        Args:
            ds: The dataset to describe.
        """
@dataclass
class TimeseriesDatasetBuilder(DatasetBuilder):
    """Builder for datasets with the ``timeSeries`` feature type."""

    def create(self, data_dict: dict[str, list]) -> xr.Dataset:
        """Entrypoint for creating a xarray dataset

        The data_dict should contain the following keys:

        - time (list of datetime) coordinate and dimension
        - latitude (list of float with length 1) coordinate
        - longitude (list of float with length 1) coordinate
        - any other key is treated as a data variable

        The data variables should have the same length as the time list. The data variables will be
        matched to the climate and forecast convention based on the given variable code, see the
        variable_attributes method in the subclass.
        The latitude and longitude should have a length of 1, as they are the same for all the time steps.
        """
        # Latitude and longitude are attached as coordinates below, so they are
        # left out of the variables handed to xarray.
        position_keys = ("latitude", "longitude")
        variables = {}
        for key in data_dict:
            if key in position_keys:
                continue
            variables[key] = {"dims": TIME, "data": data_dict[key]}
        dataset = xr.Dataset.from_dict(variables)

        # Only the first latitude/longitude entry is used.
        coords = timeseriescoords(
            time=dataset.time,
            latitude=data_dict["latitude"][0],
            longitude=data_dict["longitude"][0],
        )
        dataset = dataset.assign_coords(coords)

        # Attributes are assigned before station_name is added, so the id
        # variable is not passed to variable_attributes.
        for name in dataset.data_vars:
            dataset[name].attrs = self.variable_attributes(name)
        dataset["station_name"] = idarray(self.station_name, "timeseries_id")

        dataset.attrs["id"] = self.uuid
        dataset.attrs["featureType"] = "timeSeries"

        if self.is_acdd:
            # need to have data to add acdd
            if dataset.sizes["time"] > 0:
                self.add_acdd(dataset)
        return dataset
@dataclass
class TrajectoryDatasetBuilder(DatasetBuilder):
    """Builder for datasets with the ``trajectory`` feature type."""

    def create(self, data_dict: dict[str, list]) -> xr.Dataset:
        """Entrypoint for creating a xarray dataset

        The data_dict should contain the following keys:

        - time (list of datetime) coordinate and dimension
        - latitude (list of float) coordinate
        - longitude (list of float) coordinate
        - any other key is treated as a data variable

        All lists should have the same length and time should be increasing. The data variables will be
        matched to the climate and forecast convention based on the given variable code, see the
        variable_attributes method in the subclass.
        """
        # Every entry, including time, latitude and longitude, is first loaded
        # along the TIME dimension.
        variables = {}
        for key in data_dict:
            variables[key] = {"dims": TIME, "data": data_dict[key]}
        dataset = xr.Dataset.from_dict(variables)

        coords = trajectorycoords(
            time=data_dict["time"],
            latitude=data_dict["latitude"],
            longitude=data_dict["longitude"],
        )
        dataset = dataset.assign_coords(coords)

        # Attributes are assigned before trajectory_name is added, so the id
        # variable is not passed to variable_attributes.
        for name in dataset.data_vars:
            dataset[name].attrs = self.variable_attributes(name)
        dataset["trajectory_name"] = idarray(self.station_name, "trajectory_id")

        dataset.attrs["id"] = self.uuid
        dataset.attrs["featureType"] = "trajectory"

        if self.is_acdd:
            # need to have data to add acdd
            if dataset.sizes["time"] > 0:
                self.add_acdd(dataset)
        return dataset