From 297374a55241375396ca5321264dfebb82ffaaa6 Mon Sep 17 00:00:00 2001 From: bekozi Date: Fri, 19 May 2017 09:37:36 -0600 Subject: [PATCH] ENH: Added predicate to request dataset - Required minor fix to NetCDF variable loading --- doc/changelog.rst | 36 ++++++++++++++----- src/ocgis/driver/base.py | 24 +++++++++++-- src/ocgis/driver/nc.py | 7 +++- src/ocgis/driver/request/core.py | 9 ++++- src/ocgis/exc.py | 8 +++++ .../test_driver/test_request/test_core.py | 22 +++++++++++- 6 files changed, 91 insertions(+), 15 deletions(-) diff --git a/doc/changelog.rst b/doc/changelog.rst index 07e123f05..e451582d9 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -1,28 +1,46 @@ -========== -Change Log -========== +=========== +Change Logs +=========== .. _backwards-compatibility-v1.3: -Version ``2.x`` Backwards Compatibility ---------------------------------------- +Version 2.0.0 Change Log +------------------------ .. note:: Version ``1.3.x`` will be maintained for bug fixes and dependency upgrades. It is recommended for all users to upgrade to ``v2.x``. Some changes in ``v2.x`` will break backwards compatbility with ``v1.3.x``. These changes are listed below. If any of these changes affect your workflow, please post a `GitHub Issue `_ or contact the `support list `_. -* Changed dimension map format. See :ref:`configuring-a-dimension-map` for the new configuration. Use :meth:`~ocgis.DimensionMap.from_old_style_dimension_map` to convert old-style dimension maps. +:class:`~ocgis.RequestDataset` +++++++++++++++++++++++++++++++ + +* Changed default coordinate system to :class:`~ocgis.crs.Spherical` from :class:`~ocgis.crs.WGS84`. See :ref:`default-coordinate-system` for guidance on OpenClimateGIS coordinate systems. * Removed :class:`Inspect` object. Use the :meth:`~ocgis.RequestDataset.inspect` method. +* Changed dimension map format. See :ref:`configuring-a-dimension-map` for the new configuration. 
Use :meth:`~ocgis.DimensionMap.from_old_style_dimension_map` to convert old-style dimension maps. +* Removed ``alias`` parameters and attributes. Aliases are replaced by explicit name parameters (see :ref:`rename_variable ` for example). +* Removed :meth:`RequestDataset.inspect_as_dict` method. + +:class:`~ocgis.RequestDatasetCollection` +++++++++++++++++++++++++++++++++++++++++ + * Removed the :class:`RequestDatasetCollection` object in favor of request dataset or field sequences. + +:class:`~ocgis.OcgOperations` ++++++++++++++++++++++++++++++ + +* Changed default coordinate system to :class:`~ocgis.crs.Spherical` from :class:`~ocgis.crs.WGS84`. See :ref:`default-coordinate-system` for guidance on OpenClimateGIS coordinate systems. +* All collection variables are subset by shared dimensions (i.e. anything sharing a spatial dimension). Use the ``predicate`` argument to :class:`~ocgis.RequestDataset` to include/exclude variables. * Removed unique dimension identifers (``TID``, ``LID``, etc.) from tabular outputs. Unique geometry identifiers are maintained for foreign key file relationships. -* Removed ``alias`` parameters and attributes. Aliases are replaced by explicit name parameters (see :ref:`rename_variable ` for example). * Changed default unique identifier for no geometry from ``1`` to ``None``. -* Changed default coordinate system to :class:`~ocgis.crs.Spherical` from :class:`~ocgis.crs.WGS84`. See :ref:`default-coordinate-system` for guidance on OpenClimateGIS coordinate systems. * Removed ``headers`` argument from operations. The tabular structure has been streamlined in ``v2.x`` by removing extraneous identifier variables. * Removed global unique identifier as a default property of all variable objects. Dataset geometry identifers are now unique within a subset operation. * Removed check for `data` (the coordinate masking is still evaluated for empty subsets) masking following a subset to avoid loading all data from file to retrieve the mask. 
* Changed logging output directory to a nested ``logs`` directory inside output directory when ``add_auxiliary_files`` is ``True``. * Changed masked values in tabular formats to ``None`` from the numeric fill value. -* Removed :meth:`RequestDataset.inspect_as_dict` method. * Changed :ref:`search_radius_mult key` default to ``None``. Point subsetting will now use the point geometry for intersects operations. Point geometries are no longer buffered by default. * Removed UGRID conversion. Use `ugrid-tools `_ to convert to ESMF Unstructured Format. + +:class:`~ocgis.Inspect` ++++++++++++++++++++++++ + +* Removed :class:`Inspect` object. Use the :meth:`~ocgis.RequestDataset.inspect` method. \ No newline at end of file diff --git a/src/ocgis/driver/base.py b/src/ocgis/driver/base.py index 3e4ce6caa..7c433fd21 100644 --- a/src/ocgis/driver/base.py +++ b/src/ocgis/driver/base.py @@ -13,7 +13,7 @@ from ocgis.collection.field import Field from ocgis.constants import MPIWriteMode, TagName, KeywordArgument from ocgis.driver.dimension_map import DimensionMap -from ocgis.exc import DefinitionValidationError, NoDataVariablesFound, DimensionMapError +from ocgis.exc import DefinitionValidationError, NoDataVariablesFound, DimensionMapError, VariableMissingMetadataError from ocgis.util.helpers import get_group from ocgis.util.logging_ocgis import ocgis_lh from ocgis.variable.base import SourcedVariable, VariableCollection @@ -291,7 +291,20 @@ def get_metadata(self): :rtype: dict """ - return self._get_metadata_main_() + metadata_subclass = self._get_metadata_main_() + + # Use the predicate (filter) if present on the request dataset. + # TODO: Should handle groups? 
+ pred = self.rd.predicate + if pred is not None: + to_pop = [] + for var_name in metadata_subclass['variables'].keys(): + if not pred(var_name): + to_pop.append(var_name) + for var_name in to_pop: + metadata_subclass['variables'].pop(var_name) + + return metadata_subclass def get_source_metadata_as_json(self): # tdk: test @@ -729,7 +742,12 @@ def get_dump_report_for_group(group, global_attributes_name='global', indent=0): def get_variable_metadata_from_request_dataset(driver, variable): - return get_group(driver.metadata_source, variable.group, has_root=False)['variables'][variable._source_name] + variables_metadata = get_group(driver.metadata_source, variable.group, has_root=False)['variables'] + try: + ret = variables_metadata[variable._source_name] + except KeyError: + raise VariableMissingMetadataError(variable._source_name) + return ret def iter_all_group_keys(ddict, entry=None, has_root=True): diff --git a/src/ocgis/driver/nc.py b/src/ocgis/driver/nc.py index a192fb5aa..7dfb44c51 100644 --- a/src/ocgis/driver/nc.py +++ b/src/ocgis/driver/nc.py @@ -393,10 +393,15 @@ def read_from_collection(target, request_dataset, parent=None, name=None, source ret = VariableCollection(attrs=get_netcdf_attributes(target), parent=parent, name=name, source_name=source_name, uid=uid) + pred = request_dataset.predicate for varname, ncvar in target.variables.items(): + if pred is not None and not pred(varname): + continue source_name = varname name = rename_variable_map.get(varname, varname) - ret[name] = SourcedVariable(name=name, request_dataset=request_dataset, parent=ret, source_name=source_name) + sv = SourcedVariable(name=name, request_dataset=request_dataset, parent=ret, source_name=source_name) + ret[name] = sv + for group_name, ncgroup in list(target.groups.items()): child = read_from_collection(ncgroup, request_dataset, parent=ret, name=group_name, uid=uid) ret.add_child(child) diff --git a/src/ocgis/driver/request/core.py b/src/ocgis/driver/request/core.py index 
e9cccfffc..b2bd81241 100644 --- a/src/ocgis/driver/request/core.py +++ b/src/ocgis/driver/request/core.py @@ -114,6 +114,12 @@ class RequestDataset(AbstractRequestObject): :param opened: An open file used as a write target for the driver. :type opened: varies by ``driver`` class :param int uid: A unique identifier for the request dataset. + :param predicate: A filter function returning ``True`` if a variable should be included in the output field. The + function should take a single argument which is the string name of one variable. This function is applied + directly to the metadata before other functions (i.e. identifying data variables). + :type predicate: `function` + + >>> predicate = lambda x: x.startswith('w') .. _time units: http://netcdf4-python.googlecode.com/svn/trunk/docs/netCDF4-module.html#num2date .. _time calendar: http://netcdf4-python.googlecode.com/svn/trunk/docs/netCDF4-module.html#num2date @@ -124,7 +130,7 @@ def __init__(self, uri=None, variable=None, units=None, time_range=None, time_re time_subset_func=None, level_range=None, conform_units_to=None, crs='auto', t_units=None, t_calendar=None, t_conform_units_to=None, grid_abstraction='auto', dimension_map=None, field_name=None, driver=None, regrid_source=True, regrid_destination=False, metadata=None, - format_time=True, opened=None, uid=None, rename_variable=None): + format_time=True, opened=None, uid=None, rename_variable=None, predicate=None): self._is_init = True @@ -134,6 +140,7 @@ def __init__(self, uri=None, variable=None, units=None, time_range=None, time_re self._time_region = None self._time_subset_func = None + self.predicate = predicate if dimension_map is not None and isinstance(dimension_map, dict): dimension_map = DimensionMap.from_dict(dimension_map) self._dimension_map = dimension_map diff --git a/src/ocgis/exc.py b/src/ocgis/exc.py index 411554bd6..0040f0b29 100644 --- a/src/ocgis/exc.py +++ b/src/ocgis/exc.py @@ -422,3 +422,11 @@ class DimensionMapError(OcgException): def 
__init__(self, entry_key, message): msg = "Error with entry key '{}': {}".format(entry_key, message) super(DimensionMapError, self).__init__(message=msg) + + +class VariableMissingMetadataError(OcgException): + """Raised when variable metadata cannot be found.""" + + def __init__(self, variable_name): + msg = 'Variable is missing metadata: {}'.format(variable_name) + super(VariableMissingMetadataError, self).__init__(message=msg) diff --git a/src/ocgis/test/test_ocgis/test_driver/test_request/test_core.py b/src/ocgis/test/test_ocgis/test_driver/test_request/test_core.py index d62a62aa9..710e42044 100644 --- a/src/ocgis/test/test_ocgis/test_driver/test_request/test_core.py +++ b/src/ocgis/test/test_ocgis/test_driver/test_request/test_core.py @@ -2,7 +2,7 @@ import numpy as np -from ocgis import RequestDataset +from ocgis import RequestDataset, Variable from ocgis.collection.field import Field from ocgis.constants import TagName, MiscName, DimensionMapKey from ocgis.driver.nc import DriverNetcdf, DriverNetcdfCF @@ -96,6 +96,26 @@ def test_init_field_name(self): self.assertIsNone(field.source_name) field.load() + def test_system_predicate(self): + """Test creating a request dataset with a predicate.""" + + path = self.get_temporary_file_path('foo.nc') + field = self.get_field() + to_exclude = Variable(name='exclude') + field.add_variable(to_exclude) + field.write(path) + + rd = RequestDataset(uri=path, predicate=lambda x: not x.startswith('exclude')) + self.assertNotIn('exclude', rd.metadata['variables']) + actual = rd.get() + self.assertNotIn('exclude', actual) + + # Test predicate affects data variable identification. + path = self.get_temporary_file_path('foo.nc') + rd = RequestDataset(uri=path, predicate=lambda x: x != 'foo') + with self.assertRaises(NoDataVariablesFound): + assert rd.variable + @attr('cfunits') def test_conform_units_to(self): rd = self.get_request_dataset_netcdf(variable='a', units='celsius', conform_units_to='fahrenheit')