From 86169f05a9880e80bb036375c3107617272a8fee Mon Sep 17 00:00:00 2001 From: rosepearson Date: Mon, 21 Oct 2024 16:48:12 +1300 Subject: [PATCH 01/13] Fixed bug in stopbanks, where the stopbanks are not clipped to the catchment extents --- src/geofabrics/processor.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/geofabrics/processor.py b/src/geofabrics/processor.py index cea28d12..6b61268f 100644 --- a/src/geofabrics/processor.py +++ b/src/geofabrics/processor.py @@ -3874,6 +3874,9 @@ def load_stopbanks(self) -> bool: if stopbanks_path.is_file(): stopbanks = geopandas.read_file(stopbanks_path) + stopbanks = stopbanks.clip(self.catchment_geometry.land).sort_index( + ascending=True + ) if "width" not in stopbanks.columns and source == "osm": message = ( "For an 'osm' source, the stopbanks file is generated by " From 0c4aa6d4ce9bdc7bd87212a1bc9f7244f7019c8f Mon Sep 17 00:00:00 2001 From: rosepearson Date: Fri, 25 Oct 2024 13:11:37 +1300 Subject: [PATCH 02/13] Added capability for removing features/areas prior to any final intrpolation. Intended uses include stopbanks down --- src/geofabrics/dem.py | 43 +++++++++++++++++++++++++++++++++++-- src/geofabrics/processor.py | 26 ++++++++++++++++++++++ 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/src/geofabrics/dem.py b/src/geofabrics/dem.py index c09afac2..29e0490c 100644 --- a/src/geofabrics/dem.py +++ b/src/geofabrics/dem.py @@ -42,11 +42,11 @@ def chunk_mask(mask, chunk_size): return mask -def clip_mask(arr, geometry, chunk_size): +def clip_mask(arr, geometry, chunk_size, invert=False): mask = ( xarray.ones_like(arr, dtype=numpy.float16) .compute() - .rio.clip(geometry, drop=False) + .rio.clip(geometry, drop=False, invert=invert) .notnull() ) if chunk_size is not None: @@ -298,6 +298,7 @@ class DemBase(abc.ABC): "coarse DEM": 5, "patch": 6, "stopbanks": 7, + "masked feature": 8, "interpolated": 0, "no data": -1, } @@ -1051,6 +1052,44 @@ def interpolate_ocean_bathymetry(self, bathy_contours, method="linear"): method="first", ) + def clip_within_polygon( + self, + polygon_paths: list, + label: str + ): + """ Clip existing DEM to remove areas within the polygons """ + dem_bounds = geopandas.GeoDataFrame( + geometry=[shapely.geometry.box(*self._dem.rio.bounds())], + crs=self.catchment_geometry.crs) + clip_polygon = [] + for path in polygon_paths: + clip_polygon.append( + geopandas.read_file(path).to_crs(self.catchment_geometry.crs)) + clip_polygon = pandas.concat(clip_polygon).dissolve() + clip_polygon = clip_polygon.clip(dem_bounds) + clip_polygon.area.sum() > self.catchment_geometry.resolution ** 2: + self.logger.info(f"Clipping to remove all features in polygons {polygon_paths}") + mask = clip_mask( + arr=self._dem.z, + geometry=clip_polygon.geometry, + chunk_size=self.chunk_size, + ) + self._dem["z"] = self._dem.z.where( + ~mask, + numpy.nan, + ) + self._dem["data_source"] = self._dem.data_source.where( + ~mask, + self.SOURCE_CLASSIFICATION[label], + ) + self._dem["lidar_source"] = self._dem.lidar_source.where( + ~mask, self.SOURCE_CLASSIFICATION["no data"] + ) + self._write_netcdf_conventions_in_place(self._dem, self.catchment_geometry.crs) + else: + self.logger.warning(f"No clipping. Polygons {polygon_paths} do not overlap DEM.") + + def interpolate_elevations_within_polygon( self, elevations: geometry.EstimatedElevationPoints, diff --git a/src/geofabrics/processor.py b/src/geofabrics/processor.py index 6b61268f..546c216a 100644 --- a/src/geofabrics/processor.py +++ b/src/geofabrics/processor.py @@ -1377,6 +1377,32 @@ def add_hydrological_features( self.clean_cached_file(cached_file) cached_file = temp_file + if "feature_masking" in self.instructions["data_paths"]: + # Remove values inside feature_masking polygons - e.g. to mask stopbanks + subfolder = self.get_instruction_path(key="subfolder") + file_names = [] + for file_name in self.instructions["data_paths"]["feature_masking"]: + file_name = pathlib.Path(file_name) + if not file_name.is_absolute(): + file_name = subfolder / file_name + file_names.append(file_name) + + self.logger.info(f"Removing values in feature_masking: {file_names}") + + if len(file_names) > 0: + hydrologic_dem.clip_within_polygon( + polygons=file_names, + label="masked feature", + ) + temp_file = temp_folder / "dem_feature_masking.nc" + self.logger.info( + f"Save temp DEM with feature_masking added to netCDF: {temp_file}" + ) + hydrologic_dem.save_and_load_dem(temp_file) + if cached_file is not None: + self.clean_cached_file(cached_file) + cached_file = temp_file + def run(self): """This method executes the geofabrics generation pipeline to produce geofabric derivatives.""" From 2c06b6768226a93ead6844b5a0ff51b33524ac32 Mon Sep 17 00:00:00 2001 From: rosepearson Date: Fri, 25 Oct 2024 15:28:32 +1300 Subject: [PATCH 03/13] fix typo --- src/geofabrics/dem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/geofabrics/dem.py b/src/geofabrics/dem.py index 29e0490c..9b386615 100644 --- a/src/geofabrics/dem.py +++ b/src/geofabrics/dem.py @@ -1067,7 +1067,7 @@ def clip_within_polygon( geopandas.read_file(path).to_crs(self.catchment_geometry.crs)) clip_polygon = pandas.concat(clip_polygon).dissolve() clip_polygon = clip_polygon.clip(dem_bounds) - clip_polygon.area.sum() > self.catchment_geometry.resolution ** 2: + if clip_polygon.area.sum() > self.catchment_geometry.resolution ** 2: self.logger.info(f"Clipping to remove all features in polygons {polygon_paths}") mask = clip_mask( arr=self._dem.z, From d536853877d6cd08e0d19e11c123f2bf88693bb1 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 25 Oct 2024 02:29:03 +0000 Subject: [PATCH 04/13] fixup: Format Python code with Black --- src/geofabrics/dem.py | 73 +++++---- src/geofabrics/processor.py | 160 +++++++++++-------- tests/test_many_stages_waikanae/test_case.py | 4 +- 3 files changed, 130 insertions(+), 107 deletions(-) diff --git a/src/geofabrics/dem.py b/src/geofabrics/dem.py index 9b386615..5870033d 100644 --- a/src/geofabrics/dem.py +++ b/src/geofabrics/dem.py @@ -402,7 +402,7 @@ def save_and_load_dem( ) self.save_dem(filename=filename, dem=self._dem) del self._dem - gc.collect() + gc.collect() self._dem = self._load_dem(filename=filename) @staticmethod @@ -438,7 +438,6 @@ def _write_netcdf_conventions_in_place( crs_dict A dict with horizontal and vertical CRS information. """ - dem.rio.write_crs(crs_dict["horizontal"], inplace=True) dem.rio.write_transform(inplace=True) @@ -1052,25 +1051,25 @@ def interpolate_ocean_bathymetry(self, bathy_contours, method="linear"): method="first", ) - def clip_within_polygon( - self, - polygon_paths: list, - label: str - ): - """ Clip existing DEM to remove areas within the polygons """ + def clip_within_polygon(self, polygon_paths: list, label: str): + """Clip existing DEM to remove areas within the polygons""" dem_bounds = geopandas.GeoDataFrame( geometry=[shapely.geometry.box(*self._dem.rio.bounds())], - crs=self.catchment_geometry.crs) + crs=self.catchment_geometry.crs, + ) clip_polygon = [] for path in polygon_paths: clip_polygon.append( - geopandas.read_file(path).to_crs(self.catchment_geometry.crs)) + geopandas.read_file(path).to_crs(self.catchment_geometry.crs) + ) clip_polygon = pandas.concat(clip_polygon).dissolve() clip_polygon = clip_polygon.clip(dem_bounds) - if clip_polygon.area.sum() > self.catchment_geometry.resolution ** 2: - self.logger.info(f"Clipping to remove all features in polygons {polygon_paths}") + if clip_polygon.area.sum() > self.catchment_geometry.resolution**2: + self.logger.info( + f"Clipping to remove all features in polygons {polygon_paths}" + ) mask = clip_mask( - arr=self._dem.z, + arr=self._dem.z, geometry=clip_polygon.geometry, chunk_size=self.chunk_size, ) @@ -1085,10 +1084,13 @@ def clip_within_polygon( self._dem["lidar_source"] = self._dem.lidar_source.where( ~mask, self.SOURCE_CLASSIFICATION["no data"] ) - self._write_netcdf_conventions_in_place(self._dem, self.catchment_geometry.crs) + self._write_netcdf_conventions_in_place( + self._dem, self.catchment_geometry.crs + ) else: - self.logger.warning(f"No clipping. Polygons {polygon_paths} do not overlap DEM.") - + self.logger.warning( + f"No clipping. Polygons {polygon_paths} do not overlap DEM." + ) def interpolate_elevations_within_polygon( self, @@ -2755,27 +2757,30 @@ def add_lidar( self._write_netcdf_conventions_in_place(self._dem, self.catchment_geometry.crs) def add_roads(self, roads_polygon: dict): - """Set roads to paved and unpaved roughness values. + """Set roads to paved and unpaved roughness values. - Parameters - ---------- + Parameters + ---------- - roads_polygon - Dataframe with polygon and associated roughness values - """ + roads_polygon + Dataframe with polygon and associated roughness values + """ - - # Set unpaved roads - mask = clip_mask( - self._dem.z, roads_polygon[roads_polygon["surface"]=="unpaved"].geometry, self.chunk_size - ) - self._dem["zo"] = self._dem.zo.where(~mask, self.default_values["unpaved"]) - # Then set paved roads - mask = clip_mask( - self._dem.z, roads_polygon[roads_polygon["surface"]=="paved"].geometry, self.chunk_size - ) - self._dem["zo"] = self._dem.zo.where(~mask, self.default_values["paved"]) - self._write_netcdf_conventions_in_place(self._dem, self.catchment_geometry.crs) + # Set unpaved roads + mask = clip_mask( + self._dem.z, + roads_polygon[roads_polygon["surface"] == "unpaved"].geometry, + self.chunk_size, + ) + self._dem["zo"] = self._dem.zo.where(~mask, self.default_values["unpaved"]) + # Then set paved roads + mask = clip_mask( + self._dem.z, + roads_polygon[roads_polygon["surface"] == "paved"].geometry, + self.chunk_size, + ) + self._dem["zo"] = self._dem.zo.where(~mask, self.default_values["paved"]) + self._write_netcdf_conventions_in_place(self._dem, self.catchment_geometry.crs) def _add_tiled_lidar_chunked( self, diff --git a/src/geofabrics/processor.py b/src/geofabrics/processor.py index 546c216a..53d805d7 100644 --- a/src/geofabrics/processor.py +++ b/src/geofabrics/processor.py @@ -1074,7 +1074,7 @@ def run(self): status = raw_dem.add_patch( patch_path=coarse_dem_path, label="coarse DEM", layer="z" ) - if status: # Only update if patch sucessfully added + if status: # Only update if patch sucessfully added temp_file = temp_folder / f"raw_dem_{coarse_dem_path.stem}.nc" self.logger.info(f"Save temp raw DEM to netCDF: {temp_file}") raw_dem.save_and_load_dem(temp_file) @@ -1124,7 +1124,7 @@ def add_hydrological_features( temp_folder: pathlib.Path, ): """Add in any bathymetry data - ocean or river""" - + cached_file = None # Check for ocean bathymetry. Interpolate offshore if significant @@ -1389,7 +1389,7 @@ def add_hydrological_features( self.logger.info(f"Removing values in feature_masking: {file_names}") - if len(file_names) > 0: + if len(file_names) > 0: hydrologic_dem.clip_within_polygon( polygons=file_names, label="masked feature", @@ -1651,10 +1651,26 @@ def get_roughness_instruction(self, key: str): "minimum": 0.00001, "maximum": 5, "paved": 0.001, - "unpaved": 0.011 + "unpaved": 0.011, + }, + "roads": { + "source": "osm", + "ignore": [ + "pedestrian", + "footway", + "footpath", + "track", + "path", + "cycleway", + ], + "widths": { + "default": 8, + "residential": 8, + "tertiary": 12, + "secondary": 12, + "motorway": 12, + }, }, - "roads": {"source": "osm", "ignore": ["pedestrian", "footway", "footpath", "track", "path", "cycleway"], - "widths": {"default": 8, "residential": 8, "tertiary": 12, "secondary": 12, "motorway": 12}}, } if "roughness" in self.instructions and key in self.instructions["roughness"]: @@ -1679,10 +1695,14 @@ def get_roughness_instruction(self, key: str): def load_roads_osm(self) -> bool: """Download OpenStreetMap roads within the catchment BBox.""" - defaults = {"roads": "osm_roads.geojson", - "roads_polygon": "osm_roads_polygon.geojson"} + defaults = { + "roads": "osm_roads.geojson", + "roads_polygon": "osm_roads_polygon.geojson", + } roads_path = self.get_instruction_path("roads", defaults=defaults) - roads_polygon_path = self.get_instruction_path("roads_polygon", defaults=defaults) + roads_polygon_path = self.get_instruction_path( + "roads_polygon", defaults=defaults + ) if roads_polygon_path.is_file(): roads_polygon = geopandas.read_file(roads_path) @@ -1695,7 +1715,7 @@ def load_roads_osm(self) -> bool: if "roughness" not in roads_polygon.columns: message = ( "No roughnesses defined in the road polygon file. This is " - F"required. Please check {roads_polygon_path} and add." + f"required. Please check {roads_polygon_path} and add." ) self.logger.error(message) raise ValueError(message) @@ -1742,7 +1762,11 @@ def load_roads_osm(self) -> bool: element_dict["geometry"].append(element.geometry()) element_dict["OSM_id"].append(element.id()) element_dict["road"].append(element.tags()["highway"]) - surface = element.tags()["surface"] if "surface" in element.tags() else "unclassified" + surface = ( + element.tags()["surface"] + if "surface" in element.tags() + else "unclassified" + ) element_dict["surface"].append(surface) roads = ( geopandas.GeoDataFrame(element_dict, crs=self.OSM_CRS) @@ -1754,24 +1778,24 @@ def load_roads_osm(self) -> bool: road_instructions = self.get_roughness_instruction("roads") roads = roads[~roads["road"].isin(road_instructions["ignore"])] for paving in ["asphalt", "concrete"]: - roads.loc[roads["surface"]==paving, "surface"] = "paved" - logging.info(f"Surfaces of {roads[roads['surface']!='paved']['surface'].unique()} all assumed to be unpaved.") - roads.loc[roads["surface"]!="paved", "surface"] = "unpaved" - + roads.loc[roads["surface"] == paving, "surface"] = "paved" + logging.info( + f"Surfaces of {roads[roads['surface']!='paved']['surface'].unique()} all assumed to be unpaved." + ) + roads.loc[roads["surface"] != "paved", "surface"] = "unpaved" + # Add roughness' roughness = self.get_roughness_instruction("default_values") roads["roughness"] = roughness["paved"] - roads.loc[roads["surface"]!="paved", "roughness"] = roughness["unpaved"] - + roads.loc[roads["surface"] != "paved", "roughness"] = roughness["unpaved"] + # Add widths roads["width"] = road_instructions["widths"]["default"] for key, value in road_instructions["widths"].items(): - roads.loc[roads["road"]==key, "width"] = value - + roads.loc[roads["road"] == key, "width"] = value + # Clip to land - roads = roads.clip(self.catchment_geometry.land).sort_index( - ascending=True - ) + roads = roads.clip(self.catchment_geometry.land).sort_index(ascending=True) # Save files roads.to_file(roads_path) @@ -1866,7 +1890,7 @@ def run(self): roughness_dem.save_and_load_dem(temp_file) self.clean_cached_file(temp_folder / "raw_lidar_zo.nc") cached_file = temp_file - + # Add roads to roughness roughness_dem.add_roads(roads_polygon=roads) @@ -1878,10 +1902,8 @@ def run(self): cached_file = temp_file # save results - result_file = self.get_instruction_path('result_geofabric') - self.logger.info( - f"Write out the geofabric to netCDF: {result_file}" - ) + result_file = self.get_instruction_path("result_geofabric") + self.logger.info(f"Write out the geofabric to netCDF: {result_file}") self.save_dem( filename=result_file, dataset=roughness_dem.dem, @@ -3130,7 +3152,7 @@ def __init__(self, json_instructions: json, debug: bool = True): self.debug = debug def load_dem(self, filename: pathlib.Path): - """ Load a DEM""" + """Load a DEM""" chunk_size = self.get_processing_instructions("chunk_size") if chunk_size is not None: chunks = {"x": chunk_size, "y": chunk_size} @@ -3238,9 +3260,7 @@ def minimum_elevation_in_polygon( # clip to polygon and return minimum elevation return float(small_z.rio.clip([geometry]).min()) - def estimate_closed_elevations( - self, waterways: geopandas.GeoDataFrame - ): + def estimate_closed_elevations(self, waterways: geopandas.GeoDataFrame): """Sample the DEM around the tunnels to estimate the bed elevation.""" # Return if already generated. Otherwise calculate. @@ -3254,7 +3274,7 @@ def estimate_closed_elevations( closed_waterways["polygon"] = closed_waterways.buffer( closed_waterways["width"].to_numpy() ) - + # If no closed waterways write out empty files and return if len(closed_waterways) == 0: closed_waterways["z"] = [] @@ -3269,11 +3289,11 @@ def estimate_closed_elevations( for index, row in closed_waterways.iterrows(): dem_file = self.get_result_file_path(key="raw_dem", index=index) dem = self.load_dem(filename=dem_file) - polygon = shapely.ops.clip_by_rect(row.polygon, *dem.rio.bounds()) + polygon = shapely.ops.clip_by_rect(row.polygon, *dem.rio.bounds()) if polygon.area > 0: - elevations.append(self.minimum_elevation_in_polygon( - geometry=polygon, dem=dem - )) + elevations.append( + self.minimum_elevation_in_polygon(geometry=polygon, dem=dem) + ) else: elevations.append(numpy.nan) @@ -3326,9 +3346,7 @@ def estimate_closed_elevations( ].to_file(polygon_file) points_exploded.to_file(elevation_file) - def estimate_open_elevations( - self, waterways: geopandas.GeoDataFrame - ): + def estimate_open_elevations(self, waterways: geopandas.GeoDataFrame): """Sample the DEM along the open waterways to enforce a decreasing elevation.""" # Return if already generated. Otherwise calculate. @@ -3340,7 +3358,7 @@ def estimate_open_elevations( # Define polygons open_waterways = waterways[numpy.logical_not(waterways["tunnel"])] open_waterways = open_waterways[~open_waterways.geometry.isna()] - + # sample polygons at end of each waterway and order uphill first open_waterways["start_elevation"] = numpy.nan open_waterways["end_elevation"] = numpy.nan @@ -3348,16 +3366,13 @@ def estimate_open_elevations( dem_file = self.get_result_file_path(key="raw_dem", index=index) dem = self.load_dem(filename=dem_file) - waterway = shapely.ops.clip_by_rect(row.geometry, *dem.rio.bounds()) - start_elevation = ( - self.minimum_elevation_in_polygon( - geometry=waterway.interpolate(0).buffer(row.width), dem=dem - ) + waterway = shapely.ops.clip_by_rect(row.geometry, *dem.rio.bounds()) + start_elevation = self.minimum_elevation_in_polygon( + geometry=waterway.interpolate(0).buffer(row.width), dem=dem ) - end_elevation = ( - self.minimum_elevation_in_polygon( - geometry=waterway.interpolate(1, normalized=True).buffer(row.width), dem=dem - ) + end_elevation = self.minimum_elevation_in_polygon( + geometry=waterway.interpolate(1, normalized=True).buffer(row.width), + dem=dem, ) if start_elevation < end_elevation: waterway = waterway.reverse() @@ -3365,7 +3380,6 @@ def estimate_open_elevations( open_waterways.loc[index, "start_elevation"] = start_elevation open_waterways.loc[index, "end_elevation"] = end_elevation open_waterways.loc[index, "geometry"] = waterway - # Remove any waterways without data to assess elevations nan_filter = ( @@ -3397,7 +3411,7 @@ def sample(geometry): resolution = self.get_resolution() number_of_samples = int(numpy.ceil(geometry.length / resolution)) normalised_sample_indices = ( - numpy.array(range(number_of_samples + 1)) / number_of_samples + numpy.array(range(number_of_samples + 1)) / number_of_samples ) sampled_multipoints = shapely.geometry.MultiPoint( @@ -3466,11 +3480,13 @@ def create_dem(self, waterways: geopandas.GeoDataFrame) -> xarray.Dataset: # Create DEM over the waterway region # Save out the waterway polygons as a file with a single multipolygon waterways_polygon_file = self.get_result_file_path( - key="waterways_polygon", index=index) + key="waterways_polygon", index=index + ) waterways_polygon = geopandas.GeoDataFrame( - geometry=[row.geometry.buffer(row.width)], crs=waterways.crs) + geometry=[row.geometry.buffer(row.width)], crs=waterways.crs + ) waterways_polygon.to_file(waterways_polygon_file) - + # Create DEM generation instructions dem_instructions = self.instructions dem_instruction_paths = dem_instructions["data_paths"] @@ -3479,7 +3495,7 @@ def create_dem(self, waterways: geopandas.GeoDataFrame) -> xarray.Dataset: if "general" not in dem_instructions: dem_instructions["general"] = {} dem_instructions["general"]["ignore_clipping"] = True - + # Create the ground DEM file if this has not be created yet!\ self.logger.info(f"Generating DEM for waterway {index}.") runner = RawLidarDemGenerator(self.instructions) @@ -3487,7 +3503,7 @@ def create_dem(self, waterways: geopandas.GeoDataFrame) -> xarray.Dataset: del runner gc.collect() xarray.backends.file_manager.FILE_CACHE.clear() - return + return def load_waterways(self) -> bool: """Download OpenStreetMap waterways and tunnels within the catchment BBox.""" @@ -3684,7 +3700,7 @@ def __init__(self, json_instructions: json, debug: bool = True): self.debug = debug def load_dem(self, filename: pathlib.Path): - """ Load a DEM""" + """Load a DEM""" chunk_size = self.get_processing_instructions("chunk_size") if chunk_size is not None: chunks = {"x": chunk_size, "y": chunk_size} @@ -3778,15 +3794,13 @@ def maximum_elevation_in_polygon( if dem.x[-1] - dem.x[0] > 0 else slice(bbox[2], bbox[0]) ) - #breakpoint() + # breakpoint() small_z = dem.z.sel(x=x_slice, y=y_slice) # clip to polygon and return minimum elevation return float(small_z.max()) - def estimate_elevations_simple( - self, stopbanks: geopandas.GeoDataFrame - ): + def estimate_elevations_simple(self, stopbanks: geopandas.GeoDataFrame): """Sample the DEM around the tunnels to estimate the bed elevation.""" # Check if already generated @@ -3800,22 +3814,23 @@ def estimate_elevations_simple( dem_file = self.get_result_file_path(key="raw_dem", index=index) dem = self.load_dem(filename=dem_file) stopbanks.loc[index, "geometry"] = shapely.ops.clip_by_rect( - row.geometry, *dem.rio.bounds()) - + row.geometry, *dem.rio.bounds() + ) + # If no stopbanks return an empty result if len(stopbanks) == 0: stopbanks.drop(columns=["width"]).to_file(polygon_file) stopbanks["z"] = [] stopbanks.to_file(elevation_file) return - + # Sampled points along stopbanks to define crest elevation at def sample(geometry): """Sample evenly space poinst along polylines""" resolution = self.get_resolution() number_of_samples = int(numpy.ceil(geometry.length / resolution)) normalised_sample_indices = ( - numpy.array(range(number_of_samples + 1)) / number_of_samples + numpy.array(range(number_of_samples + 1)) / number_of_samples ) sampled_multipoints = shapely.geometry.MultiPoint( @@ -3823,8 +3838,10 @@ def sample(geometry): ) return sampled_multipoints - - stopbanks["points"] = stopbanks["geometry"].apply(lambda geometry: sample(geometry)) + + stopbanks["points"] = stopbanks["geometry"].apply( + lambda geometry: sample(geometry) + ) points = stopbanks.set_geometry("points", drop=True)[["geometry", "width"]] points = points.sort_index(ascending=True).explode( ignore_index=False, index_parts=True, column="geometry" @@ -3835,7 +3852,7 @@ def sample(geometry): for index, rows in points.groupby(level=0): dem_file = self.get_result_file_path(key="raw_dem", index=index) dem = self.load_dem(filename=dem_file) - #breakpoint() + # breakpoint() zs = rows["polygons"].apply( lambda geometry: self.maximum_elevation_in_polygon( geometry=geometry, dem=dem @@ -3862,16 +3879,17 @@ def sample(geometry): def create_dem(self, stopbanks: geopandas.GeoDataFrame) -> xarray.Dataset: """Create and return a DEM at a resolution 1.5x the waterway width.""" - # Check for DEM for each stopbank for index, row in stopbanks.iterrows(): dem_file = self.get_result_file_path(key="raw_dem", index=index) if not dem_file.is_file(): # Create DEM over each stopbank region stopbank_polygon_file = self.get_result_file_path( - key="stopbank_polygon", index=index) + key="stopbank_polygon", index=index + ) stopbank_polygon = geopandas.GeoDataFrame( - geometry=[row.geometry.buffer(row.width)], crs=stopbanks.crs) + geometry=[row.geometry.buffer(row.width)], crs=stopbanks.crs + ) stopbank_polygon.to_file(stopbank_polygon_file) # Update instructions for next stopbank diff --git a/tests/test_many_stages_waikanae/test_case.py b/tests/test_many_stages_waikanae/test_case.py index d7dc2874..02097933 100644 --- a/tests/test_many_stages_waikanae/test_case.py +++ b/tests/test_many_stages_waikanae/test_case.py @@ -156,14 +156,14 @@ def test_result_geofabric_linux(self): f"{number_above_threshold / len(diff_array.flatten()) * 100}%", ) # Compare the generated and benchmark roughnesses - '''diff_array = test.zo.data - benchmark.zo.data + """diff_array = test.zo.data - benchmark.zo.data numpy.testing.assert_array_almost_equal( test.zo.data, benchmark.zo.data, decimal=3, err_msg="The generated test has significantly different roughness from the " f"benchmark where there is LiDAR: {diff_array}", - )''' + )""" diff_array = ( test.zo.data[~numpy.isnan(test.zo.data)] - benchmark.zo.data[~numpy.isnan(test.zo.data)] From 9692734c3bb803d372f26537378c01c194940b39 Mon Sep 17 00:00:00 2001 From: rosepearson Date: Fri, 25 Oct 2024 16:08:20 +1300 Subject: [PATCH 05/13] fix typo --- src/geofabrics/dem.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/geofabrics/dem.py b/src/geofabrics/dem.py index 5870033d..bd795816 100644 --- a/src/geofabrics/dem.py +++ b/src/geofabrics/dem.py @@ -12,6 +12,7 @@ import typing import pathlib import geopandas +import pandas import shapely import dask import dask.array From be1a0d8fce705befc7173e7238cc03228fa720b8 Mon Sep 17 00:00:00 2001 From: rosepearson Date: Tue, 29 Oct 2024 14:18:27 +1300 Subject: [PATCH 06/13] Support for stopbanks down and added in a test --- src/geofabrics/dem.py | 14 ++-- src/geofabrics/processor.py | 2 +- .../test_stopbanks_waikanae/data/benchmark.nc | 4 +- .../test_stopbanks_waikanae/instruction.json | 4 +- tests/test_stopbanks_waikanae/test_case.py | 73 +++++++++---------- 5 files changed, 51 insertions(+), 46 deletions(-) diff --git a/src/geofabrics/dem.py b/src/geofabrics/dem.py index bd795816..8420911b 100644 --- a/src/geofabrics/dem.py +++ b/src/geofabrics/dem.py @@ -356,11 +356,14 @@ def save_dem( self._write_netcdf_conventions_in_place(dem, self.catchment_geometry.crs) if filename.suffix.lower() == ".nc": if compression is not None: - compression["grid_mapping"] = dem.encoding["grid_mapping"] + encoding_keys = ("_FillValue", "dtype", "scale_factor", "add_offset", "grid_mapping") encoding = {} for key in dem.data_vars: - compression["dtype"] = dem[key].dtype - encoding[key] = compression + dem[key] = dem[key].astype(geometry.RASTER_TYPE) + encoding[key] = {encoding_key: value for encoding_key, value in dem[key].encoding.items() if encoding_key in encoding_keys} + if "dtype" not in encoding[key]: + encoding[key]["dtype"] = dem[key].dtype + encoding[key] = {**encoding[key], **compression} dem.to_netcdf( filename, format="NETCDF4", engine="netcdf4", encoding=encoding ) @@ -1054,14 +1057,15 @@ def interpolate_ocean_bathymetry(self, bathy_contours, method="linear"): def clip_within_polygon(self, polygon_paths: list, label: str): """Clip existing DEM to remove areas within the polygons""" + crs = self.catchment_geometry.crs dem_bounds = geopandas.GeoDataFrame( geometry=[shapely.geometry.box(*self._dem.rio.bounds())], - crs=self.catchment_geometry.crs, + crs=crs["horizontal"], ) clip_polygon = [] for path in polygon_paths: clip_polygon.append( - geopandas.read_file(path).to_crs(self.catchment_geometry.crs) + geopandas.read_file(path).to_crs(crs["horizontal"]) ) clip_polygon = pandas.concat(clip_polygon).dissolve() clip_polygon = clip_polygon.clip(dem_bounds) diff --git a/src/geofabrics/processor.py b/src/geofabrics/processor.py index 53d805d7..640c0e7b 100644 --- a/src/geofabrics/processor.py +++ b/src/geofabrics/processor.py @@ -1391,7 +1391,7 @@ def add_hydrological_features( if len(file_names) > 0: hydrologic_dem.clip_within_polygon( - polygons=file_names, + polygon_paths=file_names, label="masked feature", ) temp_file = temp_folder / "dem_feature_masking.nc" diff --git a/tests/test_stopbanks_waikanae/data/benchmark.nc b/tests/test_stopbanks_waikanae/data/benchmark.nc index a6a577f6..62f6acee 100644 --- a/tests/test_stopbanks_waikanae/data/benchmark.nc +++ b/tests/test_stopbanks_waikanae/data/benchmark.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b44d1b9f74d529e521fddc4a2d36f9edfe00043ec400f88136ebbf2dbdd6159f -size 38537 +oid sha256:802ba5c1f6b6fef96a2ce4d34d2dd2d386f5cf942b45f14834d4a8a2e0a15ae2 +size 38699 diff --git a/tests/test_stopbanks_waikanae/instruction.json b/tests/test_stopbanks_waikanae/instruction.json index b857030b..f462b2c4 100644 --- a/tests/test_stopbanks_waikanae/instruction.json +++ b/tests/test_stopbanks_waikanae/instruction.json @@ -58,6 +58,7 @@ "extents": "catchment.geojson", "raw_dem": "raw_dem.nc", "stopbanks": [{"extents": "stopbanks/stopbank_polygon.geojson", "elevations": "stopbanks/stopbank_elevation.geojson"}], + "feature_masking": ["stopbank_masking_polygon.geojson"], "result_dem": "test_dem.nc", "benchmark": "benchmark.nc" }, @@ -71,7 +72,8 @@ "general": { "z_labels": {"stopbanks": "z"}, "lidar_classifications_to_keep": [2, 9], - "interpolation": {"no_data": "linear"} + "interpolation": {"no_data": "linear"}, + "compression": 7 } } } diff --git a/tests/test_stopbanks_waikanae/test_case.py b/tests/test_stopbanks_waikanae/test_case.py index 5be76e3a..90585eb2 100644 --- a/tests/test_stopbanks_waikanae/test_case.py +++ b/tests/test_stopbanks_waikanae/test_case.py @@ -45,21 +45,28 @@ def setUpClass(cls): "datasets": {"vector": {"linz": {"key": linz_key}}} } - # create fake catchment boundary + crs = cls.instructions["dem"]["output"]["crs"]["horizontal"] + + # create and save fake catchment boundary x0 = 1771850 y0 = 5472250 x1 = 1772150 y1 = 5472600 catchment = shapely.geometry.Polygon([(x0, y0), (x1, y0), (x1, y1), (x0, y1)]) - catchment = geopandas.GeoSeries([catchment]) - catchment = catchment.set_crs( - cls.instructions["dem"]["output"]["crs"]["horizontal"] - ) - - # save faked catchment boundary - used as land boundary as well + catchment = geopandas.GeoDataFrame(geometry=[catchment], crs=crs) catchment_file = cls.results_dir / "catchment.geojson" catchment.to_file(catchment_file) + # create and save stopbank masking polygon + x0 = 1771866 + y0 = 5472488 + x1 = 1771882 + y1 = 5472568 + catchment = shapely.geometry.Polygon([(x0, y0), (x1, y0), (x1, y1), (x0, y1)]) + catchment = geopandas.GeoDataFrame(geometry=[catchment], crs=crs) + catchment_file = cls.results_dir / "stopbank_masking_polygon.geojson" + catchment.to_file(catchment_file) + # Run pipeline - download files and generated DEM runner.from_instructions_dict(cls.instructions) @@ -77,17 +84,21 @@ def test_result_windows(self): ) with rioxarray.rioxarray.open_rasterio(file_path, masked=True) as test: test.load() - # Compare the generated and benchmark elevations - diff_array = ( - test.z.data[~numpy.isnan(test.z.data)] - - benchmark.z.data[~numpy.isnan(benchmark.z.data)] - ) - logging.info(f"DEM elevation diff is: {diff_array[diff_array != 0]}") + + # Comparisons numpy.testing.assert_array_almost_equal( test.z.data, benchmark.z.data, - err_msg="The generated result_geofabric has different elevation data from " - "the benchmark_dem", + decimal=1, + err_msg="The generated test has significantly different elevation from the " + f"benchmark.", + ) + + numpy.testing.assert_array_almost_equal( + test.data_source.data, + benchmark.data_source.data, + err_msg="The generated test data_source layer differs from the " + f"benchmark.", ) # explicitly free memory as xarray seems to be hanging onto memory @@ -110,32 +121,20 @@ def test_result_linux(self): ) with rioxarray.rioxarray.open_rasterio(file_path, masked=True) as test: test.load() - # Get data generated from LiDAR - lidar_mask = (test.data_source.data == 1) & (benchmark.data_source.data == 1) - # Compare the generated and benchmark elevations - lidar_diff = test.z.data[lidar_mask] - benchmark.z.data[lidar_mask] + # Comparisons numpy.testing.assert_array_almost_equal( - test.z.data[lidar_mask], - benchmark.z.data[lidar_mask], - decimal=6, + test.z.data, + benchmark.z.data, + decimal=1, err_msg="The generated test has significantly different elevation from the " - f"benchmark where there is LiDAR: {lidar_diff}", - ) - - diff_array = ( - test.z.data[~numpy.isnan(test.z.data)] - - benchmark.z.data[~numpy.isnan(test.z.data)] + f"benchmark.", ) - logging.info(f"DEM array diff is: {diff_array[diff_array != 0]}") - threshold = 10e-6 - percent = 2.5 - number_above_threshold = len(diff_array[numpy.abs(diff_array) > threshold]) - self.assertTrue( - number_above_threshold < len(diff_array) * percent / 100, - f"More than {percent}% of DEM values differ by more than {threshold} on Linux test" - f" run: {diff_array[numpy.abs(diff_array) > threshold]} or " - f"{number_above_threshold / len(diff_array.flatten()) * 100}%", + numpy.testing.assert_array_almost_equal( + test.data_source.data, + benchmark.data_source.data, + err_msg="The generated test data_source layer differs from the " + f"benchmark.", ) # explicitly free memory as xarray seems to be hanging onto memory From 811b77c0e718b85ca200dc522f5e57ad30e948b0 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 29 Oct 2024 01:19:05 +0000 Subject: [PATCH 07/13] fixup: Format Python code with Black --- src/geofabrics/dem.py | 18 +++++++++++++----- tests/test_stopbanks_waikanae/test_case.py | 2 +- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/geofabrics/dem.py b/src/geofabrics/dem.py index 8420911b..8126eed2 100644 --- a/src/geofabrics/dem.py +++ b/src/geofabrics/dem.py @@ -356,11 +356,21 @@ def save_dem( self._write_netcdf_conventions_in_place(dem, self.catchment_geometry.crs) if filename.suffix.lower() == ".nc": if compression is not None: - encoding_keys = ("_FillValue", "dtype", "scale_factor", "add_offset", "grid_mapping") + encoding_keys = ( + "_FillValue", + "dtype", + "scale_factor", + "add_offset", + "grid_mapping", + ) encoding = {} for key in dem.data_vars: dem[key] = dem[key].astype(geometry.RASTER_TYPE) - encoding[key] = {encoding_key: value for encoding_key, value in dem[key].encoding.items() if encoding_key in encoding_keys} + encoding[key] = { + encoding_key: value + for encoding_key, value in dem[key].encoding.items() + if encoding_key in encoding_keys + } if "dtype" not in encoding[key]: encoding[key]["dtype"] = dem[key].dtype encoding[key] = {**encoding[key], **compression} @@ -1064,9 +1074,7 @@ def clip_within_polygon(self, polygon_paths: list, label: str): ) clip_polygon = [] for path in polygon_paths: - clip_polygon.append( - geopandas.read_file(path).to_crs(crs["horizontal"]) - ) + clip_polygon.append(geopandas.read_file(path).to_crs(crs["horizontal"])) clip_polygon = pandas.concat(clip_polygon).dissolve() clip_polygon = clip_polygon.clip(dem_bounds) if clip_polygon.area.sum() > self.catchment_geometry.resolution**2: diff --git a/tests/test_stopbanks_waikanae/test_case.py b/tests/test_stopbanks_waikanae/test_case.py index 90585eb2..104c7eb2 100644 --- a/tests/test_stopbanks_waikanae/test_case.py +++ b/tests/test_stopbanks_waikanae/test_case.py @@ -93,7 +93,7 @@ def test_result_windows(self): err_msg="The generated test has significantly different elevation from the " f"benchmark.", ) - + numpy.testing.assert_array_almost_equal( test.data_source.data, benchmark.data_source.data, From 86b386ccf12bebfdbb296b611d63160a34d1ad70 Mon Sep 17 00:00:00 2001 From: rosepearson Date: Tue, 29 Oct 2024 15:57:50 +1300 Subject: [PATCH 08/13] reversed changes to saving netcdf files --- src/geofabrics/dem.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/src/geofabrics/dem.py b/src/geofabrics/dem.py index 8126eed2..d087bc24 100644 --- a/src/geofabrics/dem.py +++ b/src/geofabrics/dem.py @@ -356,24 +356,11 @@ def save_dem( self._write_netcdf_conventions_in_place(dem, self.catchment_geometry.crs) if filename.suffix.lower() == ".nc": if compression is not None: - encoding_keys = ( - "_FillValue", - "dtype", - "scale_factor", - "add_offset", - "grid_mapping", - ) + compression["grid_mapping"] = dem.encoding["grid_mapping"] encoding = {} for key in dem.data_vars: - dem[key] = dem[key].astype(geometry.RASTER_TYPE) - encoding[key] = { - encoding_key: value - for encoding_key, value in dem[key].encoding.items() - if encoding_key in encoding_keys - } - if "dtype" not in encoding[key]: - encoding[key]["dtype"] = dem[key].dtype - encoding[key] = {**encoding[key], **compression} + compression["dtype"] = dem[key].dtype + encoding[key] = compression dem.to_netcdf( filename, format="NETCDF4", engine="netcdf4", encoding=encoding ) From 2fd3ca558fecd4e505e2d5cb7acbccb744cfbdd8 Mon Sep 17 00:00:00 2001 From: rosepearson Date: Tue, 29 Oct 2024 16:28:33 +1300 Subject: [PATCH 09/13] tidyup bbox definition for osm --- src/geofabrics/processor.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/geofabrics/processor.py b/src/geofabrics/processor.py index 640c0e7b..6689b55f 100644 --- a/src/geofabrics/processor.py +++ b/src/geofabrics/processor.py @@ -3542,12 +3542,7 @@ def load_waterways(self) -> bool: # Construct query query = OSMPythonTools.overpass.overpassQueryBuilder( - bbox=[ - bbox_lat_long.bounds.miny[0], - bbox_lat_long.bounds.minx[0], - bbox_lat_long.bounds.maxy[0], - bbox_lat_long.bounds.maxx[0], - ], + bbox=list(bbox_lat_long.total_bounds[[1,0,3,2]]), elementType="way", selector="waterway", out="body", @@ -3941,12 +3936,7 @@ def load_stopbanks(self) -> bool: elif "osm" == source: # Download from OSM and save bbox_lat_long = self.catchment_geometry.catchment.to_crs(self.OSM_CRS) - bbox = [ - bbox_lat_long.bounds.miny[0], - bbox_lat_long.bounds.minx[0], - bbox_lat_long.bounds.maxy[0], - bbox_lat_long.bounds.maxx[0], - ] + bbox = list(bbox_lat_long.total_bounds[[1,0,3,2]]) element_dict = { "geometry": [], "OSM_id": [], From 2273666d739783e957554c8ce40884550283384f Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 29 Oct 2024 03:29:09 +0000 Subject: [PATCH 10/13] fixup: Format Python code with Black --- src/geofabrics/processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/geofabrics/processor.py b/src/geofabrics/processor.py index 6689b55f..2023bae5 100644 --- a/src/geofabrics/processor.py +++ b/src/geofabrics/processor.py @@ -3542,7 +3542,7 @@ def load_waterways(self) -> bool: # Construct query query = OSMPythonTools.overpass.overpassQueryBuilder( - bbox=list(bbox_lat_long.total_bounds[[1,0,3,2]]), + bbox=list(bbox_lat_long.total_bounds[[1, 0, 3, 2]]), elementType="way", selector="waterway", out="body", @@ -3936,7 +3936,7 @@ def load_stopbanks(self) -> bool: elif "osm" == source: # Download from OSM and save bbox_lat_long = self.catchment_geometry.catchment.to_crs(self.OSM_CRS) - bbox = list(bbox_lat_long.total_bounds[[1,0,3,2]]) + bbox = list(bbox_lat_long.total_bounds[[1, 0, 3, 2]]) element_dict = { "geometry": [], "OSM_id": [], From cbe9bb8d655d9e0ca40441d074aec03f72e99da4 Mon Sep 17 00:00:00 2001 From: rosepearson Date: Wed, 30 Oct 2024 08:38:42 +1300 Subject: [PATCH 11/13] Updated saving to hopefully ensure compression --- src/geofabrics/dem.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/geofabrics/dem.py b/src/geofabrics/dem.py index d087bc24..da8406a3 100644 --- a/src/geofabrics/dem.py +++ b/src/geofabrics/dem.py @@ -353,14 +353,19 @@ def save_dem( ), "all DataArray variables of a xarray.Dataset must have a CRS" try: + for key in dem.data_vars: + dem[key] = dem[key].astype(geometry.RASTER_TYPE) self._write_netcdf_conventions_in_place(dem, self.catchment_geometry.crs) if filename.suffix.lower() == ".nc": if compression is not None: - compression["grid_mapping"] = dem.encoding["grid_mapping"] + encoding_keys = ("_FillValue", "dtype", "scale_factor", "add_offset", "grid_mapping") encoding = {} for key in dem.data_vars: - compression["dtype"] = dem[key].dtype - encoding[key] = compression + encoding[key] = {encoding_key: value for encoding_key, value + in dem[key].encoding.items() if encoding_key in encoding_keys} + if "dtype" not in encoding[key]: + encoding[key]["dtype"] = dem[key].dtype + encoding[key] = {**encoding[key], **compression} dem.to_netcdf( filename, format="NETCDF4", engine="netcdf4", encoding=encoding ) From ce97dc4bf838b6ee3e9824d8c892ee49b250b679 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 29 Oct 2024 19:52:21 +0000 Subject: [PATCH 12/13] fixup: Format Python code with Black --- src/geofabrics/dem.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/geofabrics/dem.py b/src/geofabrics/dem.py index da8406a3..8b90e147 100644 --- a/src/geofabrics/dem.py +++ b/src/geofabrics/dem.py @@ -358,11 +358,20 @@ def save_dem( self._write_netcdf_conventions_in_place(dem, self.catchment_geometry.crs) if filename.suffix.lower() == ".nc": if compression is not None: - encoding_keys = ("_FillValue", "dtype", "scale_factor", "add_offset", "grid_mapping") + encoding_keys = ( + "_FillValue", + "dtype", + "scale_factor", + "add_offset", + "grid_mapping", + ) encoding = {} for key in dem.data_vars: - encoding[key] = {encoding_key: value for encoding_key, value - in dem[key].encoding.items() if encoding_key in encoding_keys} + encoding[key] = { + encoding_key: value + for encoding_key, value in dem[key].encoding.items() + if encoding_key in encoding_keys + } if "dtype" not in encoding[key]: encoding[key]["dtype"] = dem[key].dtype encoding[key] = {**encoding[key], **compression} From e79459dda049f04a1a8c3e0176f08c1b562b13ca Mon Sep 17 00:00:00 2001 From: rosepearson Date: Wed, 30 Oct 2024 10:04:37 +1300 Subject: [PATCH 13/13] Updated package version --- pyproject.toml | 2 +- src/geofabrics/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6f176fa9..fc080aed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta" [project] name = "geofabrics" -version = "1.1.22" +version = "1.1.23" description = "A package for creating geofabrics for flood modelling." readme = "README.md" authors = [{ name = "Rose pearson", email = "rose.pearson@niwa.co.nz" }] diff --git a/src/geofabrics/version.py b/src/geofabrics/version.py index 71e221c6..fda6eb8c 100644 --- a/src/geofabrics/version.py +++ b/src/geofabrics/version.py @@ -3,4 +3,4 @@ Contains the package version information """ -__version__ = "1.1.22" +__version__ = "1.1.23"