diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 7fe024637..0f4f70b5d 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -24,6 +24,8 @@ jobs: checks: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@branch-23.06 + with: + enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8929af177..a171c0872 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,6 +42,11 @@ repos: - id: codespell args: ["--config pyproject.toml"] additional_dependencies: ["tomli"] + - repo: https://github.com/rapidsai/dependency-file-generator + rev: v1.5.1 + hooks: + - id: rapids-dependency-file-generator + args: ["--clean"] default_language_version: python: python3 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 558001477..b3c4190b5 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -23,10 +23,12 @@ dependencies: - myst-parser - nbsphinx - ninja +- notebook - numpydoc - nvcc_linux-64=11.8 - pre-commit - pydata-sphinx-theme +- pydeck - pytest - pytest-cov - pytest-xdist @@ -34,6 +36,7 @@ dependencies: - rmm=23.06 - scikit-build>=0.13.1 - setuptools +- shapely - sphinx<6 - sysroot_linux-64==2.17 name: all_cuda-118_arch-x86_64 diff --git a/cpp/benchmarks/floating_point_equality.cu b/cpp/benchmarks/floating_point_equality.cu index 33f3e75b8..f0f99c4d8 100644 --- a/cpp/benchmarks/floating_point_equality.cu +++ b/cpp/benchmarks/floating_point_equality.cu @@ -45,14 +45,14 @@ template void generate_floats(FloatsIter begin, FloatsIter end) { using T = typename std::iterator_traits::value_type; - auto engine_x = deterministic_engine(std::distance(begin, end)); + auto engine_x = cuspatial::test::deterministic_engine(std::distance(begin, end)); auto lo = std::numeric_limits::min(); auto hi = std::numeric_limits::max(); - auto x_dist = make_uniform_dist(lo, hi); + auto x_dist = cuspatial::test::make_uniform_dist(lo, hi); - auto x_gen = value_generator{lo, hi, engine_x, x_dist}; + auto x_gen = cuspatial::test::value_generator{lo, hi, engine_x, x_dist}; thrust::tabulate( rmm::exec_policy(), begin, end, [x_gen] __device__(size_t n) mutable { return x_gen(n); }); diff --git a/cpp/benchmarks/points_in_range.cu b/cpp/benchmarks/points_in_range.cu index 06bf17d44..442fc18f4 100644 --- a/cpp/benchmarks/points_in_range.cu +++ b/cpp/benchmarks/points_in_range.cu @@ -15,6 +15,7 @@ */ #include + #include #include @@ -54,14 +55,14 @@ using namespace cuspatial; template void generate_points(PointsIter begin, PointsIter end, vec_2d range_min, vec_2d range_max) { - auto engine_x = deterministic_engine(std::distance(begin, end)); - auto engine_y = deterministic_engine(2 * std::distance(begin, end)); + auto engine_x = cuspatial::test::deterministic_engine(std::distance(begin, end)); + auto engine_y = cuspatial::test::deterministic_engine(2 * std::distance(begin, end)); - auto x_dist = make_uniform_dist(range_min.x, range_max.x); - auto y_dist = make_uniform_dist(range_min.y, range_max.y); + auto x_dist = cuspatial::test::make_uniform_dist(range_min.x, range_max.x); + auto y_dist = cuspatial::test::make_uniform_dist(range_min.y, range_max.y); - auto x_gen = value_generator{range_min.x, range_max.x, engine_x, x_dist}; - auto y_gen = value_generator{range_min.y, range_max.y, engine_y, y_dist}; + auto x_gen = cuspatial::test::value_generator{range_min.x, range_max.x, engine_x, x_dist}; + auto y_gen = cuspatial::test::value_generator{range_min.y, range_max.y, engine_y, y_dist}; thrust::tabulate(rmm::exec_policy(), begin, end, [x_gen, y_gen] __device__(size_t n) mutable { return vec_2d{x_gen(n), y_gen(n)}; diff --git a/cpp/include/cuspatial/experimental/detail/algorithm/is_point_in_polygon.cuh b/cpp/include/cuspatial/experimental/detail/algorithm/is_point_in_polygon.cuh index 9b506c145..73c3ebd29 100644 --- a/cpp/include/cuspatial/experimental/detail/algorithm/is_point_in_polygon.cuh +++ b/cpp/include/cuspatial/experimental/detail/algorithm/is_point_in_polygon.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/cuspatial/experimental/detail/join/get_quad_and_local_point_indices.cuh b/cpp/include/cuspatial/experimental/detail/join/get_quad_and_local_point_indices.cuh index 39ee2d8b0..3c73a10e8 100644 --- a/cpp/include/cuspatial/experimental/detail/join/get_quad_and_local_point_indices.cuh +++ b/cpp/include/cuspatial/experimental/detail/join/get_quad_and_local_point_indices.cuh @@ -23,8 +23,11 @@ namespace cuspatial { namespace detail { -inline __device__ std::pair get_quad_and_local_point_indices( - uint32_t const global_index, uint32_t const* point_offsets, uint32_t const* point_offsets_end) +template +inline __device__ std::pair get_quad_and_local_point_indices( + IndexType const global_index, + PointOffsetIterator point_offsets_begin, + PointOffsetIterator point_offsets_end) { // Calculate the position in "point_offsets" that `global_index` falls between. // This position is the index of the poly/quad pair for this `global_index`. @@ -33,10 +36,10 @@ inline __device__ std::pair get_quad_and_local_point_indices // quadrant. Adding this zero-based position to the quadrant's first point position in the // quadtree yields the "global" position in the `point_indices` map. auto const local_point_offset = - thrust::upper_bound(thrust::seq, point_offsets, point_offsets_end, global_index) - 1; + thrust::upper_bound(thrust::seq, point_offsets_begin, point_offsets_end, global_index) - 1; return std::make_pair( // quad_poly_index - thrust::distance(point_offsets, local_point_offset), + thrust::distance(point_offsets_begin, local_point_offset), // local_point_index global_index - *local_point_offset); } diff --git a/cpp/include/cuspatial/experimental/detail/join/intersection.cuh b/cpp/include/cuspatial/experimental/detail/join/intersection.cuh index cb297ba7d..e6fbc03e4 100644 --- a/cpp/include/cuspatial/experimental/detail/join/intersection.cuh +++ b/cpp/include/cuspatial/experimental/detail/join/intersection.cuh @@ -17,6 +17,8 @@ #pragma once #include +#include +#include #include #include @@ -69,37 +71,32 @@ inline int32_t remove_non_quad_intersections(InputIterator input_begin, } template -inline std::pair find_intersections(KeyIterator keys_first, - LevelIterator levels_first, - IsInternalIterator is_internal_node_first, +inline std::pair find_intersections(point_quadtree_ref quadtree, BoundingBoxIterator bounding_box_first, NodeIndicesIterator node_indices, BBoxIndicesIterator bbox_indices, NodePairsIterator node_pairs, LeafPairsIterator leaf_pairs, int32_t num_pairs, - T x_min, - T y_min, + vec_2d const& v_min, T scale, int8_t max_depth, rmm::cuda_stream_view stream) { - auto nodes_first = thrust::make_zip_iterator(keys_first, levels_first, is_internal_node_first); + auto nodes_first = thrust::make_zip_iterator( + quadtree.key_begin(), quadtree.level_begin(), quadtree.internal_node_flag_begin()); thrust::transform( rmm::exec_policy(stream), thrust::make_zip_iterator(node_indices, bbox_indices), thrust::make_zip_iterator(node_indices, bbox_indices) + num_pairs, node_pairs, - [x_min, y_min, scale, max_depth, nodes = nodes_first, bboxes = bounding_box_first] __device__( + [v_min, scale, max_depth, nodes = nodes_first, bboxes = bounding_box_first] __device__( auto const& node_and_bbox) { auto const& node_idx = thrust::get<0>(node_and_bbox); auto const& bbox_idx = thrust::get<1>(node_and_bbox); @@ -109,24 +106,20 @@ inline std::pair find_intersections(KeyIterator keys_first, uint8_t const& level = thrust::get<1>(node); uint8_t const& is_internal_node = thrust::get<2>(node); - auto const& bbox = bboxes[bbox_idx]; - auto const& bbox_min = bbox.v1; - auto const& bbox_max = bbox.v2; - auto const& poly_x_min = bbox_min.x; - auto const& poly_y_min = bbox_min.y; - auto const& poly_x_max = bbox_max.x; - auto const& poly_y_max = bbox_max.y; + box const bbox = bboxes[bbox_idx]; + vec_2d const bbox_min = bbox.v1; + vec_2d const bbox_max = bbox.v2; T const key_x = utility::z_order_x(key); T const key_y = utility::z_order_y(key); T const level_scale = scale * (1 << (max_depth - 1 - level)); - T const node_x_min = x_min + (key_x + 0) * level_scale; - T const node_y_min = y_min + (key_y + 0) * level_scale; - T const node_x_max = x_min + (key_x + 1) * level_scale; - T const node_y_max = y_min + (key_y + 1) * level_scale; + T const node_x_min = v_min.x + (key_x + 0) * level_scale; + T const node_y_min = v_min.y + (key_y + 0) * level_scale; + T const node_x_max = v_min.x + (key_x + 1) * level_scale; + T const node_y_max = v_min.y + (key_y + 1) * level_scale; - if ((node_x_min > poly_x_max) || (node_x_max < poly_x_min) || (node_y_min > poly_y_max) || - (node_y_max < poly_y_min)) { + if ((node_x_min > bbox_max.x) || (node_x_max < bbox_min.x) || (node_y_min > bbox_max.y) || + (node_y_max < bbox_min.y)) { // if no overlap, return type = none_indicator return thrust::make_tuple(none_indicator, level, node_idx, bbox_idx); } diff --git a/cpp/include/cuspatial/experimental/detail/point_quadtree.cuh b/cpp/include/cuspatial/experimental/detail/point_quadtree.cuh index fb401a2bb..3cda7a0dc 100644 --- a/cpp/include/cuspatial/experimental/detail/point_quadtree.cuh +++ b/cpp/include/cuspatial/experimental/detail/point_quadtree.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -241,8 +241,8 @@ std::pair, point_quadtree> quadtree_on_points( T scale, int8_t max_depth, int32_t max_size, - rmm::mr::device_memory_resource* mr, - rmm::cuda_stream_view stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto num_points = thrust::distance(points_first, points_last); if (num_points <= 0) { diff --git a/cpp/include/cuspatial/experimental/detail/quadtree_bbox_filtering.cuh b/cpp/include/cuspatial/experimental/detail/quadtree_bbox_filtering.cuh index 5145fdba2..c6768720a 100644 --- a/cpp/include/cuspatial/experimental/detail/quadtree_bbox_filtering.cuh +++ b/cpp/include/cuspatial/experimental/detail/quadtree_bbox_filtering.cuh @@ -29,38 +29,27 @@ namespace cuspatial { -template +template std::pair, rmm::device_uvector> -join_quadtree_and_bounding_boxes(KeyIterator keys_first, - KeyIterator keys_last, - LevelIterator levels_first, - IsInternalIterator is_internal_nodes_first, - KeyIterator lengths_first, - KeyIterator offsets_first, +join_quadtree_and_bounding_boxes(point_quadtree_ref quadtree, BoundingBoxIterator bounding_boxes_first, BoundingBoxIterator bounding_boxes_last, - T x_min, - T y_min, + vec_2d const& v_min, T scale, int8_t max_depth, - rmm::mr::device_memory_resource* mr, - rmm::cuda_stream_view stream) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { static_assert(is_same>(), "Iterator value_type mismatch"); - auto const num_nodes = std::distance(keys_first, keys_last); auto const num_boxes = std::distance(bounding_boxes_first, bounding_boxes_last); // Count the number of top-level nodes to start. // This could be provided explicitly, but count_if should be fast enough. auto num_top_level_leaves = thrust::count_if(rmm::exec_policy(stream), - levels_first, - levels_first + num_nodes, + quadtree.level_begin(), + quadtree.level_end(), thrust::placeholders::_1 == 0); auto num_pairs = num_top_level_leaves * num_boxes; @@ -96,9 +85,7 @@ join_quadtree_and_bounding_boxes(KeyIterator keys_first, // Find intersections for all the top level quadrants and bounding boxes std::tie(num_parents, num_leaves) = - detail::find_intersections(keys_first, - levels_first, - is_internal_nodes_first, + detail::find_intersections(quadtree, bounding_boxes_first, // The top-level node indices detail::make_counting_transform_iterator( @@ -111,8 +98,7 @@ join_quadtree_and_bounding_boxes(KeyIterator keys_first, // found intersecting quadrant and bbox indices for output make_output_values_iter(), num_pairs, - x_min, - y_min, + v_min, scale, max_depth, stream); @@ -141,8 +127,8 @@ join_quadtree_and_bounding_boxes(KeyIterator keys_first, // Walk one level down and fill the current level's vectors with // the next level's quadrant info and bbox indices. std::tie(num_pairs, cur_types, cur_levels, cur_node_idxs, cur_bbox_idxs) = - detail::descend_quadtree(lengths_first, - offsets_first, + detail::descend_quadtree(quadtree.length_begin(), + quadtree.offset_begin(), num_parents, cur_types, cur_levels, @@ -152,9 +138,7 @@ join_quadtree_and_bounding_boxes(KeyIterator keys_first, // Find intersections for the the next level's quadrants and bboxes std::tie(num_parents, num_leaves) = - detail::find_intersections(keys_first, - levels_first, - is_internal_nodes_first, + detail::find_intersections(quadtree, bounding_boxes_first, cur_node_idxs.begin(), cur_bbox_idxs.begin(), @@ -163,8 +147,7 @@ join_quadtree_and_bounding_boxes(KeyIterator keys_first, // found intersecting quadrant and bbox indices for output make_output_values_iter(), num_pairs, - x_min, - y_min, + v_min, scale, max_depth, stream); @@ -177,7 +160,8 @@ join_quadtree_and_bounding_boxes(KeyIterator keys_first, // Copy the relevant node offsets into a temporary vector so we don't modify the quadtree column rmm::device_uvector tmp_node_offsets(num_results, stream); - auto const iter = thrust::make_permutation_iterator(offsets_first, out_node_idxs.begin()); + auto const iter = + thrust::make_permutation_iterator(quadtree.offset_begin(), out_node_idxs.begin()); thrust::copy(rmm::exec_policy(stream), iter, iter + num_results, tmp_node_offsets.begin()); diff --git a/cpp/include/cuspatial/experimental/detail/quadtree_point_in_polygon.cuh b/cpp/include/cuspatial/experimental/detail/quadtree_point_in_polygon.cuh new file mode 100644 index 000000000..db507b76a --- /dev/null +++ b/cpp/include/cuspatial/experimental/detail/quadtree_point_in_polygon.cuh @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include + +namespace cuspatial { +namespace detail { + +template +struct compute_poly_and_point_indices { + QuadOffsetsIterator quad_point_offsets_begin; + PointOffsetsIterator point_offsets_begin; + PointOffsetsIterator point_offsets_end; + PolyIndexIterator poly_indices_begin; + + compute_poly_and_point_indices(QuadOffsetsIterator quad_point_offsets_begin, + PointOffsetsIterator point_offsets_begin, + PointOffsetsIterator point_offsets_end, + PolyIndexIterator poly_indices_begin) + : quad_point_offsets_begin(quad_point_offsets_begin), + point_offsets_begin(point_offsets_begin), + point_offsets_end(point_offsets_end), + poly_indices_begin(poly_indices_begin) + { + } + + using IndexType = iterator_value_type; + + inline thrust::tuple __device__ + operator()(IndexType const global_index) const + { + auto const [quad_poly_index, local_point_index] = + get_quad_and_local_point_indices(global_index, point_offsets_begin, point_offsets_end); + auto const point_idx = quad_point_offsets_begin[quad_poly_index] + local_point_index; + auto const poly_idx = poly_indices_begin[quad_poly_index]; + return thrust::make_tuple(poly_idx, point_idx); + } +}; + +template +struct test_poly_point_intersection { + using T = iterator_vec_base_type; + using IndexType = iterator_value_type; + + test_poly_point_intersection(PointIterator points_first, MultiPolygonRange polygons) + : points_first(points_first), polygons(polygons) + { + } + + PointIterator points_first; + MultiPolygonRange polygons; + + inline bool __device__ operator()(thrust::tuple const& poly_point_idxs) + { + auto const poly_idx = thrust::get<0>(poly_point_idxs); + auto const point_idx = thrust::get<1>(poly_point_idxs); + + vec_2d const& point = points_first[point_idx]; + + return is_point_in_polygon(point, polygons[poly_idx][0]); + } +}; + +} // namespace detail + +template +std::pair, rmm::device_uvector> quadtree_point_in_polygon( + PolyIndexIterator poly_indices_first, + PolyIndexIterator poly_indices_last, + QuadIndexIterator quad_indices_first, + point_quadtree_ref quadtree, + PointIndexIterator point_indices_first, + PointIndexIterator point_indices_last, + PointIterator points_first, + MultiPolygonRange polygons, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + using T = iterator_vec_base_type; + + CUSPATIAL_EXPECTS(polygons.num_multipolygons() == polygons.num_polygons(), + "Only one polygon per multipolygon currently supported."); + + auto num_poly_quad_pairs = std::distance(poly_indices_first, poly_indices_last); + + auto quad_lengths_iter = + thrust::make_permutation_iterator(quadtree.length_begin(), quad_indices_first); + + auto quad_offsets_iter = + thrust::make_permutation_iterator(quadtree.offset_begin(), quad_indices_first); + + // Compute a "local" set of zero-based point offsets from number of points in each quadrant + // Use `num_poly_quad_pairs + 1` as the length so that the last element produced by + // `inclusive_scan` is the total number of points to be tested against any polygon. + rmm::device_uvector local_point_offsets(num_poly_quad_pairs + 1, stream); + + // inclusive scan of quad_lengths_iter + thrust::inclusive_scan(rmm::exec_policy(stream), + quad_lengths_iter, + quad_lengths_iter + num_poly_quad_pairs, + local_point_offsets.begin() + 1); + + // Ensure local point offsets starts at 0 + IndexType init{0}; + local_point_offsets.set_element_async(0, init, stream); + + // The last element is the total number of points to test against any polygon. + auto num_total_points = local_point_offsets.back_element(stream); + + // Allocate the output polygon and point index pair vectors + rmm::device_uvector poly_indices(num_total_points, stream); + rmm::device_uvector point_indices(num_total_points, stream); + + auto poly_and_point_indices = + thrust::make_zip_iterator(poly_indices.begin(), point_indices.begin()); + + // Enumerate the point X/Ys using the sorted `point_indices` (from quadtree construction) + auto point_xys_iter = thrust::make_permutation_iterator(points_first, point_indices_first); + + // Compute the combination of polygon and point index pairs. For each polygon/quadrant pair, + // enumerate pairs of (poly_index, point_index) for each point in each quadrant. + // + // In Python pseudocode: + // ``` + // pp_pairs = [] + // for polygon, quadrant in pq_pairs: + // for point in quadrant: + // pp_pairs.append((polygon, point)) + // ``` + // + auto global_to_poly_and_point_indices = detail::make_counting_transform_iterator( + 0, + detail::compute_poly_and_point_indices{quad_offsets_iter, + local_point_offsets.begin(), + local_point_offsets.end(), + poly_indices_first}); + + // Compute the number of intersections by removing (poly, point) pairs that don't intersect + auto num_intersections = thrust::distance( + poly_and_point_indices, + thrust::copy_if(rmm::exec_policy(stream), + global_to_poly_and_point_indices, + global_to_poly_and_point_indices + num_total_points, + poly_and_point_indices, + detail::test_poly_point_intersection{point_xys_iter, polygons})); + + poly_indices.resize(num_intersections, stream); + poly_indices.shrink_to_fit(stream); + point_indices.resize(num_intersections, stream); + point_indices.shrink_to_fit(stream); + + return std::pair{std::move(poly_indices), std::move(point_indices)}; +} + +} // namespace cuspatial diff --git a/cpp/include/cuspatial/experimental/detail/ranges/multipolygon_range.cuh b/cpp/include/cuspatial/experimental/detail/ranges/multipolygon_range.cuh index 49b00a87b..f62b68044 100644 --- a/cpp/include/cuspatial/experimental/detail/ranges/multipolygon_range.cuh +++ b/cpp/include/cuspatial/experimental/detail/ranges/multipolygon_range.cuh @@ -104,7 +104,7 @@ multipolygon_range::m _point_end(point_end) { static_assert(is_vec_2d>(), - "Point iterator must be iterators to floating point vec_2d types."); + "point_begin and point_end must be iterators to floating point vec_2d types."); CUSPATIAL_EXPECTS_VALID_MULTIPOLYGON_SIZES( num_points(), num_multipolygons() + 1, num_polygons() + 1, num_rings() + 1); diff --git a/cpp/include/cuspatial/experimental/point_quadtree.cuh b/cpp/include/cuspatial/experimental/point_quadtree.cuh index bdb31def4..88253468b 100644 --- a/cpp/include/cuspatial/experimental/point_quadtree.cuh +++ b/cpp/include/cuspatial/experimental/point_quadtree.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,19 +35,104 @@ namespace cuspatial { * @{ */ +struct point_quadtree_ref; + struct point_quadtree { // uint32_t vector of quad node keys rmm::device_uvector key; // uint8_t vector of quadtree levels rmm::device_uvector level; // bool vector indicating whether the node is a parent (true) or leaf (false) node - rmm::device_uvector is_internal_node; + rmm::device_uvector internal_node_flag; // uint32_t vector for the number of child nodes (if is_internal_node), or number of points rmm::device_uvector length; // uint32_t vector for the first child position (if is_internal_node), or first point position rmm::device_uvector offset; }; +struct point_quadtree_ref { + using key_iterator = decltype(point_quadtree::key)::const_iterator; + using level_iterator = decltype(point_quadtree::level)::const_iterator; + using internal_node_flag_iterator = decltype(point_quadtree::internal_node_flag)::const_iterator; + using length_iterator = decltype(point_quadtree::length)::const_iterator; + using offset_iterator = decltype(point_quadtree::offset)::const_iterator; + + /// Construct from a point_quadtree struct + point_quadtree_ref(point_quadtree const& quadtree) + : _key_begin(quadtree.key.begin()), + _key_end(quadtree.key.end()), + _level_begin(quadtree.level.begin()), + _level_end(quadtree.level.end()), + _internal_node_flag_begin(quadtree.internal_node_flag.begin()), + _internal_node_flag_end(quadtree.internal_node_flag.end()), + _length_begin(quadtree.length.begin()), + _length_end(quadtree.length.end()), + _offset_begin(quadtree.offset.begin()), + _offset_end(quadtree.offset.end()) + { + } + + /// Construct from iterators and size + point_quadtree_ref(key_iterator key_begin, + key_iterator key_end, + level_iterator level_begin, + internal_node_flag_iterator internal_node_flag_begin, + length_iterator length_begin, + offset_iterator offset_begin) + : _key_begin(key_begin), + _key_end(key_end), + _level_begin(level_begin), + _level_end(level_begin + std::distance(key_begin, key_end)), + _internal_node_flag_begin(internal_node_flag_begin), + _internal_node_flag_end(internal_node_flag_begin + std::distance(key_begin, key_end)), + _length_begin(length_begin), + _length_end(length_begin + std::distance(key_begin, key_end)), + _offset_begin(offset_begin), + _offset_end(offset_begin + std::distance(key_begin, key_end)) + { + } + + /// Return the number of keys in the quadtree + CUSPATIAL_HOST_DEVICE auto num_nodes() const { return std::distance(_key_begin, _key_end); } + + /// Return iterator to the first ring of the quadtree + CUSPATIAL_HOST_DEVICE auto key_begin() const { return _key_begin; } + /// Return iterator to the last ring of the quadtree + CUSPATIAL_HOST_DEVICE auto key_end() const { return _key_end; } + + /// Return iterator to the first level of the quadtree + CUSPATIAL_HOST_DEVICE auto level_begin() const { return _level_begin; } + /// Return iterator to the last level of the quadtree + CUSPATIAL_HOST_DEVICE auto level_end() const { return _level_end; } + + /// Return iterator to the first internal node flag of the quadtree + CUSPATIAL_HOST_DEVICE auto internal_node_flag_begin() const { return _internal_node_flag_begin; } + /// Return iterator to the last internal node flag of the quadtree + CUSPATIAL_HOST_DEVICE auto internal_node_flag_end() const { return _internal_node_flag_end; } + + /// Return iterator to the first length of the quadtree + CUSPATIAL_HOST_DEVICE auto length_begin() const { return _length_begin; } + /// Return iterator to the last length of the quadtree + CUSPATIAL_HOST_DEVICE auto length_end() const { return _length_end; } + + /// Return iterator to the first child / point offset of the quadtree + CUSPATIAL_HOST_DEVICE auto offset_begin() const { return _offset_begin; } + /// Return iterator to the last child / point offset of the quadtree + CUSPATIAL_HOST_DEVICE auto offset_end() const { return _offset_end; } + + protected: + key_iterator _key_begin; + key_iterator _key_end; + level_iterator _level_begin; + level_iterator _level_end; + internal_node_flag_iterator _internal_node_flag_begin; + internal_node_flag_iterator _internal_node_flag_end; + length_iterator _length_begin; + length_iterator _length_end; + offset_iterator _offset_begin; + offset_iterator _offset_end; +}; + /** * @brief Construct a quadtree structure from points. * @@ -61,35 +146,39 @@ struct point_quadtree { * @note All intermediate quadtree nodes will have fewer than `max_size` number of points. Leaf * nodes are permitted (but not guaranteed) to have >= `max_size` number of points. * - * @param points Iterator of x, y coordinates for each point. + * @tparam PointIterator Iterator over x/y points. Must meet the requirements of + * [LegacyRandomAccessIterator][LinkLRAI] and be device-accessible. + * @tparam T the floating-point coordinate value type of the input x/y points. + * + * @param points_first Iterator to the beginning of the range of (x, y) points. + * @param points_last Iterator to the end of the range of (x, y) points. * @param vertex_1 Vertex of the area of interest bounding box * @param vertex_2 Vertex of the area of interest bounding box opposite `vertex_1` * @param scale Scale to apply to each x and y distance from min.x and min.y. * @param max_depth Maximum quadtree depth. * @param max_size Maximum number of points allowed in a node before it's split into 4 leaf nodes. + * @param stream The CUDA stream on which to perform computations * @param mr The optional resource to use for output device memory allocations. * - * All input iterators must have a `value_type` of `cuspatial::vec_2d` (x/y coordinates), and the - * output iterator must be able to accept for storage values of type `cuspatial::vec_2d` - * (Cartesian coordinates). + * @return Pair of UINT32 column of sorted keys to point indices and a point_quadtree * - * @tparam PointIt Iterator over x/y points. Must meet the requirements of - * [LegacyRandomAccessIterator][LinkLRAI] and be device-accessible. - * @tparam T the floating-point coordinate value type of the input x/y points. + * @pre Point iterators must have the same `vec_2d` value type, with the same underlying + * floating-point coordinate type (e.g. `cuspatial::vec_2d`). * - * @return Pair of UINT32 column of sorted keys to point indices and a point_quadtree + * [LinkLRAI]: https://en.cppreference.com/w/cpp/named_req/RandomAccessIterator + * "LegacyRandomAccessIterator" */ -template > +template > std::pair, point_quadtree> quadtree_on_points( - PointIt points_first, - PointIt points_last, + PointIterator points_first, + PointIterator points_last, vec_2d vertex_1, vec_2d vertex_2, T scale, int8_t max_depth, int32_t max_size, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(), - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @} // end of doxygen group diff --git a/cpp/include/cuspatial/experimental/ranges/multipoint_range.cuh b/cpp/include/cuspatial/experimental/ranges/multipoint_range.cuh index 1d7e0a36c..7117d28cc 100644 --- a/cpp/include/cuspatial/experimental/ranges/multipoint_range.cuh +++ b/cpp/include/cuspatial/experimental/ranges/multipoint_range.cuh @@ -20,6 +20,8 @@ #include #include +#include + namespace cuspatial { /** diff --git a/cpp/include/cuspatial/experimental/ranges/multipolygon_range.cuh b/cpp/include/cuspatial/experimental/ranges/multipolygon_range.cuh index a977e71dd..bb472f15e 100644 --- a/cpp/include/cuspatial/experimental/ranges/multipolygon_range.cuh +++ b/cpp/include/cuspatial/experimental/ranges/multipolygon_range.cuh @@ -21,6 +21,7 @@ #include #include #include +#include #include namespace cuspatial { @@ -107,6 +108,18 @@ class multipolygon_range { /// Return the iterator to the one past the last point in the range. CUSPATIAL_HOST_DEVICE auto point_end(); + /// Return the iterator to the first part offset in the range. + CUSPATIAL_HOST_DEVICE auto part_offset_begin() { return _part_begin; } + + /// Return the iterator to the one past the last part offset in the range. + CUSPATIAL_HOST_DEVICE auto part_offset_end() { return _part_end; } + + /// Return the iterator to the first ring offset in the range. + CUSPATIAL_HOST_DEVICE auto ring_offset_begin() { return _ring_begin; } + + /// Return the iterator to the one past the last ring offset in the range. + CUSPATIAL_HOST_DEVICE auto ring_offset_end() { return _ring_end; } + /// Given the index of a segment, return the index of the geometry (multipolygon) that contains /// the segment. Segment index is the index to the starting point of the segment. If the index is /// the last point of the ring, then it is not a valid index. This function returns diff --git a/cpp/include/cuspatial/experimental/spatial_join.cuh b/cpp/include/cuspatial/experimental/spatial_join.cuh index 5e9b7198d..741dba1a8 100644 --- a/cpp/include/cuspatial/experimental/spatial_join.cuh +++ b/cpp/include/cuspatial/experimental/spatial_join.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -42,49 +42,94 @@ namespace cuspatial { * eventual number of levels may be less than `max_depth` if the number of points is small or * `max_size` is large. * - * @param keys_first: start quadtree key iterator - * @param keys_last: end of quadtree key iterator - * @param levels_first: start quadtree levels iterator - * @param is_internal_nodes_first: start quadtree is_internal_node iterator - * @param lengths_first: start quadtree length iterator - * @param offsets_first: start quadtree offset iterator + * @param quadtree: Reference to a quadtree created using point_quadtree() * @param bounding_boxes_first: start bounding boxes iterator * @param bounding_boxes_last: end of bounding boxes iterator - * @param x_min The lower-left x-coordinate of the area of interest bounding box. - * @param y_min The lower-left y-coordinate of the area of interest bounding box. + * @param v_min The lower-left (x, y) corner of the area of interest bounding box. * @param scale Scale to apply to each x and y distance from x_min and y_min. * @param max_depth Maximum quadtree depth at which to stop testing for intersections. + * @param stream The CUDA stream on which to perform computations * @param mr The optional resource to use for output device memory allocations. * - * @throw cuspatial::logic_error If scale is less than or equal to 0 - * @throw cuspatial::logic_error If max_depth is less than 1 or greater than 15 - * * @return A pair of UINT32 bounding box and leaf quadrant offset device vectors: * - bbox_offset - indices for each polygon/linestring bbox that intersects with the quadtree. * - quad_offset - indices for each leaf quadrant intersecting with a polygon/linestring bbox. + * + * @throw cuspatial::logic_error If scale is less than or equal to 0 + * @throw cuspatial::logic_error If max_depth is less than 1 or greater than 15 */ -template > std::pair, rmm::device_uvector> join_quadtree_and_bounding_boxes( - KeyIterator keys_first, - KeyIterator keys_last, - LevelIterator levels_first, - IsInternalIterator is_internal_nodes_first, - KeyIterator lengths_first, - KeyIterator offsets_first, + point_quadtree_ref quadtree, BoundingBoxIterator bounding_boxes_first, BoundingBoxIterator bounding_boxes_last, - T x_min, - T y_min, + vec_2d const& v_min, T scale, int8_t max_depth, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(), - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Test whether the specified points are inside any of the specified polygons. + * + * Uses the (polygon, quadrant) pairs returned by `cuspatial::join_quadtree_and_bounding_boxes` to + * ensure only the points in the same quadrant as each polygon are tested for intersection. + * + * This pre-filtering can dramatically reduce the number of points tested per polygon, enabling + * faster intersection testing at the expense of extra memory allocated to store the quadtree and + * sorted point_indices. + * + * @param poly_indices_first iterator to beginning of sequence of polygon indices returned by + * cuspatial::join_quadtree_and_bounding_boxes + * @param poly_indices_first iterator to end of sequence of polygon indices returned by + * cuspatial::join_quadtree_and_bounding_boxes + * @param quad_indices_first iterator to beginning of sequence of quadrant indices returned by + * cuspatial::join_quadtree_and_bounding_boxes + * @param quadtree: Reference to a quadtree created using point_quadtree() + * @param point_indices_first iterator to beginning of sequence of point indices returned by + * `cuspatial::quadtree_on_points` + * @param point_indices_last iterator to end of sequence of point indices returned by + * `cuspatial::quadtree_on_points` + * @param points_first iterator to beginning of sequence of (x, y) points to test + * @param polygons multipolygon_range of polygons. + * @param stream The CUDA stream on which to perform computations + * @param mr The optional resource to use for output device memory allocations. + * + * @throw cuspatial::logic_error If the number of rings is less than the number of polygons. + * @throw cuspatial::logic_error If any ring has fewer than four vertices. + * @throw cuspatial::logic_error if the number of multipolygons does not equal the total number of + * multipolygons (one polygon per multipolygon) + * + * @return A pair of rmm::device_uvectors where each row represents a point/polygon intersection: + * polygon_offset - uint32_t polygon indices + * point_offset - uint32_t point indices + * + * @note Currently only supports single-polygon multipolygons. + * @note The returned polygon and point indices are offsets into the `poly_quad_pairs` input range + * and `point_indices` range, respectively. + * + **/ +template > +std::pair, rmm::device_uvector> quadtree_point_in_polygon( + PolyIndexIterator poly_indices_first, + PolyIndexIterator poly_indices_last, + QuadIndexIterator quad_indices_first, + point_quadtree_ref quadtree, + PointIndexIterator point_indices_first, + PointIndexIterator point_indices_last, + PointIterator points_first, + MultiPolygonRange polygons, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace cuspatial #include +#include diff --git a/cpp/include/cuspatial_test/base_fixture.hpp b/cpp/include/cuspatial_test/base_fixture.hpp index 54f8be5f6..4e925025b 100644 --- a/cpp/include/cuspatial_test/base_fixture.hpp +++ b/cpp/include/cuspatial_test/base_fixture.hpp @@ -31,8 +31,8 @@ class RMMResourceMixin { public: /** - * @brief Returns pointer to `device_memory_resource` that should be used for - * all tests inheriting from this fixture + * @brief Returns pointer to `device_memory_resource` that should be used for all tests inheriting + * from this fixture. * @return pointer to memory resource */ rmm::mr::device_memory_resource* mr() { return _mr; } diff --git a/cpp/include/cuspatial_test/random.cuh b/cpp/include/cuspatial_test/random.cuh index 5d8e2b29e..bff8ddabf 100644 --- a/cpp/include/cuspatial_test/random.cuh +++ b/cpp/include/cuspatial_test/random.cuh @@ -33,6 +33,10 @@ #include #include +namespace cuspatial { + +namespace test { + /** * @brief Identifies a probability distribution type. */ @@ -164,3 +168,7 @@ struct point_generator { * @brief LCG pseudo-random engine. */ auto deterministic_engine(unsigned seed) { return thrust::minstd_rand{seed}; } + +} // namespace test + +} // namespace cuspatial diff --git a/cpp/src/indexing/construction/point_quadtree.cu b/cpp/src/indexing/construction/point_quadtree.cu index 501bd7cae..a276d0b32 100644 --- a/cpp/src/indexing/construction/point_quadtree.cu +++ b/cpp/src/indexing/construction/point_quadtree.cu @@ -83,8 +83,8 @@ struct dispatch_construct_quadtree { static_cast(scale), max_depth, max_size, - mr, - stream); + stream, + mr); auto size = static_cast(tree.key.size()); @@ -94,7 +94,7 @@ struct dispatch_construct_quadtree { cols.push_back(std::make_unique( cudf::data_type{cudf::type_id::UINT8}, size, tree.level.release())); cols.push_back(std::make_unique( - cudf::data_type{cudf::type_id::BOOL8}, size, tree.is_internal_node.release())); + cudf::data_type{cudf::type_id::BOOL8}, size, tree.internal_node_flag.release())); cols.push_back(std::make_unique( cudf::data_type{cudf::type_id::UINT32}, size, tree.length.release())); cols.push_back(std::make_unique( diff --git a/cpp/src/join/quadtree_bbox_filtering.cu b/cpp/src/join/quadtree_bbox_filtering.cu index f95f69cf7..8c427fe63 100644 --- a/cpp/src/join/quadtree_bbox_filtering.cu +++ b/cpp/src/join/quadtree_bbox_filtering.cu @@ -47,12 +47,6 @@ struct dispatch_quadtree_bounding_box_join { rmm::mr::device_memory_resource* mr, rmm::cuda_stream_view stream) { - auto const keys = quadtree.column(0); // uint32_t - auto const levels = quadtree.column(1); // uint8_t - auto const is_internal = quadtree.column(2); // uint8_t - auto const lengths = quadtree.column(3); // uint32_t - auto const offsets = quadtree.column(4); // uint32_t - auto bbox_min = cuspatial::make_vec_2d_iterator(bbox.column(0).template begin(), bbox.column(1).template begin()); auto bbox_max = cuspatial::make_vec_2d_iterator(bbox.column(2).template begin(), @@ -60,20 +54,22 @@ struct dispatch_quadtree_bounding_box_join { auto bbox_itr = cuspatial::make_box_iterator(bbox_min, bbox_max); - auto [bbox_offset, quad_offset] = join_quadtree_and_bounding_boxes(keys.begin(), - keys.end(), - levels.begin(), - is_internal.begin(), - lengths.begin(), - offsets.begin(), - bbox_itr, - bbox_itr + bbox.num_rows(), - static_cast(x_min), - static_cast(y_min), - static_cast(scale), - max_depth, - mr, - stream); + auto quadtree_ref = point_quadtree_ref(quadtree.column(0).begin(), // keys + quadtree.column(0).end(), + quadtree.column(1).begin(), // levels + quadtree.column(2).begin(), // is_internal_node + quadtree.column(3).begin(), // lengths + quadtree.column(4).begin()); // offsets + + auto [bbox_offset, quad_offset] = join_quadtree_and_bounding_boxes( + quadtree_ref, + bbox_itr, + bbox_itr + bbox.num_rows(), + cuspatial::vec_2d{static_cast(x_min), static_cast(y_min)}, + static_cast(scale), + max_depth, + stream, + mr); std::vector> cols{}; cols.push_back(std::make_unique(std::move(bbox_offset))); diff --git a/cpp/src/join/quadtree_point_in_polygon.cu b/cpp/src/join/quadtree_point_in_polygon.cu index 8142e87ba..87b99f28e 100644 --- a/cpp/src/join/quadtree_point_in_polygon.cu +++ b/cpp/src/join/quadtree_point_in_polygon.cu @@ -14,85 +14,24 @@ * limitations under the License. */ -#include -#include +#include +#include +#include +#include -#include -#include - -#include #include #include -#include -#include #include #include #include #include #include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include namespace cuspatial { namespace detail { namespace { - -template -struct compute_poly_and_point_indices { - QuadOffsetsIter quad_point_offsets; - uint32_t const* point_offsets; - uint32_t const* point_offsets_end; - cudf::column_device_view const poly_indices; - - inline thrust::tuple __device__ - operator()(cudf::size_type const global_index) const - { - // uint32_t quad_poly_index, local_point_index; - auto const [quad_poly_index, local_point_index] = - get_quad_and_local_point_indices(global_index, point_offsets, point_offsets_end); - uint32_t const point_idx = quad_point_offsets[quad_poly_index] + local_point_index; - uint32_t const poly_idx = poly_indices.element(quad_poly_index); - return thrust::make_tuple(poly_idx, point_idx); - } -}; - -template -struct test_poly_point_intersection { - PointIter points; - cudf::column_device_view const poly_offsets; - cudf::column_device_view const ring_offsets; - cudf::column_device_view const poly_points_x; - cudf::column_device_view const poly_points_y; - - inline bool __device__ operator()(thrust::tuple const& poly_point_idxs) - { - auto& poly_idx = thrust::get<0>(poly_point_idxs); - auto& point_idx = thrust::get<1>(poly_point_idxs); - auto point = points[point_idx]; - return is_point_in_polygon(thrust::get<0>(point), - thrust::get<1>(point), - poly_idx, - poly_offsets, - ring_offsets, - poly_points_x, - poly_points_y); - } -}; - struct compute_quadtree_point_in_polygon { template std::enable_if_t::value, std::unique_ptr> operator()( @@ -115,100 +54,45 @@ struct compute_quadtree_point_in_polygon { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // Wrapped in an IIFE so `local_point_offsets` is freed on return - auto [poly_idxs, point_idxs, num_intersections] = [&]() { - auto quad_lengths = quadtree.column(3); - auto quad_offsets = quadtree.column(4); - auto poly_indices = poly_quad_pairs.column(0); - auto quad_indices = poly_quad_pairs.column(1); - auto num_poly_quad_pairs = poly_quad_pairs.num_rows(); - - auto quad_lengths_iter = thrust::make_permutation_iterator(quad_lengths.begin(), - quad_indices.begin()); - - auto quad_offsets_iter = thrust::make_permutation_iterator(quad_offsets.begin(), - quad_indices.begin()); - - // Compute a "local" set of zero-based point offsets from number of points in each quadrant - // Use `num_poly_quad_pairs + 1` as the length so that the last element produced by - // `inclusive_scan` is the total number of points to be tested against any polygon. - rmm::device_uvector local_point_offsets(num_poly_quad_pairs + 1, stream); - - thrust::inclusive_scan(rmm::exec_policy(stream), - quad_lengths_iter, - quad_lengths_iter + num_poly_quad_pairs, - local_point_offsets.begin() + 1); - - // Ensure local point offsets starts at 0 - uint32_t init{0}; - local_point_offsets.set_element_async(0, init, stream); - - // The last element is the total number of points to test against any polygon. - auto num_total_points = local_point_offsets.back_element(stream); - - // Allocate memory for the polygon and point index pairs - rmm::device_uvector poly_idxs(num_total_points, stream); - rmm::device_uvector point_idxs(num_total_points, stream); - - auto poly_and_point_indices = - thrust::make_zip_iterator(poly_idxs.begin(), point_idxs.begin()); - - // Enumerate the point X/Ys using the sorted `point_indices` (from quadtree construction) - auto point_xys_iter = thrust::make_permutation_iterator( - thrust::make_zip_iterator(point_x.begin(), point_y.begin()), - point_indices.begin()); - - // Compute the combination of polygon and point index pairs. For each polygon/quadrant pair, - // enumerate pairs of (poly_index, point_index) for each point in each quadrant. - // - // In Python pseudocode: - // ``` - // pp_pairs = [] - // for polygon, quadrant in pq_pairs: - // for point in quadrant: - // pp_pairs.append((polygon, point)) - // ``` - // - auto global_to_poly_and_point_indices = detail::make_counting_transform_iterator( - 0, - compute_poly_and_point_indices{ - quad_offsets_iter, - local_point_offsets.begin(), - local_point_offsets.end(), - *cudf::column_device_view::create(poly_indices, stream)}); - - // Compute the number of intersections by removing (poly, point) pairs that don't intersect - auto num_intersections = thrust::distance( - poly_and_point_indices, - thrust::copy_if(rmm::exec_policy(stream), - global_to_poly_and_point_indices, - global_to_poly_and_point_indices + num_total_points, - poly_and_point_indices, - test_poly_point_intersection{ - point_xys_iter, - *cudf::column_device_view::create(poly_offsets, stream), - *cudf::column_device_view::create(ring_offsets, stream), - *cudf::column_device_view::create(poly_points_x, stream), - *cudf::column_device_view::create(poly_points_y, stream)})); - - return std::make_tuple(std::move(poly_idxs), std::move(point_idxs), num_intersections); - }(); + auto poly_indices = poly_quad_pairs.column(0); + auto quad_indices = poly_quad_pairs.column(1); + + auto quadtree_ref = point_quadtree_ref(quadtree.column(0).begin(), // keys + quadtree.column(0).end(), + quadtree.column(1).begin(), // levels + quadtree.column(2).begin(), // is_internal_node + quadtree.column(3).begin(), // lengths + quadtree.column(4).begin()); // offsets + + auto multipolygons = + multipolygon_range(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(poly_offsets.size()), + poly_offsets.begin(), + poly_offsets.end(), + ring_offsets.begin(), + ring_offsets.end(), + make_vec_2d_iterator(poly_points_x.begin(), poly_points_y.begin()), + make_vec_2d_iterator(poly_points_x.end(), poly_points_y.end())); + + auto [poly_idx, point_idx] = + quadtree_point_in_polygon(poly_indices.begin(), + poly_indices.end(), + quad_indices.begin(), + quadtree_ref, + point_indices.begin(), + point_indices.end(), + make_vec_2d_iterator(point_x.begin(), point_y.begin()), + multipolygons, + stream, + mr); // Allocate output columns for the number of pairs that intersected - auto poly_idx_col = make_fixed_width_column(num_intersections, stream, mr); - auto point_idx_col = make_fixed_width_column(num_intersections, stream, mr); - auto poly_and_point_indices = thrust::make_zip_iterator(poly_idxs.begin(), point_idxs.begin()); + auto num_intersections = poly_idx.size(); - // Note: no need to resize `poly_idxs` or `point_idxs` if we set the end iterator to - // `idxs.begin() + num_intersections`. - - // populate the polygon and point indices columns - thrust::copy( - rmm::exec_policy(stream), - poly_and_point_indices, - poly_and_point_indices + num_intersections, - thrust::make_zip_iterator(poly_idx_col->mutable_view().template begin(), - point_idx_col->mutable_view().template begin())); + auto poly_idx_col = std::make_unique( + cudf::data_type{cudf::type_id::UINT32}, num_intersections, poly_idx.release()); + auto point_idx_col = std::make_unique( + cudf::data_type{cudf::type_id::UINT32}, num_intersections, point_idx.release()); std::vector> cols{}; cols.reserve(2); @@ -268,15 +152,15 @@ std::unique_ptr quadtree_point_in_polygon(cudf::table_view const& p CUSPATIAL_EXPECTS(poly_points_x.size() == poly_points_y.size(), "numbers of vertices must be the same for both x and y columns"); - CUSPATIAL_EXPECTS_VALID_POLYGON_SIZES( - poly_points_x.size(), poly_offsets.size(), ring_offsets.size()); - CUSPATIAL_EXPECTS(poly_points_x.type() == poly_points_y.type(), "polygon columns must have the same data type"); CUSPATIAL_EXPECTS(point_x.type() == point_y.type(), "point columns must have the same data type"); CUSPATIAL_EXPECTS(point_x.type() == poly_points_x.type(), "points and polygons must have the same data type"); + CUSPATIAL_EXPECTS(poly_offsets.type() == ring_offsets.type(), + "offset columns must have the same data type"); + if (poly_quad_pairs.num_rows() == 0 || quadtree.num_rows() == 0 || point_indices.size() == 0 || poly_offsets.size() == 0) { std::vector> cols{}; diff --git a/cpp/src/utility/point_in_polygon.cuh b/cpp/src/utility/point_in_polygon.cuh deleted file mode 100644 index 75b49ef3f..000000000 --- a/cpp/src/utility/point_in_polygon.cuh +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -#include - -namespace cuspatial { -namespace detail { - -template -inline __device__ bool is_point_in_polygon(T const x, - T const y, - cudf::size_type const poly_idx, - cudf::column_device_view const& poly_offsets, - cudf::column_device_view const& ring_offsets, - cudf::column_device_view const& poly_points_x, - cudf::column_device_view const& poly_points_y) -{ - bool in_polygon = false; - uint32_t poly_begin = poly_offsets.element(poly_idx); - uint32_t poly_end = poly_idx < poly_offsets.size() - 1 - ? poly_offsets.element(poly_idx + 1) - : ring_offsets.size(); - - for (uint32_t ring_idx = poly_begin; ring_idx < poly_end; ring_idx++) // for each ring - { - auto ring_begin = ring_offsets.element(ring_idx); - auto ring_end = ring_idx < ring_offsets.size() - 1 - ? ring_offsets.element(ring_idx + 1) - : poly_points_x.size(); - auto ring_len = ring_end - ring_begin; - for (auto point_idx = 0; point_idx < ring_len; ++point_idx) { - T x0 = poly_points_x.element(ring_begin + ((point_idx + 0) % ring_len)); - T y0 = poly_points_y.element(ring_begin + ((point_idx + 0) % ring_len)); - T x1 = poly_points_x.element(ring_begin + ((point_idx + 1) % ring_len)); - T y1 = poly_points_y.element(ring_begin + ((point_idx + 1) % ring_len)); - bool y_between_ay_by = y0 <= y && y < y1; // is y in range [ay, by) when ay < by - bool y_between_by_ay = y1 <= y && y < y0; // is y in range [by, ay) when by < ay - bool y_in_bounds = y_between_ay_by || y_between_by_ay; // is y in range [by, ay] - T run = x1 - x0; - T rise = y1 - y0; - - // The endpoint of the line segment is the same, and the segment degenerates to a point. - // This can happen in polygon vertices when the first and last vertex of the ring are - // the same. In this scenario, do not attempt ray casting on a degenerate point. - T constexpr zero = 0.0; - if (float_equal(run, zero) && float_equal(rise, zero)) continue; - - T rise_to_point = y - y0; - T run_to_point = x - x0; - - // If points are on the polygon edge, they are not contained in the polygon. - if (float_equal(run * rise_to_point, run_to_point * rise)) { return false; } - - if (y_in_bounds && x < (run / rise) * rise_to_point + x0) { in_polygon = not in_polygon; } - } - } - - return in_polygon; -} - -} // namespace detail -} // namespace cuspatial diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 2d48d8f39..10e4d269b 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -29,6 +29,10 @@ function(ConfigureTest CMAKE_TEST_NAME) ${CMAKE_TEST_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$" INSTALL_RPATH "\$ORIGIN/../../../lib" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON ) target_link_libraries(${CMAKE_TEST_NAME} GTest::gtest_main GTest::gmock_main cudf::cudftestutil cuspatial) add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME}) @@ -53,15 +57,12 @@ ConfigureTest(HAVERSINE_TEST ConfigureTest(HAUSDORFF_TEST spatial/hausdorff_test.cpp) -ConfigureTest(JOIN_POINT_IN_POLYGON_SMALL_TEST - join/point_in_polygon_test_small.cpp) - -ConfigureTest(JOIN_POINT_IN_POLYGON_LARGE_TEST - join/point_in_polygon_test_large.cpp) - ConfigureTest(JOIN_POINT_TO_LINESTRING_SMALL_TEST join/point_to_nearest_linestring_test_small.cpp) +ConfigureTest(JOIN_POINT_IN_POLYGON_TEST + join/quadtree_point_in_polygon_test.cpp) + ConfigureTest(POINT_IN_POLYGON_TEST spatial/point_in_polygon_test.cpp) @@ -185,3 +186,9 @@ ConfigureTest(FIND_TEST_EXP experimental/find/find_and_combine_segments_test.cu experimental/find/find_points_on_segments_test.cu experimental/find/find_duplicate_points_test.cu) + +ConfigureTest(JOIN_POINT_IN_POLYGON_SMALL_TEST_EXP + experimental/join/quadtree_point_in_polygon_test_small.cu) + +ConfigureTest(JOIN_POINT_IN_POLYGON_LARGE_TEST_EXP + experimental/join/quadtree_point_in_polygon_test_large.cu) diff --git a/cpp/tests/experimental/indexing/point_quadtree_test.cu b/cpp/tests/experimental/indexing/point_quadtree_test.cu index 636f22c09..87c543e84 100644 --- a/cpp/tests/experimental/indexing/point_quadtree_test.cu +++ b/cpp/tests/experimental/indexing/point_quadtree_test.cu @@ -42,7 +42,7 @@ struct QuadtreeOnPointIndexingTest : public ::testing::Test { auto& key_d = tree.key; auto& level_d = tree.level; - auto& is_internal_node_d = tree.is_internal_node; + auto& is_internal_node_d = tree.internal_node_flag; auto& length_d = tree.length; auto& offset_d = tree.offset; diff --git a/cpp/tests/experimental/join/quadtree_point_in_polygon_test_large.cu b/cpp/tests/experimental/join/quadtree_point_in_polygon_test_large.cu new file mode 100644 index 000000000..6825f3c06 --- /dev/null +++ b/cpp/tests/experimental/join/quadtree_point_in_polygon_test_large.cu @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +/* + * The test uses the same quadtree structure as in pip_refine_test_small. However, the number of + * randomly generated points under all quadrants (min_size) are increased to be more than the + * number of threads per-block. + */ + +template +struct PIPRefineTestLarge : public cuspatial::test::BaseFixture { +}; + +using TestTypes = ::testing::Types; + +TYPED_TEST_CASE(PIPRefineTestLarge, TestTypes); + +template +inline auto generate_points( + std::vector> const& quads, + uint32_t points_per_quad, + std::size_t seed, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + auto engine = cuspatial::test::deterministic_engine(0); + auto uniform = cuspatial::test::make_normal_dist(0.0, 1.0); + auto pgen = cuspatial::test::point_generator(T{0.0}, T{1.0}, engine, uniform); + auto num_points = quads.size() * points_per_quad; + rmm::device_uvector> points(num_points, stream, mr); + + auto counting_iter = thrust::make_counting_iterator(seed); + thrust::transform( + rmm::exec_policy(stream), counting_iter, counting_iter + num_points, points.begin(), pgen); + + return points; +} + +TYPED_TEST(PIPRefineTestLarge, TestLarge) +{ + using T = TypeParam; + using cuspatial::vec_2d; + using cuspatial::test::make_device_vector; + + vec_2d v_min{0.0, 0.0}; + vec_2d v_max{8.0, 8.0}; + T const scale{1.0}; + uint8_t const max_depth{3}; + uint32_t const min_size{400}; + + std::vector> quads{{0, 2, 0, 2}, + {3, 4, 0, 1}, + {2, 3, 1, 2}, + {4, 6, 0, 2}, + {3, 4, 2, 3}, + {2, 3, 3, 4}, + {6, 7, 2, 3}, + {7, 8, 3, 4}, + {0, 4, 4, 8}}; + + auto points_in = generate_points(quads, min_size, 0, this->stream()); + + auto [point_indices, quadtree] = quadtree_on_points( + points_in.begin(), points_in.end(), v_min, v_max, scale, max_depth, min_size, this->stream()); + + auto points = rmm::device_uvector>(quads.size() * min_size, this->stream()); + thrust::gather(rmm::exec_policy(this->stream()), + point_indices.begin(), + point_indices.end(), + points_in.begin(), + points.begin()); + + auto multipoly_array = cuspatial::test::make_multipolygon_array({0, 1, 2, 3, 4}, + {0, 1, 2, 3, 4}, + {0, 4, 10, 14, 19}, + {// ring 1 + {2.488450, 5.856625}, + {1.333584, 5.008840}, + {3.460720, 4.586599}, + {2.488450, 5.856625}, + // ring 2 + {5.039823, 4.229242}, + {5.561707, 1.825073}, + {7.103516, 1.503906}, + {7.190674, 4.025879}, + {5.998939, 5.653384}, + {5.039823, 4.229242}, + // ring 3 + {5.998939, 1.235638}, + {5.573720, 0.197808}, + {6.703534, 0.086693}, + {5.998939, 1.235638}, + // ring 4 + {2.088115, 4.541529}, + {1.034892, 3.530299}, + {2.415080, 2.896937}, + {3.208660, 3.745936}, + {2.088115, 4.541529}}); + auto multipolygons = multipoly_array.range(); + + auto bboxes = + rmm::device_uvector>(multipolygons.num_polygons(), this->stream()); + + cuspatial::polygon_bounding_boxes(multipolygons.part_offset_begin(), + multipolygons.part_offset_end(), + multipolygons.ring_offset_begin(), + multipolygons.ring_offset_end(), + multipolygons.point_begin(), + multipolygons.point_end(), + bboxes.begin(), + T{0}, + this->stream()); + + auto [poly_indices, quad_indices] = cuspatial::join_quadtree_and_bounding_boxes( + quadtree, bboxes.begin(), bboxes.end(), v_min, scale, max_depth, this->stream()); + + auto [actual_poly_indices, actual_point_indices] = + cuspatial::quadtree_point_in_polygon(poly_indices.begin(), + poly_indices.end(), + quad_indices.begin(), + quadtree, + point_indices.begin(), + point_indices.end(), + points.begin(), + multipolygons, + this->stream()); + + thrust::stable_sort_by_key(rmm::exec_policy(this->stream()), + actual_point_indices.begin(), + actual_point_indices.end(), + actual_poly_indices.begin()); + + { // verify + rmm::device_uvector hits(points.size(), this->stream()); + auto hits_end = cuspatial::point_in_polygon(points.begin(), + points.end(), + multipolygons.part_offset_begin(), + multipolygons.part_offset_end(), + multipolygons.ring_offset_begin(), + multipolygons.ring_offset_end(), + multipolygons.point_begin(), + multipolygons.point_end(), + hits.begin(), + this->stream()); + + auto hits_host = cuspatial::test::to_host(hits); + + std::vector expected_poly_indices; + std::vector expected_point_indices; + + for (std::size_t point_index = 0; point_index < hits_host.size(); point_index++) { + // iterate over set bits + std::uint32_t bits = hits_host[point_index]; + while (bits != 0) { + std::uint32_t t = bits & -bits; // get only LSB + std::uint32_t poly_index = __builtin_ctz(bits); // get index of LSB + expected_poly_indices.push_back(poly_index); + expected_point_indices.push_back(point_index); + bits ^= t; // reset LSB to zero to advance to next set bit + } + } + + // TODO: shouldn't have to copy to device here, but I get Thrust compilation errors if I use + // host vectors and a host stable_sort_by_key. + auto d_expected_poly_indices = rmm::device_vector(expected_poly_indices); + auto d_expected_point_indices = rmm::device_vector(expected_point_indices); + + thrust::stable_sort_by_key(rmm::exec_policy(this->stream()), + d_expected_point_indices.begin(), + d_expected_point_indices.end(), + d_expected_poly_indices.begin()); + + CUSPATIAL_EXPECT_VECTORS_EQUIVALENT(d_expected_poly_indices, actual_poly_indices); + CUSPATIAL_EXPECT_VECTORS_EQUIVALENT(d_expected_point_indices, actual_point_indices); + } +} diff --git a/cpp/tests/experimental/join/quadtree_point_in_polygon_test_small.cu b/cpp/tests/experimental/join/quadtree_point_in_polygon_test_small.cu new file mode 100644 index 000000000..a4dfc1607 --- /dev/null +++ b/cpp/tests/experimental/join/quadtree_point_in_polygon_test_small.cu @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +/* + * A small test that it is suitable for manually visualizing point-polygon pairing results in a GIS + * environment. GPU results are compared with expected values embedded in code. However, the number + * of points in each quadrant is less than 32, the two kernels for point-in-polygon test are not + * fully tested. This is left for pip_refine_test_large. + */ +template +struct PIPRefineTestSmall : public cuspatial::test::BaseFixture { +}; + +using TestTypes = ::testing::Types; + +TYPED_TEST_CASE(PIPRefineTestSmall, TestTypes); + +TYPED_TEST(PIPRefineTestSmall, TestSmall) +{ + using T = TypeParam; + using cuspatial::vec_2d; + using cuspatial::test::make_device_vector; + + vec_2d v_min{0.0, 0.0}; + vec_2d v_max{8.0, 8.0}; + T const scale{1.0}; + uint8_t const max_depth{3}; + uint32_t const max_size{12}; + + auto points = make_device_vector>( + {{1.9804558865545805, 1.3472225743317712}, {0.1895259128530169, 0.5431061133894604}, + {1.2591725716781235, 0.1448705855995005}, {0.8178039499335275, 0.8138440641113271}, + {0.48171647380517046, 1.9022922214961997}, {1.3890664414691907, 1.5177694304735412}, + {0.2536015260915061, 1.8762161698642947}, {3.1907684812039956, 0.2621847215928189}, + {3.028362149164369, 0.027638405909631958}, {3.918090468102582, 0.3338651960183463}, + {3.710910700915217, 0.9937713340192049}, {3.0706987088385853, 0.9376313558467103}, + {3.572744183805594, 0.33184908855075124}, {3.7080407833612004, 0.09804238103130436}, + {3.70669993057843, 0.7485845679979923}, {3.3588457228653024, 0.2346381514128677}, + {2.0697434332621234, 1.1809465376402173}, {2.5322042870739683, 1.419555755682142}, + {2.175448214220591, 1.2372448404986038}, {2.113652420701984, 1.2774712415624014}, + {2.520755151373394, 1.902015274420646}, {2.9909779614491687, 1.2420487904041893}, + {2.4613232527836137, 1.0484414482621331}, {4.975578758530645, 0.9606291981013242}, + {4.07037627210835, 1.9486902798139454}, {4.300706849071861, 0.021365525588281198}, + {4.5584381091040616, 1.8996548860019926}, {4.822583857757069, 0.3234041700489503}, + {4.849847745942472, 1.9531893897409585}, {4.75489831780737, 0.7800065259479418}, + {4.529792124514895, 1.942673409259531}, {4.732546857961497, 0.5659923375279095}, + {3.7622247877537456, 2.8709552313924487}, {3.2648444465931474, 2.693039435509084}, + {3.01954722322135, 2.57810040095543}, {3.7164018490892348, 2.4612194182614333}, + {3.7002781846945347, 2.3345952955903906}, {2.493975723955388, 3.3999020934055837}, + {2.1807636574967466, 3.2296461832828114}, {2.566986568683904, 3.6607732238530897}, + {2.2006520196663066, 3.7672478678985257}, {2.5104987015171574, 3.0668114607133137}, + {2.8222482218882474, 3.8159308233351266}, {2.241538022180476, 3.8812819070357545}, + {2.3007438625108882, 3.6045900851589048}, {6.0821276168848994, 2.5470532680258002}, + {6.291790729917634, 2.983311357415729}, {6.109985464455084, 2.2235950639628523}, + {6.101327777646798, 2.5239201807166616}, {6.325158445513714, 2.8765450351723674}, + {6.6793884701899, 2.5605928243991434}, {6.4274219368674315, 2.9754616970668213}, + {6.444584786789386, 2.174562817047202}, {7.897735998643542, 3.380784914178574}, + {7.079453687660189, 3.063690547962938}, {7.430677191305505, 3.380489849365283}, + {7.5085184104988, 3.623862886287816}, {7.886010001346151, 3.538128217886674}, + {7.250745898479374, 3.4154469467473447}, {7.769497359206111, 3.253257011908445}, + {1.8703303641352362, 4.209727933188015}, {1.7015273093278767, 7.478882372510933}, + {2.7456295127617385, 7.474216636277054}, {2.2065031771469, 6.896038613284851}, + {3.86008672302403, 7.513564222799629}, {1.9143371250907073, 6.885401350515916}, + {3.7176098065039747, 6.194330707468438}, {0.059011873032214, 5.823535317960799}, + {3.1162712022943757, 6.789029097334483}, {2.4264509160270813, 5.188939408363776}, + {3.154282922203257, 5.788316610960881}}); + + // build a quadtree on the points + auto [point_indices, quadtree] = cuspatial::quadtree_on_points( + points.begin(), points.end(), v_min, v_max, scale, max_depth, max_size, this->stream()); + + auto multipoly_array = cuspatial::test::make_multipolygon_array({0, 1, 2, 3, 4}, + {0, 1, 2, 3, 4}, + {0, 4, 10, 14, 19}, + {// ring 1 + {2.488450, 5.856625}, + {1.333584, 5.008840}, + {3.460720, 4.586599}, + {2.488450, 5.856625}, + // ring 2 + {5.039823, 4.229242}, + {5.561707, 1.825073}, + {7.103516, 1.503906}, + {7.190674, 4.025879}, + {5.998939, 5.653384}, + {5.039823, 4.229242}, + // ring 3 + {5.998939, 1.235638}, + {5.573720, 0.197808}, + {6.703534, 0.086693}, + {5.998939, 1.235638}, + // ring 4 + {2.088115, 4.541529}, + {1.034892, 3.530299}, + {2.415080, 2.896937}, + {3.208660, 3.745936}, + {2.088115, 4.541529}}); + auto multipolygons = multipoly_array.range(); + + auto bboxes = + rmm::device_uvector>(multipolygons.num_polygons(), this->stream()); + + cuspatial::polygon_bounding_boxes(multipolygons.part_offset_begin(), + multipolygons.part_offset_end(), + multipolygons.ring_offset_begin(), + multipolygons.ring_offset_end(), + multipolygons.point_begin(), + multipolygons.point_end(), + bboxes.begin(), + T{0}, + this->stream()); + + auto [poly_indices, quad_indices] = cuspatial::join_quadtree_and_bounding_boxes( + quadtree, bboxes.begin(), bboxes.end(), v_min, scale, max_depth, this->stream(), this->mr()); + + auto [poly_offset, point_offset] = cuspatial::quadtree_point_in_polygon(poly_indices.begin(), + poly_indices.end(), + quad_indices.begin(), + quadtree, + point_indices.begin(), + point_indices.end(), + points.begin(), + multipolygons, + this->stream()); + + auto expected_poly_offset = + make_device_vector({3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3}); + auto expected_point_offset = make_device_vector( + {28, 29, 30, 31, 32, 33, 34, 35, 45, 46, 47, 48, 49, 50, 51, 52, 54, 62, 60}); + + CUSPATIAL_EXPECT_VECTORS_EQUIVALENT(poly_offset, expected_poly_offset); + CUSPATIAL_EXPECT_VECTORS_EQUIVALENT(point_offset, expected_point_offset); +} diff --git a/cpp/tests/experimental/spatial/hausdorff_test.cu b/cpp/tests/experimental/spatial/hausdorff_test.cu index c7b10ea7b..96a80595b 100644 --- a/cpp/tests/experimental/spatial/hausdorff_test.cu +++ b/cpp/tests/experimental/spatial/hausdorff_test.cu @@ -167,6 +167,6 @@ void generic_hausdorff_test() TYPED_TEST(HausdorffTest, 500Spaces100Points) { generic_hausdorff_test(); } -TYPED_TEST(HausdorffTest, 10000Spaces10Points) { generic_hausdorff_test(); } +TYPED_TEST(HausdorffTest, 1000Spaces10Points) { generic_hausdorff_test(); } TYPED_TEST(HausdorffTest, 10Spaces10000Points) { generic_hausdorff_test(); } diff --git a/cpp/tests/experimental/spatial/point_bounding_boxes_test.cu b/cpp/tests/experimental/spatial/point_bounding_boxes_test.cu index a1b0a0377..c9d726e3f 100644 --- a/cpp/tests/experimental/spatial/point_bounding_boxes_test.cu +++ b/cpp/tests/experimental/spatial/point_bounding_boxes_test.cu @@ -60,13 +60,13 @@ struct PointBoundingBoxesTest : public ::testing::Test { using TestTypes = ::testing::Types; TYPED_TEST_CASE(PointBoundingBoxesTest, TestTypes); -TYPED_TEST(PointBoundingBoxesTest, OneMillionSmallTrajectories) { this->run_test(1'000'000, 50); } +TYPED_TEST(PointBoundingBoxesTest, TenThousandSmallTrajectories) { this->run_test(10'000, 50); } -TYPED_TEST(PointBoundingBoxesTest, OneHundredLargeTrajectories) { this->run_test(100, 1'000'000); } +TYPED_TEST(PointBoundingBoxesTest, OneHundredLargeTrajectories) { this->run_test(100, 10'000); } -TYPED_TEST(PointBoundingBoxesTest, OneVeryLargeTrajectory) { this->run_test(1, 100'000'000); } +TYPED_TEST(PointBoundingBoxesTest, OneVeryLargeTrajectory) { this->run_test(1, 1'000'000); } TYPED_TEST(PointBoundingBoxesTest, TrajectoriesWithExpansion) { - this->run_test(1'000'000, 50, TypeParam{0.5}); + this->run_test(10'000, 50, TypeParam{0.5}); } diff --git a/cpp/tests/experimental/spatial/point_distance_test.cu b/cpp/tests/experimental/spatial/point_distance_test.cu index a6d873a27..517a4e605 100644 --- a/cpp/tests/experimental/spatial/point_distance_test.cu +++ b/cpp/tests/experimental/spatial/point_distance_test.cu @@ -45,7 +45,8 @@ #include -namespace cuspatial { +using cuspatial::multipoint_range; +using cuspatial::vec_2d; /** * @brief Generate `num_points` points on device @@ -57,9 +58,9 @@ struct PairwisePointDistanceTest : public ::testing::Test { std::size_t seed, rmm::cuda_stream_view stream = rmm::cuda_stream_default) { - auto engine = deterministic_engine(0); - auto uniform = make_normal_dist(0.0, 1.0); - auto pgen = point_generator(T{0.0}, T{1.0}, engine, uniform); + auto engine = cuspatial::test::deterministic_engine(0); + auto uniform = cuspatial::test::make_normal_dist(0.0, 1.0); + auto pgen = cuspatial::test::point_generator(T{0.0}, T{1.0}, engine, uniform); rmm::device_vector> points(num_points); auto counting_iter = thrust::make_counting_iterator(seed); thrust::transform( @@ -174,8 +175,9 @@ TYPED_TEST(PairwisePointDistanceTest, Empty) rmm::device_vector expected{}; rmm::device_vector got(points1.size()); - auto multipoint_1 = multipoint_range{ - multipoint_geom1.begin(), multipoint_geom1.end(), points1.begin(), points1.end()}; + auto multipoint_1 = + multipoint_range{ + multipoint_geom1.begin(), multipoint_geom1.end(), points1.begin(), points1.end()}; auto multipoint_2 = multipoint_range{ multipoint_geom2.begin(), multipoint_geom2.end(), points2.begin(), points2.end()}; @@ -200,7 +202,7 @@ TYPED_TEST(PairwisePointDistanceTest, OnePairSingleComponent) rmm::device_vector expected{std::vector{std::sqrt(T{2.0})}}; rmm::device_vector got(points1.size()); - auto multipoint_1 = multipoint_range{ + auto multipoint_1 = multipoint_range, decltype(points1.begin())>{ multipoint_geom1, multipoint_geom1 + num_pairs + 1, points1.begin(), points1.end()}; auto multipoint_2 = multipoint_range{ multipoint_geom2, multipoint_geom2 + num_pairs + 1, points2.begin(), points2.end()}; @@ -385,8 +387,8 @@ TYPED_TEST(PairwisePointDistanceTest, SingleComponentCompareWithShapely) rmm::device_vector dx1(x1), dy1(y1), dx2(x2), dy2(y2); rmm::device_vector got(dx1.size()); - auto p1_begin = make_vec_2d_iterator(dx1.begin(), dy1.begin()); - auto p2_begin = make_vec_2d_iterator(dx2.begin(), dy2.begin()); + auto p1_begin = cuspatial::make_vec_2d_iterator(dx1.begin(), dy1.begin()); + auto p2_begin = cuspatial::make_vec_2d_iterator(dx2.begin(), dy2.begin()); auto multipoints_1 = make_multipoint_range(dx1.size(), p1_geom, dx1.size(), p1_begin); auto multipoints_2 = make_multipoint_range(dx2.size(), p2_geom, dx2.size(), p2_begin); @@ -449,5 +451,3 @@ TYPED_TEST(PairwisePointDistanceTest, MultiComponentRandom) CUSPATIAL_EXPECT_VECTORS_EQUIVALENT(expected, got); EXPECT_EQ(expected.size(), std::distance(got.begin(), ret_it)); } - -} // namespace cuspatial diff --git a/cpp/tests/experimental/trajectory/derive_trajectories_test.cu b/cpp/tests/experimental/trajectory/derive_trajectories_test.cu index ca4384703..20ffd32c9 100644 --- a/cpp/tests/experimental/trajectory/derive_trajectories_test.cu +++ b/cpp/tests/experimental/trajectory/derive_trajectories_test.cu @@ -43,9 +43,9 @@ struct DeriveTrajectoriesTest : public ::testing::Test { using TestTypes = ::testing::Types; TYPED_TEST_CASE(DeriveTrajectoriesTest, TestTypes); -TYPED_TEST(DeriveTrajectoriesTest, OneMillionSmallTrajectories) +TYPED_TEST(DeriveTrajectoriesTest, TenThousandSmallTrajectories) { - auto data = cuspatial::test::trajectory_test_data(1'000'000, 50); + auto data = cuspatial::test::trajectory_test_data(10'000, 50); auto traj_ids = rmm::device_vector(data.ids.size()); auto traj_points = rmm::device_vector>(data.points.size()); @@ -66,7 +66,7 @@ TYPED_TEST(DeriveTrajectoriesTest, OneMillionSmallTrajectories) TYPED_TEST(DeriveTrajectoriesTest, OneHundredLargeTrajectories) { - auto data = cuspatial::test::trajectory_test_data(100, 1'000'000); + auto data = cuspatial::test::trajectory_test_data(100, 10'000); auto traj_ids = rmm::device_vector(data.ids.size()); auto traj_points = rmm::device_vector>(data.points.size()); @@ -87,7 +87,7 @@ TYPED_TEST(DeriveTrajectoriesTest, OneHundredLargeTrajectories) TYPED_TEST(DeriveTrajectoriesTest, OneVeryLargeTrajectory) { - auto data = cuspatial::test::trajectory_test_data(1, 100'000'000); + auto data = cuspatial::test::trajectory_test_data(1, 1'000'000); auto traj_ids = rmm::device_vector(data.ids.size()); auto traj_points = rmm::device_vector>(data.points.size()); diff --git a/cpp/tests/experimental/trajectory/trajectory_distances_and_speeds_test.cu b/cpp/tests/experimental/trajectory/trajectory_distances_and_speeds_test.cu index bebef9a6f..aa91751a6 100644 --- a/cpp/tests/experimental/trajectory/trajectory_distances_and_speeds_test.cu +++ b/cpp/tests/experimental/trajectory/trajectory_distances_and_speeds_test.cu @@ -88,19 +88,19 @@ struct TrajectoryDistancesAndSpeedsTest : public ::testing::Test { using TestTypes = ::testing::Types; TYPED_TEST_CASE(TrajectoryDistancesAndSpeedsTest, TestTypes); -TYPED_TEST(TrajectoryDistancesAndSpeedsTest, OneMillionSmallTrajectories) +TYPED_TEST(TrajectoryDistancesAndSpeedsTest, TenThousandSmallTrajectories) { - CUSPATIAL_RUN_TEST(this->run_test, 1'000'000, 50); + CUSPATIAL_RUN_TEST(this->run_test, 10'000, 50); } TYPED_TEST(TrajectoryDistancesAndSpeedsTest, OneHundredLargeTrajectories) { - CUSPATIAL_RUN_TEST(this->run_test, 100, 1'000'000); + CUSPATIAL_RUN_TEST(this->run_test, 100, 10'000); } TYPED_TEST(TrajectoryDistancesAndSpeedsTest, OneVeryLargeTrajectory) { - CUSPATIAL_RUN_TEST(this->run_test, 1, 100'000'000); + CUSPATIAL_RUN_TEST(this->run_test, 1, 1'000'000); } struct time_point_generator { diff --git a/cpp/tests/join/point_in_polygon_test_large.cpp b/cpp/tests/join/point_in_polygon_test_large.cpp deleted file mode 100644 index 9afc54506..000000000 --- a/cpp/tests/join/point_in_polygon_test_large.cpp +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include - -/* - * The test uses the same quadtree structure as in pip_refine_test_small. However, the number of - * randomly generated points under all quadrants (min_size) are increased to be more than the - * number of threads per-block. - */ - -constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; - -template -struct PIPRefineTestLarge : public cudf::test::BaseFixture { -}; - -TYPED_TEST_CASE(PIPRefineTestLarge, cudf::test::FloatingPointTypes); - -template -inline auto generate_points(std::vector> const& quads, uint32_t points_per_quad) -{ - std::vector point_x(quads.size() * points_per_quad); - std::vector point_y(quads.size() * points_per_quad); - for (uint32_t i = 0, pos = 0; i < quads.size(); i++, pos += points_per_quad) { - cudf::test::UniformRandomGenerator dist_x{quads[i][0], quads[i][1]}; - cudf::test::UniformRandomGenerator dist_y{quads[i][2], quads[i][3]}; - std::generate(point_x.begin() + pos, point_x.begin() + pos + points_per_quad, [&]() mutable { - return dist_x.generate(); - }); - std::generate(point_y.begin() + pos, point_y.begin() + pos + points_per_quad, [&]() mutable { - return dist_y.generate(); - }); - } - return std::make_pair(std::move(point_x), std::move(point_y)); -} - -TYPED_TEST(PIPRefineTestLarge, TestLarge) -{ - using T = TypeParam; - using namespace cudf::test; - - double const x_min{0.0}; - double const x_max{8.0}; - double const y_min{0.0}; - double const y_max{8.0}; - double const scale{1.0}; - uint32_t const max_depth{3}; - uint32_t const min_size{400}; - - std::vector> quads{{0, 2, 0, 2}, - {3, 4, 0, 1}, - {2, 3, 1, 2}, - {4, 6, 0, 2}, - {3, 4, 2, 3}, - {2, 3, 3, 4}, - {6, 7, 2, 3}, - {7, 8, 3, 4}, - {0, 4, 4, 8}}; - - auto host_points = generate_points(quads, min_size); - auto& h_x = std::get<0>(host_points); - auto& h_y = std::get<1>(host_points); - auto x = fixed_width_column_wrapper(h_x.begin(), h_x.end()); - auto y = fixed_width_column_wrapper(h_y.begin(), h_y.end()); - - auto quadtree_pair = cuspatial::quadtree_on_points( - x, y, x_min, x_max, y_min, y_max, scale, max_depth, min_size, this->mr()); - - auto& quadtree = std::get<1>(quadtree_pair); - auto& point_indices = std::get<0>(quadtree_pair); - auto points = cudf::gather( - cudf::table_view{{x, y}}, *point_indices, cudf::out_of_bounds_policy::DONT_CHECK, this->mr()); - - auto poly_offsets = fixed_width_column_wrapper({0, 1, 2, 3, 4}); - auto ring_offsets = fixed_width_column_wrapper({0, 4, 10, 14, 19}); - auto poly_x = fixed_width_column_wrapper({// ring 1 - 2.488450, - 1.333584, - 3.460720, - 2.488450, - // ring 2 - 5.039823, - 5.561707, - 7.103516, - 7.190674, - 5.998939, - 5.039823, - // ring 3 - 5.998939, - 5.573720, - 6.703534, - 5.998939, - // ring 4 - 2.088115, - 1.034892, - 2.415080, - 3.208660, - 2.088115}); - auto poly_y = fixed_width_column_wrapper({// ring 1 - 5.856625, - 5.008840, - 4.586599, - 5.856625, - // ring 2 - 4.229242, - 1.825073, - 1.503906, - 4.025879, - 5.653384, - 4.229242, - // ring 3 - 1.235638, - 0.197808, - 0.086693, - 1.235638, - // ring 4 - 4.541529, - 3.530299, - 2.896937, - 3.745936, - 4.541529}); - - auto polygon_bboxes = - cuspatial::polygon_bounding_boxes(poly_offsets, ring_offsets, poly_x, poly_y, 0.0, this->mr()); - - auto polygon_quadrant_pairs = cuspatial::join_quadtree_and_bounding_boxes( - *quadtree, *polygon_bboxes, x_min, x_max, y_min, y_max, scale, max_depth, this->mr()); - - auto point_in_polygon_pairs = cuspatial::quadtree_point_in_polygon(*polygon_quadrant_pairs, - *quadtree, - *point_indices, - x, - y, - poly_offsets, - ring_offsets, - poly_x, - poly_y, - this->mr()); - - auto poly_idx = point_in_polygon_pairs->get_column(0).view(); - auto point_idx = point_in_polygon_pairs->get_column(1).view(); - - auto actual_poly_indices = cudf::test::to_host(poly_idx).first; - auto actual_point_indices = cudf::test::to_host(point_idx).first; - - thrust::stable_sort_by_key( - actual_point_indices.begin(), actual_point_indices.end(), actual_poly_indices.begin()); - - { - // verify - - auto hits = cuspatial::point_in_polygon( - points->get_column(0), points->get_column(1), poly_offsets, ring_offsets, poly_x, poly_y); - - auto hits_host = cudf::test::to_host(hits->view()).first; - - std::vector expected_poly_indices; - std::vector expected_point_indices; - - for (int point_index = 0; point_index < hits->size(); point_index++) { - // iterate over set bits - std::uint32_t bits = hits_host[point_index]; - while (bits != 0) { - std::uint32_t t = bits & -bits; // get only LSB - std::uint32_t poly_index = __builtin_ctz(bits); // get index of LSB - expected_poly_indices.push_back(poly_index); - expected_point_indices.push_back(point_index); - bits ^= t; // reset LSB to zero to advance to next set bit - } - } - - thrust::stable_sort_by_key( - expected_point_indices.begin(), expected_point_indices.end(), expected_poly_indices.begin()); - - auto poly_a = fixed_width_column_wrapper(expected_poly_indices.begin(), - expected_poly_indices.end()); - auto poly_b = - fixed_width_column_wrapper(actual_poly_indices.begin(), actual_poly_indices.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(poly_a, poly_b, verbosity); - - auto point_a = fixed_width_column_wrapper(expected_point_indices.begin(), - expected_point_indices.end()); - auto point_b = fixed_width_column_wrapper(actual_point_indices.begin(), - actual_point_indices.end()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(point_a, point_b, verbosity); - } -} diff --git a/cpp/tests/join/point_in_polygon_test_small.cpp b/cpp/tests/join/point_in_polygon_test_small.cpp deleted file mode 100644 index e3eb5191a..000000000 --- a/cpp/tests/join/point_in_polygon_test_small.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include - -/* - * A small test that it is suitable for manually visualizing point-polygon pairing results in a GIS - * environment GPU results are compared with expected values embedded in code However, the number of - * points in each quadrant is less than 32, the two kernels for point-in-polygon test are not fully - * tested. This is left for pip_refine_test_large. - */ -template -struct PIPRefineTestSmall : public cudf::test::BaseFixture { -}; - -TYPED_TEST_CASE(PIPRefineTestSmall, cudf::test::FloatingPointTypes); - -TYPED_TEST(PIPRefineTestSmall, TestSmall) -{ - using T = TypeParam; - using namespace cudf::test; - - double const x_min{0.0}; - double const x_max{8.0}; - double const y_min{0.0}; - double const y_max{8.0}; - double const scale{1.0}; - uint32_t const max_depth{3}; - uint32_t const min_size{12}; - - fixed_width_column_wrapper x( - {1.9804558865545805, 0.1895259128530169, 1.2591725716781235, 0.8178039499335275, - 0.48171647380517046, 1.3890664414691907, 0.2536015260915061, 3.1907684812039956, - 3.028362149164369, 3.918090468102582, 3.710910700915217, 3.0706987088385853, - 3.572744183805594, 3.7080407833612004, 3.70669993057843, 3.3588457228653024, - 2.0697434332621234, 2.5322042870739683, 2.175448214220591, 2.113652420701984, - 2.520755151373394, 2.9909779614491687, 2.4613232527836137, 4.975578758530645, - 4.07037627210835, 4.300706849071861, 4.5584381091040616, 4.822583857757069, - 4.849847745942472, 4.75489831780737, 4.529792124514895, 4.732546857961497, - 3.7622247877537456, 3.2648444465931474, 3.01954722322135, 3.7164018490892348, - 3.7002781846945347, 2.493975723955388, 2.1807636574967466, 2.566986568683904, - 2.2006520196663066, 2.5104987015171574, 2.8222482218882474, 2.241538022180476, - 2.3007438625108882, 6.0821276168848994, 6.291790729917634, 6.109985464455084, - 6.101327777646798, 6.325158445513714, 6.6793884701899, 6.4274219368674315, - 6.444584786789386, 7.897735998643542, 7.079453687660189, 7.430677191305505, - 7.5085184104988, 7.886010001346151, 7.250745898479374, 7.769497359206111, - 1.8703303641352362, 1.7015273093278767, 2.7456295127617385, 2.2065031771469, - 3.86008672302403, 1.9143371250907073, 3.7176098065039747, 0.059011873032214, - 3.1162712022943757, 2.4264509160270813, 3.154282922203257}); - - fixed_width_column_wrapper y( - {1.3472225743317712, 0.5431061133894604, 0.1448705855995005, 0.8138440641113271, - 1.9022922214961997, 1.5177694304735412, 1.8762161698642947, 0.2621847215928189, - 0.027638405909631958, 0.3338651960183463, 0.9937713340192049, 0.9376313558467103, - 0.33184908855075124, 0.09804238103130436, 0.7485845679979923, 0.2346381514128677, - 1.1809465376402173, 1.419555755682142, 1.2372448404986038, 1.2774712415624014, - 1.902015274420646, 1.2420487904041893, 1.0484414482621331, 0.9606291981013242, - 1.9486902798139454, 0.021365525588281198, 1.8996548860019926, 0.3234041700489503, - 1.9531893897409585, 0.7800065259479418, 1.942673409259531, 0.5659923375279095, - 2.8709552313924487, 2.693039435509084, 2.57810040095543, 2.4612194182614333, - 2.3345952955903906, 3.3999020934055837, 3.2296461832828114, 3.6607732238530897, - 3.7672478678985257, 3.0668114607133137, 3.8159308233351266, 3.8812819070357545, - 3.6045900851589048, 2.5470532680258002, 2.983311357415729, 2.2235950639628523, - 2.5239201807166616, 2.8765450351723674, 2.5605928243991434, 2.9754616970668213, - 2.174562817047202, 3.380784914178574, 3.063690547962938, 3.380489849365283, - 3.623862886287816, 3.538128217886674, 3.4154469467473447, 3.253257011908445, - 4.209727933188015, 7.478882372510933, 7.474216636277054, 6.896038613284851, - 7.513564222799629, 6.885401350515916, 6.194330707468438, 5.823535317960799, - 6.789029097334483, 5.188939408363776, 5.788316610960881}); - - auto quadtree_pair = cuspatial::quadtree_on_points( - x, y, x_min, x_max, y_min, y_max, scale, max_depth, min_size, this->mr()); - - auto& quadtree = std::get<1>(quadtree_pair); - auto& point_indices = std::get<0>(quadtree_pair); - - fixed_width_column_wrapper poly_offsets({0, 1, 2, 3, 4}); - fixed_width_column_wrapper ring_offsets({0, 4, 10, 14, 19}); - fixed_width_column_wrapper poly_x({// ring 1 - 2.488450, - 1.333584, - 3.460720, - 2.488450, - // ring 2 - 5.039823, - 5.561707, - 7.103516, - 7.190674, - 5.998939, - 5.039823, - // ring 3 - 5.998939, - 5.573720, - 6.703534, - 5.998939, - // ring 4 - 2.088115, - 1.034892, - 2.415080, - 3.208660, - 2.088115}); - fixed_width_column_wrapper poly_y({// ring 1 - 5.856625, - 5.008840, - 4.586599, - 5.856625, - // ring 2 - 4.229242, - 1.825073, - 1.503906, - 4.025879, - 5.653384, - 4.229242, - // ring 3 - 1.235638, - 0.197808, - 0.086693, - 1.235638, - // ring 4 - 4.541529, - 3.530299, - 2.896937, - 3.745936, - 4.541529}); - - auto polygon_bboxes = - cuspatial::polygon_bounding_boxes(poly_offsets, ring_offsets, poly_x, poly_y, 0.0, this->mr()); - - auto polygon_quadrant_pairs = cuspatial::join_quadtree_and_bounding_boxes( - *quadtree, *polygon_bboxes, x_min, x_max, y_min, y_max, scale, max_depth, this->mr()); - - auto point_in_polygon_pairs = cuspatial::quadtree_point_in_polygon(*polygon_quadrant_pairs, - *quadtree, - *point_indices, - x, - y, - poly_offsets, - ring_offsets, - poly_x, - poly_y, - this->mr()); - - CUSPATIAL_EXPECTS(point_in_polygon_pairs->num_columns() == 2, - "a polygon-quadrant pair table must have 2 columns"); - fixed_width_column_wrapper expected1( - {3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3}); - fixed_width_column_wrapper expected2( - {28, 29, 30, 31, 32, 33, 34, 35, 45, 46, 47, 48, 49, 50, 51, 52, 54, 62, 60}); - auto expected = cudf::table_view{{expected1, expected2}}; - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, *point_in_polygon_pairs); -} diff --git a/cpp/tests/join/quadtree_point_in_polygon_test.cpp b/cpp/tests/join/quadtree_point_in_polygon_test.cpp new file mode 100644 index 000000000..93b32ffa7 --- /dev/null +++ b/cpp/tests/join/quadtree_point_in_polygon_test.cpp @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +using T = float; + +template +using wrapper = cudf::test::fixed_width_column_wrapper; + +struct QuadtreePointInPolygonErrorTest : public ::testing::Test { + auto prepare_test(cudf::column_view const& x, + cudf::column_view const& y, + cudf::column_view const& polygon_offsets, + cudf::column_view const& ring_offsets, + cudf::column_view const& polygon_x, + cudf::column_view const& polygon_y, + cuspatial::vec_2d v_min, + cuspatial::vec_2d v_max, + T scale, + uint32_t max_depth, + uint32_t min_size, + T expansion_radius) + { + using namespace cudf::test; + + auto quadtree_pair = cuspatial::quadtree_on_points( + x, y, v_min.x, v_max.x, v_min.y, v_max.y, scale, max_depth, min_size); + + auto& quadtree = std::get<1>(quadtree_pair); + auto& point_indices = std::get<0>(quadtree_pair); + + auto polygon_bboxes = cuspatial::polygon_bounding_boxes( + polygon_offsets, ring_offsets, polygon_x, polygon_y, expansion_radius); + + auto polygon_quadrant_pairs = cuspatial::join_quadtree_and_bounding_boxes( + *quadtree, *polygon_bboxes, v_min.x, v_max.x, v_min.y, v_max.y, scale, max_depth); + + return std::make_tuple(std::move(quadtree), + std::move(point_indices), + std::move(polygon_bboxes), + std::move(polygon_quadrant_pairs)); + } + + void SetUp() override + { + using namespace cudf::test; + + cuspatial::vec_2d v_min{0.0, 0.0}; + cuspatial::vec_2d v_max{8.0, 8.0}; + double const scale{1.0}; + uint32_t const max_depth{3}; + uint32_t const min_size{12}; + double const expansion_radius{0.0}; + + auto x_col = + wrapper({1.9804558865545805, 0.1895259128530169, 1.2591725716781235, 0.8178039499335275}); + auto y_col = + wrapper({1.3472225743317712, 0.5431061133894604, 0.1448705855995005, 0.8138440641113271}); + + auto polygon_offsets_col = wrapper({0, 4, 10}); + auto ring_offsets_col = wrapper({0, 4, 10}); + auto polygon_x_col = wrapper({// ring 1 + 2.488450, + 1.333584, + 3.460720, + 2.488450, + // ring 2 + 5.039823, + 5.561707, + 7.103516, + 7.190674, + 5.998939, + 5.039823}); + auto polygon_y_col = wrapper({// ring 1 + 2.488450, + 1.333584, + 3.460720, + 2.488450, + // ring 2 + 5.039823, + 5.561707, + 7.103516, + 7.190674, + 5.998939, + 5.039823}); + + std::tie(quadtree, point_indices, polygon_bboxes, polygon_quadrant_pairs) = + prepare_test(x_col, + y_col, + polygon_offsets_col, + ring_offsets_col, + polygon_x_col, + polygon_y_col, + v_min, + v_max, + scale, + max_depth, + min_size, + expansion_radius); + + x = x_col.release(); + y = y_col.release(); + polygon_offsets = polygon_offsets_col.release(); + ring_offsets = ring_offsets_col.release(); + polygon_x = polygon_x_col.release(); + polygon_y = polygon_y_col.release(); + } + + void TearDown() override {} + + std::unique_ptr x; + std::unique_ptr y; + std::unique_ptr polygon_offsets; + std::unique_ptr ring_offsets; + std::unique_ptr polygon_x; + std::unique_ptr polygon_y; + std::unique_ptr point_indices; + std::unique_ptr quadtree; + std::unique_ptr polygon_bboxes; + std::unique_ptr polygon_quadrant_pairs; +}; + +// test cudf::quadtree_point_in_polygon with empty inputs +TEST_F(QuadtreePointInPolygonErrorTest, test_empty) +{ + // empty point data + { + auto empty_point_indices = wrapper({}); + auto empty_x = wrapper({}); + auto empty_y = wrapper({}); + + auto results = cuspatial::quadtree_point_in_polygon(*polygon_quadrant_pairs, + *quadtree, + empty_point_indices, + empty_x, + empty_y, + *polygon_offsets, + *ring_offsets, + *polygon_x, + *polygon_y); + + auto expected_poly_offset = wrapper({}); + auto expected_point_offset = wrapper({}); + + auto expected = cudf::table_view{ + {cudf::column_view(expected_poly_offset), cudf::column_view(expected_point_offset)}}; + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, *results); + } + + // empty polygon data + { + auto empty_polygon_offsets = wrapper({}); + auto empty_ring_offsets = wrapper({}); + auto empty_polygon_x = wrapper({}); + auto empty_polygon_y = wrapper({}); + + auto results = cuspatial::quadtree_point_in_polygon(*polygon_quadrant_pairs, + *quadtree, + *point_indices, + *x, + *y, + empty_polygon_offsets, + empty_ring_offsets, + empty_polygon_x, + empty_polygon_y); + + auto expected_poly_offset = wrapper({}); + auto expected_point_offset = wrapper({}); + + auto expected = cudf::table_view{ + {cudf::column_view(expected_poly_offset), cudf::column_view(expected_point_offset)}}; + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected, *results); + } +} + +TEST_F(QuadtreePointInPolygonErrorTest, type_mismatch) +{ + // x/y type mismatch + { + auto x_col = wrapper({1, 2, 3, 4}); + auto y_col = wrapper({1, 2, 3, 4}); + + EXPECT_THROW(cuspatial::quadtree_point_in_polygon(*polygon_quadrant_pairs, + *quadtree, + *point_indices, + x_col, + y_col, + *polygon_offsets, + *ring_offsets, + *polygon_x, + *polygon_y), + cuspatial::logic_error); + } + + // polygon_x/polygon_y type mismatch + { + auto polygon_x_col = wrapper({1, 2, 3, 4}); + auto polygon_y_col = wrapper({1, 2, 3, 4}); + + EXPECT_THROW(cuspatial::quadtree_point_in_polygon(*polygon_quadrant_pairs, + *quadtree, + *point_indices, + *x, + *y, + *polygon_offsets, + *ring_offsets, + polygon_x_col, + polygon_y_col), + cuspatial::logic_error); + } + + // x / polygon_x type mismatch + { + auto x_col = wrapper({1, 2, 3, 4}); + auto polygon_x_col = wrapper({1, 2, 3, 4}); + + EXPECT_THROW(cuspatial::quadtree_point_in_polygon(*polygon_quadrant_pairs, + *quadtree, + *point_indices, + x_col, + *y, + *polygon_offsets, + *ring_offsets, + polygon_x_col, + *polygon_y), + cuspatial::logic_error); + } +} + +TEST_F(QuadtreePointInPolygonErrorTest, offset_type_error) +{ + { + auto polygon_offsets_col = wrapper({0, 4, 10}); + auto ring_offsets_col = wrapper({0, 4, 10}); + + EXPECT_THROW(cuspatial::quadtree_point_in_polygon(*polygon_quadrant_pairs, + *quadtree, + *point_indices, + *x, + *y, + polygon_offsets_col, + ring_offsets_col, + *polygon_x, + *polygon_y), + cuspatial::logic_error); + } +} + +TEST_F(QuadtreePointInPolygonErrorTest, size_mismatch) +{ + { + auto polygon_offsets_col = wrapper({0, 4, 10}); + auto ring_offsets_col = wrapper({0, 4, 10}); + auto poly_x = wrapper({1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + auto poly_y = wrapper({1, 2, 3, 4, 5, 6}); + + EXPECT_THROW(cuspatial::quadtree_point_in_polygon(*polygon_quadrant_pairs, + *quadtree, + *point_indices, + *x, + *y, + polygon_offsets_col, + ring_offsets_col, + poly_x, + poly_y), + cuspatial::logic_error); + } + { + auto point_indices = wrapper({0, 1, 2, 3, 4}); + auto x = wrapper({1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + auto y = wrapper({1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + + EXPECT_THROW(cuspatial::quadtree_point_in_polygon(*polygon_quadrant_pairs, + *quadtree, + point_indices, + x, + y, + *polygon_offsets, + *ring_offsets, + *polygon_x, + *polygon_y), + cuspatial::logic_error); + } + { + auto point_indices = wrapper({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + auto x = wrapper({1, 2, 3, 4, 5}); + auto y = wrapper({1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + + EXPECT_THROW(cuspatial::quadtree_point_in_polygon(*polygon_quadrant_pairs, + *quadtree, + point_indices, + x, + y, + *polygon_offsets, + *ring_offsets, + *polygon_x, + *polygon_y), + cuspatial::logic_error); + } + { + auto point_indices = wrapper({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + auto x = wrapper({1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + auto y = wrapper({1, 2, 3, 4, 5}); + + EXPECT_THROW(cuspatial::quadtree_point_in_polygon(*polygon_quadrant_pairs, + *quadtree, + point_indices, + x, + y, + *polygon_offsets, + *ring_offsets, + *polygon_x, + *polygon_y), + cuspatial::logic_error); + } +} diff --git a/cpp/tests/utility_test/test_geometry_generators.cu b/cpp/tests/utility_test/test_geometry_generators.cu index 223eb6c73..5ecaab106 100644 --- a/cpp/tests/utility_test/test_geometry_generators.cu +++ b/cpp/tests/utility_test/test_geometry_generators.cu @@ -315,8 +315,8 @@ TEST_P(GeometryFactoryCountVerificationTest, CountsVerification) INSTANTIATE_TEST_SUITE_P( GeometryFactoryCountVerificationTests, GeometryFactoryCountVerificationTest, - ::testing::Combine(::testing::Values(1, 1000), // num_multipolygons - ::testing::Values(1, 30), // num_polygons_per_multipolygon - ::testing::Values(0, 100), // num_holes_per_polygon - ::testing::Values(3, 100) // num_sides_per_ring + ::testing::Combine(::testing::Values(1, 100), // num_multipolygons + ::testing::Values(1, 30), // num_polygons_per_multipolygon + ::testing::Values(0, 100), // num_holes_per_polygon + ::testing::Values(3, 100) // num_sides_per_ring )); diff --git a/dependencies.yaml b/dependencies.yaml index 5dcf27ecb..2dc4b7bb4 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -14,6 +14,7 @@ files: - py_version - run_python - test_python + - notebooks test_cpp: output: none includes: @@ -172,6 +173,7 @@ dependencies: - ipython - notebook - shapely + - pydeck py_version: specific: - output_types: conda diff --git a/docs/source/user_guide/cuspatial_api_examples.ipynb b/docs/source/user_guide/cuspatial_api_examples.ipynb index 761b11831..bd78b237f 100644 --- a/docs/source/user_guide/cuspatial_api_examples.ipynb +++ b/docs/source/user_guide/cuspatial_api_examples.ipynb @@ -889,6 +889,161 @@ "print(gpu_polygons.head())" ] }, + { + "cell_type": "markdown", + "id": "008d320d-ca47-459f-9fff-8769494c8a61", + "metadata": {}, + "source": [ + "### cuspatial.pairwise_point_polygon_distance\n", + "\n", + "Using WGS 84 Pseudo-Mercator, distances are in meters." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "258c9a8c-7fe3-4047-80b7-00878d9fb2f1", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pop_estcontinentnameiso_a3gdp_md_estgeometrydistance_fromdistance
0889953.0OceaniaFijiFJI5496MULTIPOLYGON (((20037508.343 -1812498.413, 200...Vatican City1.969350e+07
158005463.0AfricaTanzaniaTZA63177POLYGON ((3774143.866 -105758.362, 3792946.708...San Marino5.929777e+06
2603253.0AfricaW. SaharaESH907POLYGON ((-964649.018 3205725.605, -964597.245...Vaduz3.421172e+06
337589262.0North AmericaCanadaCAN1736425MULTIPOLYGON (((-13674486.249 6274861.394, -13...Lobamba1.296059e+07
4328239523.0North AmericaUnited States of AmericaUSA21433226MULTIPOLYGON (((-13674486.249 6274861.394, -13...Luxembourg8.174897e+06
\n", + "
" + ], + "text/plain": [ + " pop_est continent name iso_a3 gdp_md_est \\\n", + "0 889953.0 Oceania Fiji FJI 5496 \n", + "1 58005463.0 Africa Tanzania TZA 63177 \n", + "2 603253.0 Africa W. Sahara ESH 907 \n", + "3 37589262.0 North America Canada CAN 1736425 \n", + "4 328239523.0 North America United States of America USA 21433226 \n", + "\n", + " geometry distance_from \\\n", + "0 MULTIPOLYGON (((20037508.343 -1812498.413, 200... Vatican City \n", + "1 POLYGON ((3774143.866 -105758.362, 3792946.708... San Marino \n", + "2 POLYGON ((-964649.018 3205725.605, -964597.245... Vaduz \n", + "3 MULTIPOLYGON (((-13674486.249 6274861.394, -13... Lobamba \n", + "4 MULTIPOLYGON (((-13674486.249 6274861.394, -13... Luxembourg \n", + "\n", + " distance \n", + "0 1.969350e+07 \n", + "1 5.929777e+06 \n", + "2 3.421172e+06 \n", + "3 1.296059e+07 \n", + "4 8.174897e+06 \n", + "(GPU)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cities = geopandas.read_file(geopandas.datasets.get_path(\"naturalearth_cities\")).to_crs(3857)\n", + "countries = geopandas.read_file(geopandas.datasets.get_path(\"naturalearth_lowres\")).to_crs(3857)\n", + "\n", + "gpu_cities = cuspatial.from_geopandas(cities)\n", + "gpu_countries = cuspatial.from_geopandas(countries)\n", + "\n", + "dist = cuspatial.pairwise_point_polygon_distance(\n", + " gpu_cities.geometry[:len(gpu_countries)], gpu_countries.geometry\n", + ")\n", + "\n", + "gpu_countries[\"distance_from\"] = cities.name\n", + "gpu_countries[\"distance\"] = dist\n", + "\n", + "gpu_countries.head()" + ] + }, { "attachments": { "351aea0c-f37e-4ab9-bad2-c67bce69b5c3.png": { @@ -910,7 +1065,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "id": "d1ade9da-c9e2-45c4-9685-dffeda3fd358", "metadata": { "tags": [] @@ -975,7 +1130,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "id": "cc72a44d-a9bf-4432-9898-de899ac45869", "metadata": { "tags": [] @@ -993,7 +1148,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "id": "1125fd17-afe1-4b9c-b48d-8842dd3700b3", "metadata": { "tags": [] @@ -1002,7 +1157,7 @@ { "data": { "text/plain": [ - "\n", + "\n", "[\n", " 0,\n", " 144\n", @@ -1010,7 +1165,7 @@ "dtype: int32" ] }, - "execution_count": 19, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1023,7 +1178,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "id": "b281e3bb-42d4-4d60-9cb2-b7dcc20b4776", "metadata": { "tags": [] @@ -1046,7 +1201,7 @@ "Length: 144, dtype: geometry" ] }, - "execution_count": 20, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1058,7 +1213,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "id": "e19873b9-2614-4242-ad67-caa47f807d04", "metadata": { "tags": [] @@ -1117,7 +1272,7 @@ "0 [9, 10, 10, 11, 11, 28, 12, 12, 13, 13, 14, 15... " ] }, - "execution_count": 21, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -1150,7 +1305,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "id": "bf7b2256", "metadata": { "tags": [] @@ -1167,7 +1322,7 @@ "dtype: int64" ] }, - "execution_count": 22, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1252,7 +1407,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "id": "e3a0a9a3-0bdd-4f05-bcb5-7db4b99a44a3", "metadata": { "tags": [] @@ -1316,7 +1471,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "id": "023bd25a-35be-435d-ab0b-ecbd7a47e147", "metadata": { "tags": [] @@ -1375,7 +1530,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "id": "784aff8e-c9ed-4a81-aa87-bf301b3b90af", "metadata": { "tags": [] @@ -1390,7 +1545,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 27, "id": "fea24c78-cf5c-45c6-b860-338238e61323", "metadata": { "tags": [] diff --git a/notebooks/README.md b/notebooks/README.md index bd1835dbb..d0c2f430c 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -14,6 +14,7 @@ documentation tree, Notebook Title | Data set(s) | Notebook Description | External Download (Size) --- | --- | --- | --- [NYC Taxi Years Correlation](nyc_taxi_years_correlation.ipynb) | [NYC Taxi Yellow 01/2016, 01/2017, taxi zone data](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page) | Demonstrates using Point in Polygon to correlate the NYC Taxi datasets pre-2017 `lat/lon` locations with the post-2017 `LocationID` for cross format comparisons. | Yes (~3GB) +[Stop Sign Counting By Zipcode Boundary](ZipCodes_Stops_PiP_cuSpatial.ipynb) | [Stop Sign Locations](https://wiki.openstreetmap.org/wiki/Tag:highway%3Dstop) [Zipcode Boundaries](https://catalog.data.gov/dataset/tiger-line-shapefile-2019-2010-nation-u-s-2010-census-5-digit-zip-code-tabulation-area-zcta5-na) [USA States Boundaries](https://wiki.openstreetmap.org/wiki/Tag:boundary%3Dadministrative) | Demonstrates Quadtree Point-in-Polygon to categorize stop signs by zipcode boundaries. | Yes (~1GB) ## For more details Many more examples can be found in the [RAPIDS Notebooks diff --git a/notebooks/ZipCodes_Stops_PiP_cuSpatial.ipynb b/notebooks/ZipCodes_Stops_PiP_cuSpatial.ipynb new file mode 100644 index 000000000..c47f47753 --- /dev/null +++ b/notebooks/ZipCodes_Stops_PiP_cuSpatial.ipynb @@ -0,0 +1,686 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "458fe838-b143-4d31-9ddd-8efd0217f4a7", + "metadata": {}, + "source": [ + "# Stop Sign Counting By Zipcode in California" + ] + }, + { + "cell_type": "markdown", + "id": "6931011f-0d83-45ce-b254-4b5424b82624", + "metadata": {}, + "source": [ + "This notebook demonstrates the use of geodataframe joins with point-in-polygon in cuSpatial\n", + "\n", + "## Prerequisite: Datasets\n", + "\n", + "Datasets used:\n", + "1. Stops (Signs and Stop lines) dataset from OpenStreetMap\n", + "2. USA ZipCode boundaries from US Census Bureau\n", + "3. USA States boundaries from OpenStreetMap\n", + "\n", + "- OpenStreetMap is open data, licensed under [Open Data Commons Open Database License (ODbL)](https://opendatacommons.org/licenses/odbl/) by the [OpenStreetMap Foundation (OSMF)](https://wiki.osmfoundation.org/wiki/Main_Page).\n", + "- US Census Bureau data is open and free to use: https://www.census.gov/about/policies/open-gov/open-data.html\n", + "- TIGER/Line Shapefile, 2019, 2010 nation, U.S., 2010 Census 5-Digit ZIP Code Tabulation Area (ZCTA5) National, Metadata Updated: November 1, 2022.\" Accessed March xx, 2023. https://catalog.data.gov/dataset/tiger-line-shapefile-2019-2010-nation-u-s-2010-census-5-digit-zip-code-tabulation-area-zcta5-na\n", + "\n", + "Disclaimer: Each user is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "597750fe-518e-4944-8cbe-b84aea22481e", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "USA_States.csv found\n", + "USA_Stops_Vertices.csv found\n", + "USA_Zipcodes_2019_Tiger.csv found\n" + ] + } + ], + "source": [ + "# Download the datasets and save as:\n", + "# 1. USA_Stops_Vertices.csv\n", + "# 2. USA_Zipcodes_2019_Tiger.csv\n", + "# 3. USA_States.csv\n", + "\n", + "!if [ ! -f \"USA_States.csv\" ]; then curl \"https://data.rapids.ai/cuspatial/benchmark/USA_States.csv\" -o USA_States.csv; else echo \"USA_States.csv found\"; fi\n", + "!if [ ! -f \"USA_Stops_Vertices.csv\" ]; then curl \"https://data.rapids.ai/cuspatial/benchmark/USA_Stops_Vertices.csv\" -o USA_Stops_Vertices.csv; else echo \"USA_Stops_Vertices.csv found\"; fi\n", + "!if [ ! -f \"USA_Zipcodes_2019_Tiger.csv\" ]; then curl \"https://data.rapids.ai/cuspatial/benchmark/USA_Zipcodes_2019_Tiger.csv\" -o USA_Zipcodes_2019_Tiger.csv; else echo \"USA_Zipcodes_2019_Tiger.csv found\"; fi" + ] + }, + { + "cell_type": "markdown", + "id": "497810f3-acf0-4472-a187-322413c9db11", + "metadata": {}, + "source": [ + "## Overview\n", + "\n", + "Stop signs are an important symbol of city and road development in geographical information systems. This notebook processes all stop sign locations from the dataset, using spatial joins to locate them within the zipcode boundaries located within California. This notebook performs the following steps:\n", + "\n", + "1. Filters the zipcode boundaries located in California with spatial join.\n", + "2. Filters the stop signs located in all the zipcodes with spatial join.\n", + "3. Counts the stop signs by zipcode.\n", + "4. Visualize the result on map." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "b2892e9a-ce32-4e5a-94a9-dd121d2a6ea3", + "metadata": {}, + "outputs": [], + "source": [ + "# Import Necessary Packages\n", + "import os\n", + "import cuspatial, cudf\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "import cupy as cp\n", + "from shapely import wkt\n", + "import pydeck as pdk" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "086eda5a-8eda-46d8-8e90-54a2616799f8", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Root folder for datasets\n", + "DATASET_ROOT = \"./\"\n", + "\n", + "def path_of(dataset):\n", + " return os.path.join(DATASET_ROOT, dataset)" + ] + }, + { + "cell_type": "markdown", + "id": "044c84d9-2f82-4de5-a4c5-b7b9e5ef6b93", + "metadata": {}, + "source": [ + "## Load Dataset and Cleanup\n", + "\n", + "We load the datasets and store them as cuSpatial device dataframes. Note that the second cell below loads the dataset with cuDF, then adopts geopandas to parse the WKT (Well-Known Text) strings into shapely objects. This is a slow step performed on CPU and requires data transfer between device and host." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "fcb48a9c-f968-486c-8cb8-7e0de6cc7f1c", + "metadata": {}, + "outputs": [], + "source": [ + "# Import Stop Sign CSV\n", + "d_stops = cudf.read_csv(path_of(\"USA_Stops_Vertices.csv\"), usecols=[\"x\", \"y\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b12cfa00-ad8d-4b9c-93cf-c1a511f8d513", + "metadata": {}, + "outputs": [], + "source": [ + "#Import CSV of Zipcodes\n", + "d_zip = cudf.read_csv(\n", + " path_of(\"USA_Zipcodes_2019_Tiger.csv\"),\n", + " usecols=[\"WKT\", \"ZCTA5CE10\", \"INTPTLAT10\", \"INTPTLON10\"])\n", + "d_zip.INTPTLAT10 = d_zip.INTPTLAT10.astype(\"float\")\n", + "d_zip.INTPTLON10 = d_zip.INTPTLON10.astype(\"float\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "583856a2-0b29-48c1-ade7-3840f640c5e7", + "metadata": {}, + "outputs": [], + "source": [ + "# Load WKT as shapely objects\n", + "h_zip = d_zip.to_pandas()\n", + "h_zip[\"WKT\"] = h_zip[\"WKT\"].apply(wkt.loads)\n", + "h_zip = gpd.GeoDataFrame(h_zip, geometry=\"WKT\", crs='epsg:4326')\n", + "\n", + "# Transfer back to GPU with cuSpatial\n", + "d_zip = cuspatial.from_geopandas(h_zip)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a13b228d-9a60-4f32-b548-fa6f4240e75e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Load State Boundaries\n", + "states = gpd.read_file(\"USA_States.csv\", geometry='WKT', crs='epsg:4326')\n", + "d_states = cuspatial.from_geopandas(states)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "33da801e-01a3-4c9f-bbba-0c61dc7677d9", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "class QuadTree:\n", + " \"\"\"Helper class to use cuspatial quadtree interface\n", + " \"\"\"\n", + " def __init__(self,\n", + " df,\n", + " x_column,\n", + " y_column,\n", + " x_min=None,\n", + " x_max=None,\n", + " y_min=None,\n", + " y_max=None,\n", + " scale = -1,\n", + " max_depth = 15,\n", + " min_size = 12):\n", + "\n", + " self.x_min = df[x_column].min() if not x_min else x_min\n", + " self.x_max = df[x_column].max() if not x_max else x_max\n", + " self.y_min = df[y_column].min() if not y_min else y_min\n", + " self.y_max = df[y_column].max() if not y_max else y_max\n", + " \n", + " self.scale = scale\n", + " self.max_depth = max_depth\n", + " self.min_size = min_size\n", + "\n", + " self.point_df = df\n", + " self.x_column = x_column\n", + " self.y_column = y_column\n", + " \n", + " self.polygon_point_mapping = None\n", + " \n", + " self.d_points = cuspatial.GeoSeries.from_points_xy(\n", + " cudf.DataFrame({\"x\": df[x_column], \"y\": df[y_column]}\n", + " ).interleave_columns())\n", + " \n", + " self.point_indices, self.quadtree = (\n", + " cuspatial.quadtree_on_points(self.d_points,\n", + " self.x_min,\n", + " self.x_max,\n", + " self.y_min,\n", + " self.y_max,\n", + " self.scale,\n", + " self.max_depth,\n", + " self.min_size))\n", + "\n", + " def set_polygon(self, df, poly_column):\n", + " polys = df[poly_column]\n", + "\n", + " parts = polys.polygons.part_offset\n", + " rings = polys.polygons.ring_offset\n", + " x = polys.polygons.x\n", + " y = polys.polygons.y\n", + " \n", + " single_polys = cuspatial.GeoSeries.from_polygons_xy(\n", + " polys.polygons.xy, rings, parts, cp.arange(len(parts))\n", + " )\n", + " \n", + " geometries = cudf.Series(polys.polygons.geometry_offset)\n", + " \n", + " poly_bboxes = cuspatial.polygon_bounding_boxes(single_polys)\n", + " intersections = cuspatial.join_quadtree_and_bounding_boxes(\n", + " self.quadtree, poly_bboxes, self.x_min, self.x_max, self.y_min, self.y_max, self.scale, self.max_depth\n", + " )\n", + " polygon_point_mapping = cuspatial.quadtree_point_in_polygon(\n", + " intersections,\n", + " self.quadtree,\n", + " self.point_indices,\n", + " self.d_points,\n", + " single_polys\n", + " )\n", + "\n", + " # Update Polygon Index to MultiPolygon Index\n", + " polygon_index = geometries.searchsorted(polygon_point_mapping.polygon_index, side=\"right\")-1\n", + " polygon_point_mapping.polygon_index = polygon_index\n", + "\n", + " self.polygon_point_mapping = polygon_point_mapping.reset_index(drop=True)\n", + " \n", + " # Remap point indices\n", + " idx_of_idx = self.point_indices.take(\n", + " self.polygon_point_mapping.point_index\n", + " ).reset_index(drop=True)\n", + " self.polygon_point_mapping.point_index = idx_of_idx\n", + "\n", + " self.polygon_df = df\n", + "\n", + " def _subset_geodf(self, geodf, columns):\n", + " res = cudf.DataFrame()\n", + " for col in columns:\n", + " res[col] = geodf[col]\n", + " return res\n", + "\n", + " def points(self, columns = None):\n", + " if self.polygon_point_mapping is None:\n", + " raise ValueError(\"First set polygon dataframe.\")\n", + " \n", + " if not columns:\n", + " df = self.point_df\n", + " else:\n", + " df = self._subset_geodf(self.point_df, columns)\n", + "\n", + " if any(dtype == \"geometry\" for dtype in df.dtypes):\n", + " df = cuspatial.GeoDataFrame(df)\n", + " \n", + " mapping = self.polygon_point_mapping\n", + " res = df.iloc[mapping.point_index]\n", + " res = res.reset_index(drop=True)\n", + " res[\"polygon_index\"] = mapping.polygon_index\n", + " res[\"point_index\"] = mapping.point_index\n", + " return res\n", + "\n", + " def polygons(self, columns = None):\n", + " if self.polygon_point_mapping is None:\n", + " raise ValueError(\"First set polygon dataframe.\")\n", + " \n", + " if not columns:\n", + " df = self.polygon_df\n", + " else:\n", + " df = self._subset_geodf(self.polygon_df, columns)\n", + " \n", + " if any(dtype == \"geometry\" for dtype in df.dtypes):\n", + " df = cuspatial.GeoDataFrame(df)\n", + " \n", + " mapping = self.polygon_point_mapping\n", + " res = df.iloc[mapping.polygon_index]\n", + " res = res.reset_index(drop=True)\n", + " res[\"polygon_index\"] = mapping.polygon_index\n", + " res[\"point_index\"] = mapping.point_index\n", + " return res\n", + " \n", + " def point_left_join_polygon(self, point_columns=None, polygon_columns=None):\n", + " points = self.points(point_columns)\n", + " polygons = self.polygons(polygon_columns)\n", + " joined = points.merge(polygons, on=[\"polygon_index\", \"point_index\"], how=\"left\")\n", + " joined = joined.drop([\"polygon_index\", \"point_index\"], axis=1)\n", + " return cuspatial.GeoDataFrame(joined)" + ] + }, + { + "cell_type": "markdown", + "id": "eda8fb4c-39ec-44e2-9163-cbbdd91eeb1d", + "metadata": {}, + "source": [ + "## Filtering Zipcode by its Geometric Center\n", + "\n", + "The Zipcode Dataset contains boundaries for all zipcodes in the US. The below uses the geometric center (encoded in `INTPTLON10` and `INTPTLAT10` column) for each zipcode and uses cuspatial's quadtree interface to filter zipcodes located only in California." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c0cadafb-acae-41d6-bbca-c10a8201699c", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/raid/wangm/dev/rapids/cuspatial/python/cuspatial/cuspatial/core/spatial/indexing.py:174: UserWarning: scale -1 is less than required minimum scale 0.009837776664632286. Clamping to minimum scale\n", + " warnings.warn(\n", + "/raid/wangm/dev/rapids/cuspatial/python/cuspatial/cuspatial/core/spatial/join.py:150: UserWarning: scale -1 is less than required minimum scale 0.009837776664632286. Clamping to minimum scale\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "# Use quadtree to filter zip codes\n", + "\n", + "# Build a point quadtree using the geometric center of the zip code region\n", + "zipcode_quadtree = QuadTree(d_zip, x_column=\"INTPTLON10\", y_column=\"INTPTLAT10\")\n", + "\n", + "# Pass boundary\n", + "zipcode_quadtree.set_polygon(d_states, poly_column='geometry')\n", + "\n", + "# Join state and zip code boundaries\n", + "zipcode_by_state = zipcode_quadtree.point_left_join_polygon([\"WKT\", \"ZCTA5CE10\"], [\"STUSPS\"])\n", + "\n", + "# Get Californian zipcodes\n", + "CA_zipcode = zipcode_by_state[zipcode_by_state.STUSPS == 'CA']" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d2571a4a-a898-4e04-9fd2-21eb6b7a7f3e", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1762, 33144)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(CA_zipcode), len(d_zip)" + ] + }, + { + "cell_type": "markdown", + "id": "ab387f5a-cf7e-49d4-b3c8-c5b4b059cd4d", + "metadata": {}, + "source": [ + "From the 33K zipcode dataset, 1.7K of them belong to California." + ] + }, + { + "cell_type": "markdown", + "id": "6446d81b-006a-4a0b-995b-a001c9b7766f", + "metadata": {}, + "source": [ + "## Join stop signs dataset with California Zipcode boundaries\n", + "\n", + "The below joins the stop sign dataset (460K data points) with all zip code boundaries in California (1700 data points)." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "370ee37c-1311-4f54-9b0c-afd862c489aa", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/raid/wangm/dev/rapids/cuspatial/python/cuspatial/cuspatial/core/spatial/indexing.py:174: UserWarning: scale -1 is less than required minimum scale 0.0029100948550503493. Clamping to minimum scale\n", + " warnings.warn(\n", + "/raid/wangm/dev/rapids/cuspatial/python/cuspatial/cuspatial/core/spatial/join.py:150: UserWarning: scale -1 is less than required minimum scale 0.0029100948550503493. Clamping to minimum scale\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "# Build a second quadtree with all stop signs in the US\n", + "stop_quadtree = QuadTree(d_stops, x_column='x', y_column='y')\n", + "\n", + "# Pass zip code polygons\n", + "stop_quadtree.set_polygon(CA_zipcode, poly_column=\"WKT\")\n", + "\n", + "# Join the stop signs and the zip code dataframe\n", + "stop_by_zipcode = stop_quadtree.point_left_join_polygon([\"x\", \"y\"], [\"ZCTA5CE10\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "5674f74a-9315-4e1f-ac0d-c45a1b97ae3e", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xyZCTA5CE10
0-121.85809437.28078795136
1-121.85664837.27829595136
2-121.85544137.28037595136
3-121.85634337.28319595136
4-121.85660437.28100595136
\n", + "
" + ], + "text/plain": [ + " x y ZCTA5CE10\n", + "0 -121.858094 37.280787 95136\n", + "1 -121.856648 37.278295 95136\n", + "2 -121.855441 37.280375 95136\n", + "3 -121.856343 37.283195 95136\n", + "4 -121.856604 37.281005 95136\n", + "(GPU)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stop_by_zipcode.head()" + ] + }, + { + "cell_type": "markdown", + "id": "4b61f534-faa4-4465-b47b-fb717169f30e", + "metadata": {}, + "source": [ + "## Zipcode counting with cuDF\n", + "\n", + "The below uses [cuDF](https://docs.rapids.ai/api/cudf/stable/index.html) to count the number of stop signs per zip code. Then merge the geometry information from the zipcode dataset." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "247d716c-4718-4aba-8d4f-5f816852194d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "ZCTA5CE10\n", + "94901 131\n", + "94535 205\n", + "95112 103\n", + "95407 126\n", + "93933 205\n", + "Name: stop_count, dtype: int32" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Count the Stop Signs by California Zip Codes\n", + "stop_counts = stop_by_zipcode.groupby(\"ZCTA5CE10\").x.count().rename(\"stop_count\")\n", + "stop_counts.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "ccf31694-275d-4987-a318-79bc1ea79e73", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DataFrame Size: 1762\n" + ] + } + ], + "source": [ + "# Fetch the polygon boundaries\n", + "stop_counts_and_bounds = cuspatial.GeoDataFrame(CA_zipcode.merge(stop_counts, on=\"ZCTA5CE10\", how=\"left\"))\n", + "stop_counts_and_bounds[\"stop_count\"] = stop_counts_and_bounds[\"stop_count\"].astype(\"int\").fillna(0)\n", + "print(\"DataFrame Size: \", len(stop_counts_and_bounds))" + ] + }, + { + "cell_type": "markdown", + "id": "d1f2af42-affb-4e9e-ac24-b5583641a366", + "metadata": {}, + "source": [ + "## Visualization\n", + "\n", + "Now, we visualize the stop sign count results using [PyDeck](https://deckgl.readthedocs.io/en/latest/index.html).\n", + "Uncomment to run below:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "6a9cb2d6-c7d3-4063-9b24-47101dba0044", + "metadata": {}, + "outputs": [], + "source": [ + "# # Visualize the Dataset\n", + "\n", + "# # Move dataframe to host for visualization\n", + "# host_df = stop_counts_and_bounds.to_geopandas()\n", + "# host_df = host_df.rename({\"WKT\": \"geometry\"}, axis=1)\n", + "# host_df.head()\n", + "\n", + "# # Geo Center of CA: 120°4.9'W 36°57.9'N\n", + "# view_state = pdk.ViewState(\n", + "# **{\"latitude\": 33.96500, \"longitude\": -118.08167, \"zoom\": 6, \"maxZoom\": 16, \"pitch\": 95, \"bearing\": 0}\n", + "# )\n", + "\n", + "# gpd_layer = pdk.Layer(\n", + "# \"GeoJsonLayer\",\n", + "# data=host_df[[\"geometry\", \"stop_count\", \"ZCTA5CE10\"]],\n", + "# get_polygon=\"geometry\",\n", + "# get_elevation=\"stop_count\",\n", + "# extruded=True,\n", + "# elevation_scale=50,\n", + "# get_fill_color=[227,74,51],\n", + "# get_line_color=[255, 255, 255],\n", + "# auto_highlight=True,\n", + "# filled=True,\n", + "# wireframe=True,\n", + "# pickable=True\n", + "# )\n", + "\n", + "# tooltip = {\"html\": \"Stop Sign Count: {stop_count}
ZipCode: {ZCTA5CE10}\"}\n", + "\n", + "# r = pdk.Deck(\n", + "# gpd_layer,\n", + "# initial_view_state=view_state,\n", + "# map_style=pdk.map_styles.LIGHT,\n", + "# tooltip=tooltip,\n", + "# )\n", + "\n", + "# r.to_html(\"geopandas_layer.html\", notebook_display=False)" + ] + }, + { + "cell_type": "markdown", + "id": "e05fc526-c935-417f-9f53-0f13a0c6d02a", + "metadata": {}, + "source": [ + "### Open geopandas_layer.html to see visualization result\n", + "\n", + "![stop_per_state_map](https://github.com/isVoid/cuspatial/raw/notebook/zipcode_counting/notebooks/stop_states.png)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7611be2-6dbe-40a5-ae9e-51283737d3f2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/stop_states.png b/notebooks/stop_states.png new file mode 100644 index 000000000..1fb3b1b5a Binary files /dev/null and b/notebooks/stop_states.png differ diff --git a/python/cuspatial/cuspatial/__init__.py b/python/cuspatial/cuspatial/__init__.py index bb9a237c9..2f3a27267 100644 --- a/python/cuspatial/cuspatial/__init__.py +++ b/python/cuspatial/cuspatial/__init__.py @@ -10,6 +10,7 @@ pairwise_point_distance, pairwise_point_linestring_distance, pairwise_point_linestring_nearest_points, + pairwise_point_polygon_distance, point_in_polygon, points_in_spatial_window, polygon_bounding_boxes, diff --git a/python/cuspatial/cuspatial/_lib/cpp/distance/point_polygon_distance.pxd b/python/cuspatial/cuspatial/_lib/cpp/distance/point_polygon_distance.pxd new file mode 100644 index 000000000..020bd4574 --- /dev/null +++ b/python/cuspatial/cuspatial/_lib/cpp/distance/point_polygon_distance.pxd @@ -0,0 +1,17 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +from libcpp.memory cimport unique_ptr + +from cudf._lib.cpp.column.column cimport column + +from cuspatial._lib.cpp.column.geometry_column_view cimport ( + geometry_column_view, +) + + +cdef extern from "cuspatial/distance/point_polygon_distance.hpp" \ + namespace "cuspatial" nogil: + cdef unique_ptr[column] pairwise_point_polygon_distance( + const geometry_column_view & multipoints, + const geometry_column_view & multipolygons + ) except + diff --git a/python/cuspatial/cuspatial/_lib/distance.pyx b/python/cuspatial/cuspatial/_lib/distance.pyx index 9319278ee..f3d68717e 100644 --- a/python/cuspatial/cuspatial/_lib/distance.pyx +++ b/python/cuspatial/cuspatial/_lib/distance.pyx @@ -1,12 +1,15 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. -from libcpp.memory cimport unique_ptr +from libcpp.memory cimport make_shared, shared_ptr, unique_ptr from libcpp.utility cimport move from cudf._lib.column cimport Column from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view +from cuspatial._lib.cpp.column.geometry_column_view cimport ( + geometry_column_view, +) from cuspatial._lib.cpp.distance.linestring_distance cimport ( pairwise_linestring_distance as c_pairwise_linestring_distance, ) @@ -16,7 +19,12 @@ from cuspatial._lib.cpp.distance.point_distance cimport ( from cuspatial._lib.cpp.distance.point_linestring_distance cimport ( pairwise_point_linestring_distance as c_pairwise_point_linestring_distance, ) +from cuspatial._lib.cpp.distance.point_polygon_distance cimport ( + pairwise_point_polygon_distance as c_pairwise_point_polygon_distance, +) from cuspatial._lib.cpp.optional cimport optional +from cuspatial._lib.cpp.types cimport collection_type_id, geometry_type_id +from cuspatial._lib.types cimport collection_type_py_to_c from cuspatial._lib.utils cimport unwrap_pyoptcol @@ -102,3 +110,35 @@ def pairwise_point_linestring_distance( c_linestring_points_xy, )) return Column.from_unique_ptr(move(c_result)) + + +def pairwise_point_polygon_distance( + point_collection_type, + Column multipoints, + Column multipolygons +): + + cdef collection_type_id point_multi_type = collection_type_py_to_c( + point_collection_type + ) + + cdef shared_ptr[geometry_column_view] c_multipoints = \ + make_shared[geometry_column_view]( + multipoints.view(), + point_multi_type, + geometry_type_id.POINT) + + cdef shared_ptr[geometry_column_view] c_multipolygons = \ + make_shared[geometry_column_view]( + multipolygons.view(), + collection_type_id.MULTI, + geometry_type_id.POLYGON) + + cdef unique_ptr[column] c_result + + with nogil: + c_result = move(c_pairwise_point_polygon_distance( + c_multipoints.get()[0], c_multipolygons.get()[0] + )) + + return Column.from_unique_ptr(move(c_result)) diff --git a/python/cuspatial/cuspatial/_lib/types.pxd b/python/cuspatial/cuspatial/_lib/types.pxd index 710724f47..6d3d1b3f9 100644 --- a/python/cuspatial/cuspatial/_lib/types.pxd +++ b/python/cuspatial/cuspatial/_lib/types.pxd @@ -2,6 +2,13 @@ from libc.stdint cimport uint8_t +from cuspatial._lib.cpp.types cimport collection_type_id, geometry_type_id + ctypedef uint8_t underlying_geometry_type_id_t ctypedef uint8_t underlying_collection_type_id_t + + +cdef geometry_type_id geometry_type_py_to_c(typ) except* + +cdef collection_type_id collection_type_py_to_c(typ) except* diff --git a/python/cuspatial/cuspatial/_lib/types.pyx b/python/cuspatial/cuspatial/_lib/types.pyx index 03354b5f1..103342cb2 100644 --- a/python/cuspatial/cuspatial/_lib/types.pyx +++ b/python/cuspatial/cuspatial/_lib/types.pyx @@ -28,3 +28,10 @@ class CollectionType(IntEnum): MULTI = ( collection_type_id.MULTI ) + + +cdef geometry_type_id geometry_type_py_to_c(typ : GeometryType): + return ( (typ.value)) + +cdef collection_type_id collection_type_py_to_c(typ : CollectionType): + return ( (typ.value)) diff --git a/python/cuspatial/cuspatial/core/binpreds/binpreds.py b/python/cuspatial/cuspatial/core/binpreds/binpreds.py index b5d4e6ba1..3e6c1654e 100644 --- a/python/cuspatial/cuspatial/core/binpreds/binpreds.py +++ b/python/cuspatial/cuspatial/core/binpreds/binpreds.py @@ -6,17 +6,40 @@ import cudf +import cuspatial from cuspatial.core._column.geocolumn import GeoColumn from cuspatial.core.binpreds.contains import contains_properly from cuspatial.utils.column_utils import ( contains_only_linestrings, contains_only_multipoints, + contains_only_points, contains_only_polygons, has_multipolygons, has_same_geometry, ) +class PreprocessorOutput: + """The output of the preprocess method of a binary predicate. + + This makes it possible to create a class that matches the necessary + signature of a geoseries.GeoColumnAccessor object. In some cases the + preprocessor may need to reorder the input data, in which case the + preprocessor will return a PreprocessorOutput object instead of a + GeoColumnAccessor.""" + + def __init__(self, coords, indices) -> None: + self.vertices = coords + self.indices = indices + + @property + def xy(self): + return self.vertices + + def point_indices(self): + return self.indices + + class BinaryPredicate(ABC): """BinaryPredicate is an abstract class that implements the binary predicate algorithm. The binary predicate algorithm is used to compute @@ -58,10 +81,12 @@ def preprocess(self, lhs, rhs): The right-hand-side of the internal binary predicate, may be reordered. """ - pass + return (lhs, rhs, cudf.RangeIndex(len(rhs))) @abstractmethod - def postprocess(self, point_indices, point_result): + def postprocess( + self, point_indices: cudf.Series, point_result: cudf.Series + ) -> cudf.Series: """Postprocess the output data for the binary predicate. This method should be implemented by subclasses. @@ -134,6 +159,15 @@ def __init__(self, lhs, rhs, align=True): (self.lhs, self.rhs) = lhs.align(rhs) if align else (lhs, rhs) self.align = align + def _cancel_op(self, lhs, rhs, result): + """Used to disable computation of the binary predicate. + + This occurs when the binary predicate is not supported for the + input types, and a final result can be computed only using + `preprocess` and `postprocess`.""" + self._op = lambda x, y: result + return (lhs, rhs, result) + def __call__(self) -> cudf.Series: """Return the result of the binary predicate.""" # Type disambiguation @@ -178,9 +212,9 @@ def preprocess(self, lhs, rhs): # Arrange into shape for calling point-in-polygon, intersection, or # equals point_indices = geom.point_indices() - from cuspatial.core.geoseries import GeoSeries - - final_rhs = GeoSeries(GeoColumn._from_points_xy(xy_points._column)) + final_rhs = cuspatial.core.geoseries.GeoSeries( + GeoColumn._from_points_xy(xy_points._column) + ) return (lhs, final_rhs, cudf.Series(point_indices)) def _should_use_quadtree(self): @@ -391,14 +425,16 @@ def _postprocess_brute_force_result(self, point_indices, point_result): # Result can be: # A Dataframe of booleans with n_points rows and up to 31 columns. - # Discrete math recombination if ( contains_only_linestrings(self.rhs) or contains_only_polygons(self.rhs) or contains_only_multipoints(self.rhs) ): - # process for completed linestrings, polygons, and multipoints. - # Not necessary for points. + # Compute the set of results for each point-in-polygon predicate. + # Group them by the original index, and sum the results. If the + # sum of points in the rhs feature is equal to the number of + # points found in the polygon, then the polygon contains the + # feature. point_result["idx"] = point_indices group_result = ( point_result.groupby("idx").sum().sort_index() @@ -487,3 +523,260 @@ def preprocess(self, lhs, rhs): if contains_only_polygons(rhs): (self.lhs, self.rhs) = (rhs, lhs) return super().preprocess(rhs, lhs) + + def postprocess(self, point_indices, point_result): + """Postprocess the output GeoSeries to ensure that they are of the + correct type for the predicate.""" + (hits, expected_count,) = self._count_results_in_multipoint_geometries( + point_indices, point_result + ) + result_df = hits.reset_index().merge( + expected_count.reset_index(), on="rhs_index" + ) + result_df["feature_in_polygon"] = ( + result_df["point_index_x"] >= result_df["point_index_y"] + ) + final_result = cudf.Series( + [False] * (point_indices.max().item() + 1) + ) # point_indices is zero index + final_result.loc[ + result_df["rhs_index"][result_df["feature_in_polygon"]] + ] = True + return final_result + + +class EqualsBinpred(BinaryPredicate): + def _offset_equals(self, lhs, rhs): + """Compute the pairwise length equality of two offset arrays""" + lhs_lengths = lhs[:-1] - lhs[1:] + rhs_lengths = rhs[:-1] - rhs[1:] + return lhs_lengths == rhs_lengths + + def _sort_interleaved_points_by_offset(self, coords, offsets, sort_order): + """Sort xy according to bins defined by offset. Sort order is a list + of column names to sort by. + + `_sort_interleaved_points_by_offset` creates a dataframe with the + following columns: + "sizes": an index for each object represented in `coords`. + "points": an index for each point in `coords`. + "xy_key": an index that maintains x/y ordering. + "xy": the x/y coordinates in `coords`. + + The dataframe is sorted according to keys passed in by the caller. + For sorting multipoints, the keys in order are "object_key", "xy", + "xy_key". This sorts the points in each multipoint into the same + bin defined by "object_key", then sorts the points in each bin by + x/y coordinates, and finally sorts the points in each bin by the + `xy_key` which maintains that the x coordinate precedes the y + coordinate. + + For sorting linestrings, the keys in order are "object_key", + "point_key", "xy_key". This sorts the points in each linestring + into the same bin defined by "object_key", then sorts the points + in each bin by point ordering, and finally sorts the points in + each bin by x/y ordering. + + Parameters + ---------- + coords : cudf.Series + interleaved x,y coordinates + offsets : cudf.Series + offsets into coords + sort_order : list + list of column names to sort by. One of "object_key", "point_key", + "xy_key", and "xy". + + Returns + ------- + cudf.Series + sorted interleaved x,y coordinates + """ + sizes = offsets[1:] - offsets[:-1] + object_key = ( + cudf.Series(cp.arange(len(sizes))) + .repeat(sizes * 2) + .reset_index(drop=True) + ) + point_key = cp.arange(len(coords) // 2).repeat(2)[::-1] + xy_key = cp.tile([0, 1], len(coords) // 2) + sorting_df = cudf.DataFrame( + { + "object_key": object_key, + "point_key": point_key, + "xy_key": xy_key, + "xy": coords, + } + ) + sorted_df = sorting_df.sort_values(by=sort_order).reset_index( + drop=True + ) + return sorted_df["xy"] + + def _sort_multipoint_series(self, coords, offsets): + """Sort xy according to bins defined by offset""" + result = self._sort_interleaved_points_by_offset( + coords, offsets, ["object_key", "xy", "xy_key"] + ) + result.name = None + return result + + def _sort_multipoints(self, lhs, rhs, initial): + lhs_sorted = self._sort_multipoint_series( + lhs.multipoints.xy, lhs.multipoints.geometry_offset + ) + rhs_sorted = self._sort_multipoint_series( + rhs.multipoints.xy, rhs.multipoints.geometry_offset + ) + lhs_result = cuspatial.core.geoseries.GeoSeries.from_multipoints_xy( + lhs_sorted, lhs.multipoints.geometry_offset + ) + rhs_result = cuspatial.core.geoseries.GeoSeries.from_multipoints_xy( + rhs_sorted, rhs.multipoints.geometry_offset + ) + lhs_result.index = lhs.index + rhs_result.index = rhs.index + return ( + lhs_result, + rhs_result, + initial, + ) + + def _reverse_linestrings(self, coords, offsets): + """Reverse the order of coordinates in a Arrow buffer of coordinates + and offsets.""" + result = self._sort_interleaved_points_by_offset( + coords, offsets, ["object_key", "point_key", "xy_key"] + ) + result.name = None + return result + + def _compare_linestrings_and_reversed(self, lhs, rhs, initial): + """Compare linestrings with their reversed counterparts.""" + lhs_xy = self._reverse_linestrings(lhs.xy, lhs.part_offset) + rhs_xy = self._reverse_linestrings(rhs.xy, rhs.part_offset) + return ( + PreprocessorOutput(lhs_xy, lhs.point_indices()), + PreprocessorOutput(rhs_xy, rhs.point_indices()), + initial, + ) + + def preprocess(self, lhs, rhs): + # Compare types + type_compare = lhs.feature_types == rhs.feature_types + # Any unmatched type is not equal + if (type_compare == False).all(): # noqa: E712 + # Override _op so that it will not be run. + return self._cancel_op(lhs, rhs, type_compare) + # Get indices of matching types + if contains_only_multipoints(lhs): + lengths_equal = self._offset_equals( + lhs.multipoints.geometry_offset, + rhs.multipoints.geometry_offset, + ) + if lengths_equal.any(): + # Multipoints are equal if they contains the + # same unordered points. + return self._sort_multipoints( + lhs[lengths_equal], + rhs[lengths_equal], + lengths_equal, + ) + else: + # No lengths are equal, so none can be equal. + return self._cancel_op(lhs, rhs, lengths_equal) + elif contains_only_linestrings(lhs): + lengths_equal = self._offset_equals( + lhs.lines.part_offset, rhs.lines.part_offset + ) + if lengths_equal.any(): + return ( + lhs[lengths_equal], + rhs[lengths_equal], + lengths_equal, + ) + else: + return self._cancel_op(lhs, rhs, lengths_equal) + elif contains_only_polygons(lhs): + raise NotImplementedError + elif contains_only_points(lhs): + return (lhs, rhs, type_compare) + + def postprocess(self, lengths_equal, point_result): + # if point_result is not a Series, preprocessing terminated + # the results early. + if isinstance(point_result, cudf.Series): + point_result = point_result.sort_index() + lengths_equal[point_result.index] = point_result + return cudf.Series(lengths_equal) + + def _vertices_equals(self, lhs, rhs): + """Compute the equals relationship between interleaved xy + coordinate buffers.""" + length = min(len(lhs), len(rhs)) + a = lhs[:length:2]._column == rhs[:length:2]._column + b = rhs[1:length:2]._column == lhs[1:length:2]._column + return a & b + + def _op(self, lhs, rhs): + if contains_only_linestrings(lhs): + # Linestrings can be compared either forward or + # reversed. We need to compare both directions. + lhs_reversed = self._reverse_linestrings( + lhs.lines.xy, lhs.lines.part_offset + ) + forward_result = self._vertices_equals(lhs.lines.xy, rhs.lines.xy) + reverse_result = self._vertices_equals(lhs_reversed, rhs.lines.xy) + result = forward_result | reverse_result + elif contains_only_multipoints(lhs): + result = self._vertices_equals( + lhs.multipoints.xy, rhs.multipoints.xy + ) + elif contains_only_points(lhs): + result = self._vertices_equals(lhs.points.xy, rhs.points.xy) + elif contains_only_polygons(lhs): + raise NotImplementedError + indices = lhs.point_indices + result_df = cudf.DataFrame( + {"idx": indices[: len(result)], "equals": result} + ) + gb_idx = result_df.groupby("idx") + result = (gb_idx.sum().sort_index() == gb_idx.count().sort_index())[ + "equals" + ] + result.index = lhs.index + result.index.name = None + result.name = None + return result + + +class CrossesBinpred(EqualsBinpred): + """An object is said to cross other if its interior intersects the + interior of the other but does not contain it, and the dimension of + the intersection is less than the dimension of the one or the other. + + This is only implemented for `points.crosses(points)` at this time. + """ + + def postprocess(self, point_indices, point_result): + if has_same_geometry(self.lhs, self.rhs) and contains_only_points( + self.lhs + ): + return cudf.Series([False] * len(self.lhs)) + df_result = cudf.DataFrame({"idx": point_indices, "pip": point_result}) + point_result = cudf.Series( + df_result["pip"], index=cudf.RangeIndex(0, len(df_result)) + ) + point_result.name = None + return point_result + + +class CoversBinpred(EqualsBinpred): + """An object A covers another B if no points of B lie in the exterior + of A, and at least one point of the interior of B lies in the interior + of A. + + This is only implemented for `points.covers(points)` at this time.""" + + def postprocess(self, point_indices, point_result): + return cudf.Series(point_result, index=point_indices) diff --git a/python/cuspatial/cuspatial/core/geoseries.py b/python/cuspatial/cuspatial/core/geoseries.py index 1cb6e0c85..41d5f7eb8 100644 --- a/python/cuspatial/cuspatial/core/geoseries.py +++ b/python/cuspatial/cuspatial/core/geoseries.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION +# Copyright (c) 2020-2023, NVIDIA CORPORATION from functools import cached_property from numbers import Integral @@ -28,10 +28,19 @@ from cuspatial.core._column.geometa import Feature_Enum, GeoMeta from cuspatial.core.binpreds.binpreds import ( ContainsProperlyBinpred, + CoversBinpred, + CrossesBinpred, + EqualsBinpred, IntersectsBinpred, OverlapsBinpred, WithinBinpred, ) +from cuspatial.utils.column_utils import ( + contains_only_linestrings, + contains_only_multipoints, + contains_only_points, + contains_only_polygons, +) T = TypeVar("T", bound="GeoSeries") @@ -95,6 +104,10 @@ def __init__( column, index, dtype=dtype, name=name, nan_as_null=nan_as_null ) + @property + def feature_types(self): + return self._column._meta.input_types + @property def type(self): gpu_types = cudf.Series(self._column._meta.input_types).astype("str") @@ -103,13 +116,27 @@ def type(self): "0": "Point", "1": "MultiPoint", "2": "Linestring", - "3": "MultiLinestring", - "4": "Polygon", - "5": "MultiPolygon", + "3": "Polygon", } ) return result + @property + def point_indices(self): + if contains_only_polygons(self): + return self.polygons.point_indices() + elif contains_only_linestrings(self): + return self.lines.point_indices() + elif contains_only_multipoints(self): + return self.multipoints.point_indices() + elif contains_only_points(self): + return self.points.point_indices() + else: + raise TypeError( + "GeoSeries must contain only Points, MultiPoints, Lines, or " + "Polygons to return point indices." + ) + class GeoColumnAccessor: def __init__(self, list_series, meta): self._series = list_series @@ -151,7 +178,7 @@ def point_indices(self): sizes = offsets[1:] - offsets[:-1] return cp.repeat(self._series.index, sizes) """ - return self._series.index + return self._meta.input_types.index[self._meta.input_types != -1] class MultiPointGeoColumnAccessor(GeoColumnAccessor): def __init__(self, list_series, meta): @@ -167,7 +194,7 @@ def point_indices(self): # MultiPoint GeoSeries that each individual point is member of. offsets = cp.array(self.geometry_offset) sizes = offsets[1:] - offsets[:-1] - return cp.repeat(self._series.index, sizes) + return cp.repeat(self._meta.input_types.index, sizes) class LineStringGeoColumnAccessor(GeoColumnAccessor): def __init__(self, list_series, meta): @@ -187,9 +214,9 @@ def part_offset(self): def point_indices(self): # Return a cupy.ndarray containing the index values from the # LineString GeoSeries that each individual point is member of. - offsets = self.part_offset.take(self.geometry_offset) + offsets = cp.array(self.part_offset) sizes = offsets[1:] - offsets[:-1] - return cp.repeat(self._series.index, sizes) + return cp.repeat(self._meta.input_types.index, sizes) class PolygonGeoColumnAccessor(GeoColumnAccessor): def __init__(self, list_series, meta): @@ -990,14 +1017,81 @@ def contains_properly(self, other, align=False, allpairs=False): `point_indices` and `polygon_indices`, each of which is a `Series` of `dtype('int32')` in the case of `allpairs=True`. """ + if contains_only_points(self) and contains_only_points(self): + return EqualsBinpred(self, other, align)() return ContainsProperlyBinpred(self, other, align, allpairs)() + def geom_equals(self, other, align=True): + """Compute if a GeoSeries of features A is equal to a GeoSeries of + features B. Features are equal if their vertices are equal. + + An object is equal to other if its set-theoretic boundary, interior, + and exterior coincide with those of the other object. + + Parameters + ---------- + other + a cuspatial.GeoSeries + align=True + to align the indices before computing .contains or not. If the + indices are not aligned, they will be compared based on their + implicit row order. + + Examples + -------- + Test if two points are equal: + >>> point = cuspatial.GeoSeries([Point(0, 0)]) + >>> point2 = cuspatial.GeoSeries([Point(0, 0)]) + >>> print(point.geom_equals(point2)) + 0 True + dtype: bool + + Test if two polygons are equal: + >>> polygon = cuspatial.GeoSeries([ + >>> Polygon([[0, 0], [1, 0], [1, 1], [0, 0]]), + >>> ]) + >>> polygon2 = cuspatial.GeoSeries([ + >>> Polygon([[0, 0], [1, 0], [1, 1], [0, 0]]), + >>> ]) + >>> print(polygon.geom_equals(polygon2)) + 0 True + dtype: bool + + Returns + ------- + + result : cudf.Series + A Series of boolean values indicating whether each feature in A + is equal to the corresponding feature in B. + """ + return EqualsBinpred(self, other, align)() + + def covers(self, other, align=True): + """Compute if a GeoSeries of features A covers a second GeoSeries of + features B. A covers B if no points on B lie in the exterior of A. + + Parameters + ---------- + other + a cuspatial.GeoSeries + align=True + align the GeoSeries indexes before calling the binpred + + Returns + ------- + result : cudf.Series + A Series of boolean values indicating whether each feature in the + input GeoSeries covers the corresponding feature in the other + GeoSeries. + """ + return CoversBinpred(self, other, align)() + def intersects(self, other, align=True): """Returns a `Series` of `dtype('bool')` with value `True` for each aligned geometry that intersects _other_. - A geometry intersects another geometry if its boundary or interior - intersect in any way with the other geometry. + An object is said to intersect _other_ if its _boundary_ and + _interior_ intersects in any way with those of other. Parameters ---------- @@ -1012,15 +1106,18 @@ def intersects(self, other, align=True): A Series of boolean values indicating whether the geometries of each row intersect. """ - return IntersectsBinpred(self, other, align)() + if contains_only_points(self) and contains_only_points(other): + return cudf.Series(EqualsBinpred(self, other, align)()) + else: + return IntersectsBinpred(self, other, align)() def within(self, other, align=True): """Returns a `Series` of `dtype('bool')` with value `True` for each aligned geometry that is within _other_. - A geometry is within another geometry if at least one of its points is - located in the interior of the other geometry and no points are - located in the exterior of the other geometry. + An object is said to be within other if at least one of its points + is located in the interior and no points are located in the exterior + of the other. Parameters ---------- @@ -1035,7 +1132,10 @@ def within(self, other, align=True): A Series of boolean values indicating whether each feature falls within the corresponding polygon in the input. """ - return WithinBinpred(self, other, align)() + if contains_only_points(self) and contains_only_points(other): + return cudf.Series(EqualsBinpred(self, other, align)()) + else: + return WithinBinpred(self, other, align)() def overlaps(self, other, align=True): """Returns True for all aligned geometries that overlap other, else @@ -1059,4 +1159,33 @@ def overlaps(self, other, align=True): A Series of boolean values indicating whether each geometry overlaps the corresponding geometry in the input.""" # Overlaps has the same requirement as crosses. - return OverlapsBinpred(self, other, align=align)() + if contains_only_points(self) and contains_only_points(other): + return cudf.Series([False] * len(self)) + else: + return OverlapsBinpred(self, other, align=align)() + + def crosses(self, other, align=True): + """Returns True for all aligned geometries that cross other, else + False. + + Geometries cross if they have some but not all interior points in + common, have the same dimension, and the intersection of the + interiors of the geometries has the dimension of the geometries + themselves minus one. + + Parameters + ---------- + other + a cuspatial.GeoSeries + align=True + align the GeoSeries indexes before calling the binpred + + Returns + ------- + result : cudf.Series + A Series of boolean values indicating whether each geometry + crosses the corresponding geometry in the input.""" + if contains_only_points(self) and contains_only_points(other): + return cudf.Series([False] * len(self)) + else: + return CrossesBinpred(self, other, align=align)() diff --git a/python/cuspatial/cuspatial/core/spatial/__init__.py b/python/cuspatial/cuspatial/core/spatial/__init__.py index b269707d6..ee07838f9 100644 --- a/python/cuspatial/cuspatial/core/spatial/__init__.py +++ b/python/cuspatial/cuspatial/core/spatial/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. from .bounding import linestring_bounding_boxes, polygon_bounding_boxes from .distance import ( @@ -7,6 +7,7 @@ pairwise_linestring_distance, pairwise_point_distance, pairwise_point_linestring_distance, + pairwise_point_polygon_distance, ) from .filtering import points_in_spatial_window from .indexing import quadtree_on_points @@ -26,6 +27,7 @@ "sinusoidal_projection", "pairwise_point_distance", "pairwise_linestring_distance", + "pairwise_point_polygon_distance", "pairwise_point_linestring_distance", "pairwise_point_linestring_nearest_points", "polygon_bounding_boxes", diff --git a/python/cuspatial/cuspatial/core/spatial/distance.py b/python/cuspatial/cuspatial/core/spatial/distance.py index 9bf9afcdb..5d61e0564 100644 --- a/python/cuspatial/cuspatial/core/spatial/distance.py +++ b/python/cuspatial/cuspatial/core/spatial/distance.py @@ -10,16 +10,19 @@ pairwise_linestring_distance as cpp_pairwise_linestring_distance, pairwise_point_distance as cpp_pairwise_point_distance, pairwise_point_linestring_distance as c_pairwise_point_linestring_distance, + pairwise_point_polygon_distance as c_pairwise_point_polygon_distance, ) from cuspatial._lib.hausdorff import ( directed_hausdorff_distance as cpp_directed_hausdorff_distance, ) from cuspatial._lib.spatial import haversine_distance as cpp_haversine_distance +from cuspatial._lib.types import CollectionType from cuspatial.core.geoseries import GeoSeries from cuspatial.utils.column_utils import ( contains_only_linestrings, contains_only_multipoints, contains_only_points, + contains_only_polygons, ) @@ -382,6 +385,104 @@ def pairwise_point_linestring_distance( ) +def pairwise_point_polygon_distance(points: GeoSeries, polygons: GeoSeries): + """Compute distance between pairs of (multi)points and (multi)polygons + + The distance between a (multi)point and a (multi)polygon + is defined as the shortest distance between every point in the + multipoint and every edge of the (multi)polygon. If the multipoint and + multipolygon intersects, the distance is 0. + + This algorithm computes distance pairwise. The ith row in the result is + the distance between the ith (multi)point in `points` and the ith + (multi)polygon in `polygons`. + + Parameters + ---------- + points : GeoSeries + The (multi)points to compute the distance from. + polygons : GeoSeries + The (multi)polygons to compute the distance from. + + Returns + ------- + distance : cudf.Series + + Notes + ----- + The input `GeoSeries` must contain a single type geometry. + For example, `points` series cannot contain both points and polygons. + Currently, it is unsupported that `points` contains both points and + multipoints. + + Examples + -------- + Compute distance between a point and a polygon: + >>> from shapely.geometry import Point + >>> points = cuspatial.GeoSeries([Point(0, 0)]) + >>> polygons = cuspatial.GeoSeries([Point(1, 1).buffer(0.5)]) + >>> cuspatial.pairwise_point_polygon_distance(points, polygons) + 0 0.914214 + dtype: float64 + + Compute distance between a multipoint and a multipolygon + + >>> from shapely.geometry import MultiPoint + >>> mpoints = cuspatial.GeoSeries([MultiPoint([Point(0, 0), Point(1, 1)])]) + >>> mpolys = cuspatial.GeoSeries([ + ... MultiPoint([Point(2, 2), Point(1, 2)]).buffer(0.5)]) + >>> cuspatial.pairwise_point_polygon_distance(mpoints, mpolys) + 0 0.5 + dtype: float64 + """ + + if len(points) != len(polygons): + raise ValueError("Unmatched input geoseries length.") + + if len(points) == 0: + return cudf.Series(dtype=points.points.xy.dtype) + + if not contains_only_points(points): + raise ValueError("`points` array must contain only points") + + if not contains_only_polygons(polygons): + raise ValueError("`linestrings` array must contain only linestrings") + + if len(points.points.xy) > 0 and len(points.multipoints.xy) > 0: + raise NotImplementedError( + "Mixing point and multipoint geometries is not supported" + ) + + point_collection_type = ( + CollectionType.SINGLE + if len(points.points.xy > 0) + else CollectionType.MULTI + ) + + # Handle slicing in geoseries + points_column = ( + points._column.points._column + if point_collection_type == CollectionType.SINGLE + else points._column.mpoints._column + ) + points_column = points_column.take( + points._column._meta.union_offsets._column + ) + + polygon_column = polygons._column.polygons._column + polygon_column = polygon_column.take( + polygons._column._meta.union_offsets._column + ) + + return Series._from_data( + { + None: c_pairwise_point_polygon_distance( + point_collection_type, points_column, polygon_column + ) + } + ) + + def _flatten_point_series( points: GeoSeries, ) -> Tuple[ diff --git a/python/cuspatial/cuspatial/tests/binpreds/test_binpred_internals.py b/python/cuspatial/cuspatial/tests/binpreds/test_binpred_internals.py new file mode 100644 index 000000000..1c59e2882 --- /dev/null +++ b/python/cuspatial/cuspatial/tests/binpreds/test_binpred_internals.py @@ -0,0 +1,75 @@ +# Copyright (c) 2020-2023, NVIDIA CORPORATION + +import pandas as pd +from shapely.geometry import LineString + +import cuspatial + + +def test_internal_reversed_linestrings(): + linestring1 = cuspatial.GeoSeries( + [ + LineString([(0, 0), (1, 1), (1, 0), (0, 0)]), + ] + ) + linestring2 = cuspatial.GeoSeries( + [ + LineString([(0, 0), (1, 0), (1, 1), (0, 0)]), + ] + ) + from cuspatial.core.binpreds.binpreds import EqualsBinpred + + bp = EqualsBinpred(linestring1, linestring2) + got = bp._reverse_linestrings( + linestring1.lines.xy, linestring1.lines.part_offset + ).to_pandas() + expected = linestring2.lines.xy.to_pandas() + pd.testing.assert_series_equal(got, expected) + + +def test_internal_reversed_linestrings_pair(): + linestring1 = cuspatial.GeoSeries( + [ + LineString([(0, 0), (1, 1), (1, 0), (0, 0)]), + LineString([(0, 0), (1, 1), (1, 0)]), + ] + ) + linestring2 = cuspatial.GeoSeries( + [ + LineString([(0, 0), (1, 0), (1, 1), (0, 0)]), + LineString([(1, 0), (1, 1), (0, 0)]), + ] + ) + from cuspatial.core.binpreds.binpreds import EqualsBinpred + + bp = EqualsBinpred(linestring1, linestring2) + got = bp._reverse_linestrings( + linestring1.lines.xy, linestring1.lines.part_offset + ).to_pandas() + expected = linestring2.lines.xy.to_pandas() + pd.testing.assert_series_equal(got, expected) + + +def test_internal_reversed_linestrings_triple(): + linestring1 = cuspatial.GeoSeries( + [ + LineString([(0, 0), (1, 1), (1, 0), (0, 0)]), + LineString([(0, 0), (1, 1), (1, 0)]), + LineString([(0, 0), (1, 1), (1, 0), (0, 0), (1, 1)]), + ] + ) + linestring2 = cuspatial.GeoSeries( + [ + LineString([(0, 0), (1, 0), (1, 1), (0, 0)]), + LineString([(1, 0), (1, 1), (0, 0)]), + LineString([(1, 1), (0, 0), (1, 0), (1, 1), (0, 0)]), + ] + ) + from cuspatial.core.binpreds.binpreds import EqualsBinpred + + bp = EqualsBinpred(linestring1, linestring2) + got = bp._reverse_linestrings( + linestring1.lines.xy, linestring1.lines.part_offset + ).to_pandas() + expected = linestring2.lines.xy.to_pandas() + pd.testing.assert_series_equal(got, expected) diff --git a/python/cuspatial/cuspatial/tests/binpreds/test_equals_only_binpreds.py b/python/cuspatial/cuspatial/tests/binpreds/test_equals_only_binpreds.py new file mode 100644 index 000000000..84c4e0f25 --- /dev/null +++ b/python/cuspatial/cuspatial/tests/binpreds/test_equals_only_binpreds.py @@ -0,0 +1,787 @@ +# Copyright (c) 2020-2023, NVIDIA CORPORATION + +import cupy as cp +import geopandas as gpd +import pandas as pd +import pytest +from shapely.geometry import LineString, MultiPoint, Point, Polygon + +import cudf + +import cuspatial + + +def test_point_geom_equals_point(): + gpdpoint1 = gpd.GeoSeries([Point(0, 0)]) + gpdpoint2 = gpd.GeoSeries([Point(0, 0)]) + point1 = cuspatial.from_geopandas(gpdpoint1) + point2 = cuspatial.from_geopandas(gpdpoint2) + got = point1.geom_equals(point2) + expected = gpdpoint1.geom_equals(gpdpoint2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +@pytest.mark.parametrize( + "lhs", + [ + [Point(0, 0), Point(0, 0), Point(0, 0)], + [Point(1, 1), Point(1, 1), Point(1, 1)], + [Point(2, 2), Point(2, 2), Point(2, 2)], + ], +) +def test_3_points_equals_3_points_one_equal(lhs): + gpdpoint1 = gpd.GeoSeries(lhs) + gpdpoint2 = gpd.GeoSeries([Point(0, 0), Point(1, 1), Point(2, 2)]) + point1 = cuspatial.from_geopandas(gpdpoint1) + point2 = cuspatial.from_geopandas(gpdpoint2) + got = point1.geom_equals(point2) + expected = gpdpoint1.geom_equals(gpdpoint2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +def test_10000_points_geom_equals_10000_points(point_generator): + gpdpoints1 = gpd.GeoSeries([*point_generator(10000)]) + gpdpoints2 = gpd.GeoSeries([*point_generator(10000)]) + points1 = cuspatial.from_geopandas(gpdpoints1) + points2 = cuspatial.from_geopandas(gpdpoints2) + got = points1.geom_equals(points2) + expected = gpdpoints1.geom_equals(gpdpoints2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +def test_linestring_geom_equals_linestring(): + gpdline1 = gpd.GeoSeries([LineString([(0, 0), (1, 1)])]) + gpdline2 = gpd.GeoSeries([LineString([(0, 0), (1, 1)])]) + line1 = cuspatial.from_geopandas(gpdline1) + line2 = cuspatial.from_geopandas(gpdline2) + got = line1.geom_equals(line2) + expected = gpdline1.geom_equals(gpdline2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +def test_linestring_geom_equals_linestring_reversed(): + gpdline1 = gpd.GeoSeries([LineString([(0, 0), (1, 1)])]) + gpdline2 = gpd.GeoSeries([LineString([(1, 1), (0, 0)])]) + line1 = cuspatial.from_geopandas(gpdline1) + line2 = cuspatial.from_geopandas(gpdline2) + got = line1.geom_equals(line2) + expected = gpdline1.geom_equals(gpdline2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +@pytest.mark.parametrize( + "lhs", + [ + [ + LineString([(0, 0), (1, 1)]), + LineString([(0, 0), (1, 1)]), + LineString([(0, 0), (1, 1)]), + ], + [ + LineString([(1, 1), (2, 2)]), + LineString([(1, 1), (2, 2)]), + LineString([(1, 1), (2, 2)]), + ], + [ + LineString([(2, 2), (3, 3)]), + LineString([(2, 2), (3, 3)]), + LineString([(2, 2), (3, 3)]), + ], + ], +) +def test_3_linestrings_equals_3_linestrings_one_equal(lhs): + gpdline1 = gpd.GeoSeries(lhs) + gpdline2 = gpd.GeoSeries( + [ + LineString([(0, 0), (1, 1)]), + LineString([(1, 1), (2, 2)]), + LineString([(2, 2), (3, 3)]), + ] + ) + line1 = cuspatial.from_geopandas(gpdline1) + line2 = cuspatial.from_geopandas(gpdline2) + got = line1.geom_equals(line2) + expected = gpdline1.geom_equals(gpdline2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +def test_10_linestrings_geom_equals_10_linestrings(linestring_generator): + gpdlines1 = gpd.GeoSeries([*linestring_generator(11, 5)]) + gpdlines2 = gpd.GeoSeries([*linestring_generator(11, 5)]) + lines1 = cuspatial.from_geopandas(gpdlines1) + lines2 = cuspatial.from_geopandas(gpdlines2) + got = lines1.geom_equals(lines2) + expected = gpdlines1.geom_equals(gpdlines2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +def test_10000_linestrings_geom_equals_10000_linestrings(linestring_generator): + gpdlines1 = gpd.GeoSeries([*linestring_generator(10000, 5)]) + gpdlines2 = gpd.GeoSeries([*linestring_generator(10000, 5)]) + lines1 = cuspatial.from_geopandas(gpdlines1) + lines2 = cuspatial.from_geopandas(gpdlines2) + got = lines1.geom_equals(lines2) + expected = gpdlines1.geom_equals(gpdlines2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +def test_linestring_geom_equals_polygon(): + gpdline = gpd.GeoSeries([LineString([(0, 0), (1, 1)])]) + gpdpolygon = gpd.GeoSeries(Polygon([[0, 0], [1, 0], [1, 1], [0, 0]])) + line = cuspatial.from_geopandas(gpdline) + polygon = cuspatial.from_geopandas(gpdpolygon) + got = line.geom_equals(polygon) + expected = gpdline.geom_equals(gpdpolygon) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +def test_10000_linestrings_geom_equals_10000_polygons( + polygon_generator, linestring_generator +): + gpdlines = gpd.GeoSeries([*linestring_generator(10000, 5)]) + gpdpolygons = gpd.GeoSeries([*polygon_generator(10000, 0)]) + lines = cuspatial.from_geopandas(gpdlines) + polygons = cuspatial.from_geopandas(gpdpolygons) + got = lines.geom_equals(polygons) + expected = gpdlines.geom_equals(gpdpolygons) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +def test_polygon_geom_equals_linestring(): + gpdline = gpd.GeoSeries([LineString([(0, 0), (1, 1)])]) + gpdpolygon = gpd.GeoSeries(Polygon([[0, 0], [1, 0], [1, 1], [0, 0]])) + line = cuspatial.from_geopandas(gpdline) + polygon = cuspatial.from_geopandas(gpdpolygon) + got = polygon.geom_equals(line) + expected = gpdpolygon.geom_equals(gpdline) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +def test_10000_polygons_geom_equals_10000_linestrings( + polygon_generator, linestring_generator +): + gpdpolygons = gpd.GeoSeries([*polygon_generator(10000, 0)]) + gpdlines = gpd.GeoSeries([*linestring_generator(10000, 5)]) + polygons = cuspatial.from_geopandas(gpdpolygons) + lines = cuspatial.from_geopandas(gpdlines) + got = polygons.geom_equals(lines) + expected = gpdpolygons.geom_equals(gpdlines) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +def test_point_contains_point(): + gpdpoint1 = gpd.GeoSeries([Point(0, 0)]) + gpdpoint2 = gpd.GeoSeries([Point(0, 0)]) + point1 = cuspatial.from_geopandas(gpdpoint1) + point2 = cuspatial.from_geopandas(gpdpoint2) + got = point1.contains_properly(point2) + expected = gpdpoint1.contains(gpdpoint2) + assert (got.values_host == expected.values).all() + + +def test_point_not_contains_point(): + gpdpoint1 = gpd.GeoSeries([Point(0, 0)]) + gpdpoint2 = gpd.GeoSeries([Point(1, 1)]) + point1 = cuspatial.from_geopandas(gpdpoint1) + point2 = cuspatial.from_geopandas(gpdpoint2) + got = point1.contains_properly(point2) + expected = gpdpoint1.contains(gpdpoint2) + assert (got.values_host == expected.values).all() + + +def test_10000_points_contains_10000_points(point_generator): + gpdpoints1 = gpd.GeoSeries([*point_generator(10000)]) + gpdpoints2 = gpd.GeoSeries([*point_generator(10000)]) + points1 = cuspatial.from_geopandas(gpdpoints1) + points2 = cuspatial.from_geopandas(gpdpoints2) + got = points1.contains_properly(points2) + expected = gpdpoints1.contains(gpdpoints2) + assert (got.values_host == expected.values).all() + + +def test_point_covers_point(): + gpdpoint1 = gpd.GeoSeries([Point(0, 0)]) + gpdpoint2 = gpd.GeoSeries([Point(0, 0)]) + point1 = cuspatial.from_geopandas(gpdpoint1) + point2 = cuspatial.from_geopandas(gpdpoint2) + got = point1.covers(point2) + expected = gpdpoint1.covers(gpdpoint2) + assert (got.values_host == expected.values).all() + + +def test_point_not_covers_point(): + gpdpoint1 = gpd.GeoSeries([Point(0, 0)]) + gpdpoint2 = gpd.GeoSeries([Point(1, 1)]) + point1 = cuspatial.from_geopandas(gpdpoint1) + point2 = cuspatial.from_geopandas(gpdpoint2) + got = point1.covers(point2) + expected = gpdpoint1.covers(gpdpoint2) + assert (got.values_host == expected.values).all() + + +def test_10000_points_covers_10000_points(point_generator): + gpdpoints1 = gpd.GeoSeries([*point_generator(10000)]) + gpdpoints2 = gpd.GeoSeries([*point_generator(10000)]) + points1 = cuspatial.from_geopandas(gpdpoints1) + points2 = cuspatial.from_geopandas(gpdpoints2) + got = points1.covers(points2) + expected = gpdpoints1.covers(gpdpoints2) + assert (got.values_host == expected.values).all() + + +def test_point_intersects_point(): + gpdpoint1 = gpd.GeoSeries([Point(0, 0)]) + gpdpoint2 = gpd.GeoSeries([Point(0, 0)]) + point1 = cuspatial.from_geopandas(gpdpoint1) + point2 = cuspatial.from_geopandas(gpdpoint2) + got = point1.intersects(point2) + expected = gpdpoint1.intersects(gpdpoint2) + assert (got.values_host == expected.values).all() + + +def test_point_not_intersects_point(): + gpdpoint1 = gpd.GeoSeries([Point(0, 0)]) + gpdpoint2 = gpd.GeoSeries([Point(1, 1)]) + point1 = cuspatial.from_geopandas(gpdpoint1) + point2 = cuspatial.from_geopandas(gpdpoint2) + got = point1.intersects(point2) + expected = gpdpoint1.intersects(gpdpoint2) + assert (got.values_host == expected.values).all() + + +def test_10000_points_intersects_10000_points(point_generator): + gpdpoints1 = gpd.GeoSeries([*point_generator(10000)]) + gpdpoints2 = gpd.GeoSeries([*point_generator(10000)]) + points1 = cuspatial.from_geopandas(gpdpoints1) + points2 = cuspatial.from_geopandas(gpdpoints2) + got = points1.intersects(points2) + expected = gpdpoints1.intersects(gpdpoints2) + assert (got.values_host == expected.values).all() + + +def test_point_within_point(): + gpdpoint1 = gpd.GeoSeries([Point(0, 0)]) + gpdpoint2 = gpd.GeoSeries([Point(0, 0)]) + point1 = cuspatial.from_geopandas(gpdpoint1) + point2 = cuspatial.from_geopandas(gpdpoint2) + got = point1.within(point2) + expected = gpdpoint1.within(gpdpoint2) + assert (got.values_host == expected.values).all() + + +def test_point_not_within_point(): + gpdpoint1 = gpd.GeoSeries([Point(0, 0)]) + gpdpoint2 = gpd.GeoSeries([Point(1, 1)]) + point1 = cuspatial.from_geopandas(gpdpoint1) + point2 = cuspatial.from_geopandas(gpdpoint2) + got = point1.within(point2) + expected = gpdpoint1.within(gpdpoint2) + assert (got.values_host == expected.values).all() + + +def test_10000_points_within_10000_points(point_generator): + gpdpoints1 = gpd.GeoSeries(point_generator(10000)) + gpdpoints2 = gpd.GeoSeries(point_generator(10000)) + points1 = cuspatial.from_geopandas(gpdpoints1) + points2 = cuspatial.from_geopandas(gpdpoints2) + got = points1.within(points2).values_host + expected = gpdpoints1.within(gpdpoints2).values + assert (expected == got).all() + + +def test_point_crosses_point(): + gpdpoint1 = gpd.GeoSeries([Point(0, 0)]) + gpdpoint2 = gpd.GeoSeries([Point(0, 0)]) + point1 = cuspatial.from_geopandas(gpdpoint1) + point2 = cuspatial.from_geopandas(gpdpoint2) + got = point1.crosses(point2) + expected = gpdpoint1.crosses(gpdpoint2) + assert (got.values_host == expected.values).all() + + +def test_point_not_crosses_point(): + gpdpoint1 = gpd.GeoSeries([Point(0, 0)]) + gpdpoint2 = gpd.GeoSeries([Point(1, 1)]) + point1 = cuspatial.from_geopandas(gpdpoint1) + point2 = cuspatial.from_geopandas(gpdpoint2) + got = point1.crosses(point2) + expected = gpdpoint1.crosses(gpdpoint2) + assert (got.values_host == expected.values).all() + + +@pytest.mark.parametrize( + "points", + [ + [Point(0, 0), Point(3, 3), Point(3, 3)], + [Point(3, 3), Point(1, 1), Point(3, 3)], + [Point(3, 3), Point(3, 3), Point(2, 2)], + ], +) +def test_three_points_crosses_three_points(points): + gpdpoints1 = gpd.GeoSeries(points) + gpdpoints2 = gpd.GeoSeries([Point(0, 0), Point(1, 1), Point(2, 2)]) + points1 = cuspatial.from_geopandas(gpdpoints1) + points2 = cuspatial.from_geopandas(gpdpoints2) + got = points1.crosses(points2) + expected = gpdpoints1.crosses(gpdpoints2) + assert (got.values_host == expected.values).all() + + +def test_10000_points_crosses_10000_points(point_generator): + gpdpoints1 = gpd.GeoSeries([*point_generator(10000)]) + gpdpoints2 = gpd.GeoSeries([*point_generator(10000)]) + points1 = cuspatial.from_geopandas(gpdpoints1) + points2 = cuspatial.from_geopandas(gpdpoints2) + got = points1.crosses(points2) + expected = gpdpoints1.crosses(gpdpoints2) + assert (got.values_host == expected.values).all() + + +def test_point_overlaps_point(): + gpdpoint1 = gpd.GeoSeries([Point(0, 0)]) + gpdpoint2 = gpd.GeoSeries([Point(0, 0)]) + point1 = cuspatial.from_geopandas(gpdpoint1) + point2 = cuspatial.from_geopandas(gpdpoint2) + got = point1.overlaps(point2) + expected = gpdpoint1.overlaps(gpdpoint2) + assert (got.values_host == expected.values).all() + + +def test_point_not_overlaps_point(): + gpdpoint1 = gpd.GeoSeries([Point(0, 0)]) + gpdpoint2 = gpd.GeoSeries([Point(1, 1)]) + point1 = cuspatial.from_geopandas(gpdpoint1) + point2 = cuspatial.from_geopandas(gpdpoint2) + got = point1.overlaps(point2) + expected = gpdpoint1.overlaps(gpdpoint2) + assert (got.values_host == expected.values).all() + + +@pytest.mark.parametrize( + "points", + [ + [Point(0, 0), Point(3, 3), Point(3, 3)], + [Point(3, 3), Point(1, 1), Point(3, 3)], + [Point(3, 3), Point(3, 3), Point(2, 2)], + ], +) +def test_three_points_overlaps_three_points(points): + gpdpoints1 = gpd.GeoSeries(points) + gpdpoints2 = gpd.GeoSeries([Point(0, 0), Point(1, 1), Point(2, 2)]) + points1 = cuspatial.from_geopandas(gpdpoints1) + points2 = cuspatial.from_geopandas(gpdpoints2) + got = points1.overlaps(points2) + expected = gpdpoints1.overlaps(gpdpoints2) + assert (got.values_host == expected.values).all() + + +def test_10000_points_overlaps_10000_points(point_generator): + gpdpoint1 = gpd.GeoSeries([*point_generator(10000)]) + gpdpoint2 = gpd.GeoSeries([*point_generator(10000)]) + point1 = cuspatial.from_geopandas(gpdpoint1) + point2 = cuspatial.from_geopandas(gpdpoint2) + got = point1.overlaps(point2) + expected = gpdpoint1.overlaps(gpdpoint2) + assert (got.values_host == expected.values).all() + + +def test_multipoint_geom_equals_multipoint(): + gpdpoint1 = gpd.GeoSeries([MultiPoint([(0, 0), (1, 1)])]) + gpdpoint2 = gpd.GeoSeries([MultiPoint([(0, 0), (1, 1)])]) + point1 = cuspatial.from_geopandas(gpdpoint1) + point2 = cuspatial.from_geopandas(gpdpoint2) + got = point1.geom_equals(point2) + expected = gpdpoint1.geom_equals(gpdpoint2) + assert (got.values_host == expected.values).all() + + +def test_multipoint_not_geom_equals_multipoint(): + gpdpoint1 = gpd.GeoSeries([MultiPoint([(0, 0), (1, 1)])]) + gpdpoint2 = gpd.GeoSeries([MultiPoint([(0, 1), (1, 1)])]) + point1 = cuspatial.from_geopandas(gpdpoint1) + point2 = cuspatial.from_geopandas(gpdpoint2) + got = point1.geom_equals(point2) + expected = gpdpoint1.geom_equals(gpdpoint2) + assert (got.values_host == expected.values).all() + + +def test_10000_multipoints_geom_equals_10000_multipoints(multipoint_generator): + gpdpoints1 = gpd.GeoSeries([*multipoint_generator(10000, 10)]) + gpdpoints2 = gpd.GeoSeries([*multipoint_generator(10000, 10)]) + points1 = cuspatial.from_geopandas(gpdpoints1) + points2 = cuspatial.from_geopandas(gpdpoints2) + got = points1.geom_equals(points2) + expected = gpdpoints1.geom_equals(gpdpoints2) + assert (got.values_host == expected.values).all() + + +@pytest.mark.parametrize( + "lhs", + [ + [ + MultiPoint([(0, 0), (1, 1)]), + MultiPoint([(0, 0), (1, 1)]), + MultiPoint([(0, 0), (1, 1)]), + ], + [ + MultiPoint([(0, 0), (1, 1)]), + MultiPoint([(0, 1), (1, 1)]), + MultiPoint([(0, 0), (1, 1)]), + ], + [ + MultiPoint([(0, 0), (1, 1)]), + MultiPoint([(0, 2), (1, 1)]), + MultiPoint([(0, 0), (1, 1)]), + ], + ], +) +def test_3_multipoints_geom_equals_3_multipoints_one_equal(lhs): + gpdpoints1 = gpd.GeoSeries(lhs) + gpdpoints2 = gpd.GeoSeries( + [ + MultiPoint([(0, 0), (0, 1)]), + MultiPoint([(0, 0), (1, 1)]), + MultiPoint([(0, 0), (2, 1)]), + ] + ) + points1 = cuspatial.from_geopandas(gpdpoints1) + points2 = cuspatial.from_geopandas(gpdpoints2) + got = points1.geom_equals(points2) + expected = gpdpoints1.geom_equals(gpdpoints2) + assert (got.values_host == expected.values).all() + + +def test_3_multipoints_geom_equals_3_multipoints_misordered(): + gpdpoints1 = gpd.GeoSeries( + [ + MultiPoint([(0, 0), (1, 1)]), + MultiPoint([(0, 0), (1, 1)]), + MultiPoint([(0, 0), (1, 1)]), + ] + ) + gpdpoints2 = gpd.GeoSeries( + [ + MultiPoint([(1, 1), (0, 0)]), + MultiPoint([(1, 1), (0, 0)]), + MultiPoint([(1, 1), (0, 0)]), + ] + ) + points1 = cuspatial.from_geopandas(gpdpoints1) + points2 = cuspatial.from_geopandas(gpdpoints2) + got = points1.geom_equals(points2) + expected = gpdpoints1.geom_equals(gpdpoints2) + assert (got.values_host == expected.values).all() + + +def test_3_linestrings_geom_equals_3_linestrings_misordered(): + gpdline1 = gpd.GeoSeries( + [ + LineString([(1, 1), (0, 0)]), + LineString([(2, 2), (1, 1)]), + LineString([(3, 3), (2, 2)]), + ] + ) + gpdline2 = gpd.GeoSeries( + [ + LineString([(0, 0), (1, 1)]), + LineString([(1, 1), (2, 2)]), + LineString([(2, 2), (3, 3)]), + ] + ) + line1 = cuspatial.from_geopandas(gpdline1) + line2 = cuspatial.from_geopandas(gpdline2) + got = line1.geom_equals(line2) + expected = gpdline1.geom_equals(gpdline2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +def test_3_linestrings_geom_equals_3_linestrings_longer(): + gpdline1 = gpd.GeoSeries( + [ + LineString([(1, 1), (0, 0), (0, 4)]), + LineString([(2, 2), (1, 1), (0, 4)]), + LineString([(3, 3), (2, 2), (0, 4)]), + ] + ) + gpdline2 = gpd.GeoSeries( + [ + LineString([(0, 0), (1, 1), (0, 4)]), + LineString([(1, 1), (2, 2), (0, 4)]), + LineString([(2, 2), (3, 3), (0, 4)]), + ] + ) + line1 = cuspatial.from_geopandas(gpdline1) + line2 = cuspatial.from_geopandas(gpdline2) + got = line1.geom_equals(line2) + expected = gpdline1.geom_equals(gpdline2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +def test_pair_linestrings_different_last_two(): + gpdlinestring1 = gpd.GeoSeries( + [ + LineString([(0, 0), (1, 1), (2, 1)]), + ] + ) + gpdlinestring2 = gpd.GeoSeries( + [ + LineString([(0, 0), (2, 1), (1, 1)]), + ] + ) + linestring1 = cuspatial.from_geopandas(gpdlinestring1) + linestring2 = cuspatial.from_geopandas(gpdlinestring2) + got = linestring1.geom_equals(linestring2) + expected = gpdlinestring1.geom_equals(gpdlinestring2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +@pytest.mark.skip(reason="NotImplemented: Depends on .contains") +def test_pair_polygons_different_ordering(): + gpdpoly1 = gpd.GeoSeries( + [ + Polygon([(0, 0), (1, 0), (1, 1), (0.5, 0.5), (0, 1), (0, 0)]), + ] + ) + gpdpoly2 = gpd.GeoSeries( + [ + Polygon([(0, 0), (0.5, 0.5), (1, 0), (1, 1), (0, 1), (0, 0)]), + ] + ) + poly1 = cuspatial.from_geopandas(gpdpoly1) + poly2 = cuspatial.from_geopandas(gpdpoly2) + got = poly1.geom_equals(poly2) + expected = gpdpoly1.geom_equals(gpdpoly2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +@pytest.mark.skip(reason="NotImplemented: Depends on .contains") +def test_pair_polygons_different_winding(): + gpdpoly1 = gpd.GeoSeries( + [ + Polygon([(0, 0), (1, 0), (1, 1), (0.5, 0.5), (0, 1), (0, 0)]), + ] + ) + gpdpoly2 = gpd.GeoSeries( + [ + Polygon([(1, 0), (1, 1), (0.5, 0.5), (0, 1), (0, 0), (1, 0)]), + ] + ) + poly1 = cuspatial.from_geopandas(gpdpoly1) + poly2 = cuspatial.from_geopandas(gpdpoly2) + got = poly1.geom_equals(poly2) + expected = gpdpoly1.geom_equals(gpdpoly2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +@pytest.mark.skip(reason="NotImplemented: Depends on .contains") +def test_3_polygons_geom_equals_3_polygons_misordered_corrected_vertex(): + gpdpoly1 = gpd.GeoSeries( + [ + Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]), + Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]), + Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]), + ] + ) + gpdpoly2 = gpd.GeoSeries( + [ + Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), # Oppositely wound + Polygon([(1, 1), (0, 1), (0, 0), (1, 1)]), # Wound by +1 offset + Polygon([(0, 1), (0, 0), (1, 1), (0, 1)]), # Wound by -1 offset + ] + ) + poly1 = cuspatial.from_geopandas(gpdpoly1) + poly2 = cuspatial.from_geopandas(gpdpoly2) + got = poly1.geom_equals(poly2) + expected = gpdpoly1.geom_equals(gpdpoly2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +@pytest.mark.skip(reason="NotImplemented: Depends on .contains") +def test_polygon_geom_equals_polygon(): + gpdpolygon1 = gpd.GeoSeries(Polygon([[0, 0], [1, 0], [1, 1], [0, 0]])) + gpdpolygon2 = gpd.GeoSeries(Polygon([[0, 0], [1, 0], [1, 1], [0, 0]])) + polygon1 = cuspatial.from_geopandas(gpdpolygon1) + polygon2 = cuspatial.from_geopandas(gpdpolygon2) + got = polygon1.geom_equals(polygon2) + expected = gpdpolygon1.geom_equals(gpdpolygon2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +@pytest.mark.skip(reason="NotImplemented: Depends on .contains") +def test_polygon_geom_equals_polygon_swap_inner(): + gpdpolygon1 = gpd.GeoSeries(Polygon([[0, 0], [1, 0], [1, 1], [0, 0]])) + gpdpolygon2 = gpd.GeoSeries(Polygon([[0, 0], [1, 1], [1, 0], [0, 0]])) + polygon1 = cuspatial.from_geopandas(gpdpolygon1) + polygon2 = cuspatial.from_geopandas(gpdpolygon2) + got = polygon1.geom_equals(polygon2) + expected = gpdpolygon1.geom_equals(gpdpolygon2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +@pytest.mark.skip(reason="NotImplemented: Depends on .contains") +@pytest.mark.parametrize( + "lhs", + [ + [ + Polygon([[0, 0], [1, 0], [1, 1], [0, 0]]), + Polygon([[0, 0], [1, 0], [1, 1], [0, 0]]), + Polygon([[0, 0], [1, 0], [1, 1], [0, 0]]), + ], + [ + Polygon([[0, 0], [2, 0], [2, 2], [0, 0]]), + Polygon([[0, 0], [2, 0], [2, 2], [0, 0]]), + Polygon([[0, 0], [2, 0], [2, 2], [0, 0]]), + ], + [ + Polygon([[0, 0], [3, 0], [3, 3], [0, 0]]), + Polygon([[0, 0], [3, 0], [3, 3], [0, 0]]), + Polygon([[0, 0], [3, 0], [3, 3], [0, 0]]), + ], + ], +) +def test_3_polygons_geom_equals_3_polygons_one_equal(lhs): + gpdpolygons1 = gpd.GeoSeries(lhs) + gpdpolygons2 = gpd.GeoSeries( + [ + Polygon([[0, 0], [1, 0], [1, 1], [0, 0]]), + Polygon([[0, 0], [2, 0], [2, 2], [0, 0]]), + Polygon([[0, 0], [3, 0], [3, 3], [0, 0]]), + ] + ) + polygons1 = cuspatial.from_geopandas(gpdpolygons1) + polygons2 = cuspatial.from_geopandas(gpdpolygons2) + got = polygons1.geom_equals(polygons2) + expected = gpdpolygons1.geom_equals(gpdpolygons2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +@pytest.mark.skip(reason="NotImplemented: Depends on .contains") +def test_10000_polygons_geom_equals_10000_polygons(polygon_generator): + gpdpolygons1 = gpd.GeoSeries([*polygon_generator(10000, 0)]) + gpdpolygons2 = gpd.GeoSeries([*polygon_generator(10000, 0)]) + polygons1 = cuspatial.from_geopandas(gpdpolygons1) + polygons2 = cuspatial.from_geopandas(gpdpolygons2) + got = polygons1.geom_equals(polygons2) + expected = gpdpolygons1.geom_equals(gpdpolygons2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +@pytest.mark.skip(reason="NotImplemented: Depends on .contains") +def test_3_polygons_geom_equals_3_polygons_different_sizes(): + gpdpoly1 = gpd.GeoSeries( + [ + Polygon([(0, 0), (0, 1), (1, 1), (1, 0)]), # Length 5 + Polygon([(0, 0), (0, 1), (1, 1), (1, 0)]), + Polygon([(0, 0), (0, 1), (1, 1), (1, 0)]), + ] + ) + gpdpoly2 = gpd.GeoSeries( + [ + Polygon( + [(0, 0), (1, 1), (1, 0), (0, 0)] + ), # Oppositely wound, length 4 + Polygon([(1, 1), (1, 0), (0, 0), (1, 1)]), # Wound by +1 offset + Polygon([(1, 0), (0, 0), (1, 1), (1, 0)]), # Wound by -1 offset + ] + ) + poly1 = cuspatial.from_geopandas(gpdpoly1) + poly2 = cuspatial.from_geopandas(gpdpoly2) + got = poly1.geom_equals(poly2) + expected = gpdpoly1.geom_equals(gpdpoly2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +@pytest.mark.skip(reason="NotImplemented: Depends on .contains") +def test_3_polygons_geom_equals_3_polygons_misordered(): + gpdpoly1 = gpd.GeoSeries( + [ + Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]), + Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]), + Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]), + ] + ) + gpdpoly2 = gpd.GeoSeries( + [ + Polygon([(0, 0), (1, 1), (1, 0), (0, 0)]), # Oppositely wound + Polygon([(1, 1), (1, 0), (0, 0), (1, 1)]), # Wound by +1 offset + Polygon([(1, 0), (0, 0), (1, 1), (1, 0)]), # Wound by -1 offset + ] + ) + poly1 = cuspatial.from_geopandas(gpdpoly1) + poly2 = cuspatial.from_geopandas(gpdpoly2) + got = poly1.geom_equals(poly2) + expected = gpdpoly1.geom_equals(gpdpoly2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +def test_linestring_orders(): + gpdlinestring1 = gpd.GeoSeries( + [ + LineString([(0, 0), (1, 1), (1, 0), (0, 0)]), + ] + ) + gpdlinestring2 = gpd.GeoSeries( + [ + LineString([(0, 0), (1, 0), (1, 1), (0, 0)]), + ] + ) + linestring1 = cuspatial.from_geopandas(gpdlinestring1) + linestring2 = cuspatial.from_geopandas(gpdlinestring2) + got = linestring1.geom_equals(linestring2) + expected = gpdlinestring1.geom_equals(gpdlinestring2) + pd.testing.assert_series_equal(expected, got.to_pandas()) + + +def test_internal_reversed_linestrings(): + linestring1 = cuspatial.GeoSeries( + [ + LineString([(0, 0), (1, 1), (1, 0), (0, 0)]), + ] + ) + linestring2 = cuspatial.GeoSeries( + [ + LineString([(0, 0), (1, 0), (1, 1), (0, 0)]), + ] + ) + from cuspatial.core.binpreds.binpreds import EqualsBinpred + + bp = EqualsBinpred(linestring1, linestring2) + got = bp._reverse_linestrings( + linestring1.lines.xy, linestring1.lines.part_offset + ).to_pandas() + expected = linestring2.lines.xy.to_pandas() + pd.testing.assert_series_equal(got, expected) + + +def test_internal_reversed_linestrings_pair(): + linestring1 = cuspatial.GeoSeries( + [ + LineString([(0, 0), (1, 1), (1, 0), (0, 0)]), + LineString([(0, 0), (1, 1), (1, 0)]), + ] + ) + linestring2 = cuspatial.GeoSeries( + [ + LineString([(0, 0), (1, 0), (1, 1), (0, 0)]), + LineString([(1, 0), (1, 1), (0, 0)]), + ] + ) + from cuspatial.core.binpreds.binpreds import EqualsBinpred + + bp = EqualsBinpred(linestring1, linestring2) + got = bp._reverse_linestrings( + linestring1.lines.xy, linestring1.lines.part_offset + ).to_pandas() + expected = linestring2.lines.xy.to_pandas() + pd.testing.assert_series_equal(got, expected) + + +def test_from_points_xy_large(): + points = cuspatial.GeoSeries( + cuspatial.core._column.geocolumn.GeoColumn._from_points_xy( + cudf.core.column.column.as_column( + cp.arange(10000000, dtype="float64") + ) + ) + ) + assert points.geom_equals(points).all() diff --git a/python/cuspatial/cuspatial/tests/conftest.py b/python/cuspatial/cuspatial/tests/conftest.py index 4cb492e53..1c37cee77 100644 --- a/python/cuspatial/cuspatial/tests/conftest.py +++ b/python/cuspatial/cuspatial/tests/conftest.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. import geopandas as gpd import numpy as np @@ -305,3 +305,13 @@ def factory(length): return mask return factory + + +@pytest.fixture +def naturalearth_cities(): + return gpd.read_file(gpd.datasets.get_path("naturalearth_cities")) + + +@pytest.fixture +def naturalearth_lowres(): + return gpd.read_file(gpd.datasets.get_path("naturalearth_lowres")) diff --git a/python/cuspatial/cuspatial/tests/spatial/distance/test_pairwise_point_polygon_distance.py b/python/cuspatial/cuspatial/tests/spatial/distance/test_pairwise_point_polygon_distance.py new file mode 100644 index 000000000..199c41208 --- /dev/null +++ b/python/cuspatial/cuspatial/tests/spatial/distance/test_pairwise_point_polygon_distance.py @@ -0,0 +1,142 @@ +import geopandas as gpd +import pytest +from shapely.geometry import MultiPoint, MultiPolygon, Point, Polygon + +import cudf +from cudf.testing import assert_series_equal + +import cuspatial + + +def test_point_polygon_empty(): + lhs = cuspatial.GeoSeries.from_points_xy([]) + rhs = cuspatial.GeoSeries.from_polygons_xy([], [0], [0], [0]) + + got = cuspatial.pairwise_point_polygon_distance(lhs, rhs) + + expect = cudf.Series([], dtype="f8") + + assert_series_equal(got, expect) + + +def test_multipoint_polygon_empty(): + lhs = cuspatial.GeoSeries.from_multipoints_xy([], [0]) + rhs = cuspatial.GeoSeries.from_polygons_xy([], [0], [0], [0]) + + got = cuspatial.pairwise_point_polygon_distance(lhs, rhs) + + expect = cudf.Series([], dtype="f8") + + assert_series_equal(got, expect) + + +@pytest.mark.parametrize( + "points", [[Point(0, 0)], [MultiPoint([(1, 1), (2, 2)])]] +) +@pytest.mark.parametrize( + "polygons", + [ + [Polygon([(0, 1), (1, 0), (-1, 0), (0, 1)])], + [ + MultiPolygon( + [ + Polygon([(-2, 0), (-1, 0), (-1, -1), (-2, 0)]), + Polygon([(1, 0), (2, 0), (1, -1), (1, 0)]), + ] + ) + ], + ], +) +def test_one_pair(points, polygons): + lhs = gpd.GeoSeries(points) + rhs = gpd.GeoSeries(polygons) + + dlhs = cuspatial.GeoSeries(points) + drhs = cuspatial.GeoSeries(polygons) + + expect = lhs.distance(rhs) + got = cuspatial.pairwise_point_polygon_distance(dlhs, drhs) + + assert_series_equal(got, cudf.Series(expect)) + + +@pytest.mark.parametrize( + "points", + [ + [Point(0, 0), Point(3, -3)], + [MultiPoint([(1, 1), (2, 2)]), MultiPoint([(3, 3), (4, 4)])], + ], +) +@pytest.mark.parametrize( + "polygons", + [ + [ + Polygon([(0, 1), (1, 0), (-1, 0), (0, 1)]), + Polygon([(-4, -4), (-4, -5), (-5, -5), (-5, -4), (-5, -5)]), + ], + [ + MultiPolygon( + [ + Polygon([(0, 1), (1, 0), (-1, 0), (0, 1)]), + Polygon([(0, 1), (1, 0), (0, -1), (-1, 0), (0, 1)]), + ] + ), + MultiPolygon( + [ + Polygon( + [(-4, -4), (-4, -5), (-5, -5), (-5, -4), (-5, -5)] + ), + Polygon([(-2, 0), (-2, -2), (0, -2), (0, 0), (-2, 0)]), + ] + ), + ], + ], +) +def test_two_pair(points, polygons): + lhs = gpd.GeoSeries(points) + rhs = gpd.GeoSeries(polygons) + + dlhs = cuspatial.GeoSeries(points) + drhs = cuspatial.GeoSeries(polygons) + + expect = lhs.distance(rhs) + got = cuspatial.pairwise_point_polygon_distance(dlhs, drhs) + + assert_series_equal(got, cudf.Series(expect)) + + +def test_point_polygon_large(point_generator, polygon_generator): + N = 100 + points = gpd.GeoSeries(point_generator(N)) + polygons = gpd.GeoSeries(polygon_generator(N, 1.0, 1.5)) + + dpoints = cuspatial.from_geopandas(points) + dpolygons = cuspatial.from_geopandas(polygons) + + expect = points.distance(polygons) + got = cuspatial.pairwise_point_polygon_distance(dpoints, dpolygons) + + assert_series_equal(got, cudf.Series(expect)) + + +def test_point_polygon_geocities(naturalearth_cities, naturalearth_lowres): + N = 100 + gpu_cities = cuspatial.from_geopandas(naturalearth_cities.geometry) + gpu_countries = cuspatial.from_geopandas(naturalearth_lowres.geometry) + + print( + len(naturalearth_lowres), + len(naturalearth_lowres[: len(naturalearth_cities)]), + len(gpu_countries), + len(gpu_countries[: len(naturalearth_cities)]), + ) + + expect = naturalearth_cities.geometry[:N].distance( + naturalearth_lowres.geometry[:N] + ) + + got = cuspatial.pairwise_point_polygon_distance( + gpu_cities[:N], gpu_countries[:N] + ) + + assert_series_equal(cudf.Series(expect), got)