diff --git a/libgalois/include/katana/PropertyGraph.h b/libgalois/include/katana/PropertyGraph.h index 412d85ad43..f0545d95c6 100644 --- a/libgalois/include/katana/PropertyGraph.h +++ b/libgalois/include/katana/PropertyGraph.h @@ -74,6 +74,9 @@ class KATANA_EXPORT PropertyGraph { Result WriteView( const std::string& uri, const std::string& command_line); + /// Recreate indexes from column names in RDG metadata. + katana::Result RecreatePropertyIndexes(); + tsuba::RDG rdg_; std::unique_ptr file_; GraphTopology topology_; @@ -88,9 +91,11 @@ class KATANA_EXPORT PropertyGraph { /// The edge EntityTypeID for each edge's most specific type EntityTypeIDArray edge_entity_type_ids_; - // List of node and edge indexes on this graph. + /// List of node indexes on this graph. std::vector>> node_indexes_; + + /// List of edge indexes on this graph. std::vector>> edge_indexes_; diff --git a/libgalois/src/PropertyGraph.cpp b/libgalois/src/PropertyGraph.cpp index 4d3822da5c..7a7c952110 100644 --- a/libgalois/src/PropertyGraph.cpp +++ b/libgalois/src/PropertyGraph.cpp @@ -219,6 +219,8 @@ katana::PropertyGraph::Make( katana::GraphTopology topo = KATANA_CHECKED(MapTopology(rdg.topology_file_storage())); + std::unique_ptr property_graph; + if (rdg.IsEntityTypeIDsOutsideProperties()) { KATANA_LOG_DEBUG("loading EntityType data from outside properties"); @@ -236,7 +238,7 @@ katana::PropertyGraph::Make( EntityTypeManager edge_type_manager = KATANA_CHECKED(rdg.edge_entity_type_manager()); - return std::make_unique( + property_graph = std::make_unique( std::move(rdg_file), std::move(rdg), std::move(topo), std::move(node_type_ids), std::move(edge_type_ids), std::move(node_type_manager), std::move(edge_type_manager)); @@ -244,16 +246,18 @@ katana::PropertyGraph::Make( } else { // we must construct id_arrays and managers from properties - auto pg = std::make_unique( + property_graph = std::make_unique( std::move(rdg_file), std::move(rdg), std::move(topo), 
MakeDefaultEntityTypeIDArray(topo.num_nodes()), MakeDefaultEntityTypeIDArray(topo.num_edges()), EntityTypeManager{}, EntityTypeManager{}); - KATANA_CHECKED(pg->ConstructEntityTypeIDs()); - - return MakeResult(std::move(pg)); + KATANA_CHECKED(property_graph->ConstructEntityTypeIDs()); } + + KATANA_CHECKED(property_graph->RecreatePropertyIndexes()); + + return MakeResult(std::move(property_graph)); } katana::Result> @@ -468,6 +472,19 @@ katana::PropertyGraph::DoWrite( ? KATANA_CHECKED(WriteEntityTypeIDsArray(edge_entity_type_ids_)) : nullptr; + // Update lists of node and edge index columns. + std::vector node_index_columns(node_indexes_.size()); + std::transform( + node_indexes_.begin(), node_indexes_.end(), node_index_columns.begin(), + [](const auto& index) { return index->column_name(); }); + rdg_.set_node_property_index_columns(node_index_columns); + + std::vector edge_index_columns(edge_indexes_.size()); + std::transform( + edge_indexes_.begin(), edge_indexes_.end(), edge_index_columns.begin(), + [](const auto& index) { return index->column_name(); }); + rdg_.set_edge_property_index_columns(edge_index_columns); + return rdg_.Store( handle, command_line, versioning_action, std::move(topology_res), std::move(node_entity_type_id_array_res), @@ -1289,3 +1306,20 @@ katana::PropertyGraph::GetNodePropertyIndex( } return KATANA_ERROR(katana::ErrorCode::NotFound, "node index not found"); } + +katana::Result +katana::PropertyGraph::RecreatePropertyIndexes() { + for (const std::string& column_name : rdg_.node_property_index_columns()) { + if (HasNodeProperty(column_name)) { + KATANA_CHECKED(MakeNodeIndex(column_name)); + } + } + + for (const std::string& column_name : rdg_.edge_property_index_columns()) { + if (HasEdgeProperty(column_name)) { + KATANA_CHECKED(MakeEdgeIndex(column_name)); + } + } + + return katana::ResultSuccess(); +} diff --git a/libgalois/test/property-index.cpp b/libgalois/test/property-index.cpp index 89107c2f3a..94df35b5dc 100644 --- 
a/libgalois/test/property-index.cpp +++ b/libgalois/test/property-index.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include "TestTypedPropertyGraph.h" #include "katana/Logging.h" @@ -11,8 +12,12 @@ template struct NodeOrEdge { static katana::Result*> MakeIndex( katana::PropertyGraph* pg, const std::string& column_name); + static katana::Result*> GetIndex( + katana::PropertyGraph* pg, const std::string& column_name); static katana::Result AddProperties( katana::PropertyGraph* pg, std::shared_ptr properties); + static std::shared_ptr GetProperty( + katana::PropertyGraph* pg, const std::string& column_name); static size_t num_entities(katana::PropertyGraph* pg); }; @@ -21,12 +26,7 @@ using Edge = NodeOrEdge; template <> katana::Result*> -Node::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) { - auto result = pg->MakeNodeIndex(column_name); - if (!result) { - return result.error(); - } - +Node::GetIndex(katana::PropertyGraph* pg, const std::string& column_name) { for (const auto& index : pg->node_indexes()) { if (index->column_name() == column_name) { return index.get(); @@ -37,13 +37,15 @@ Node::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) { } template <> -katana::Result*> -Edge::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) { - auto result = pg->MakeEdgeIndex(column_name); - if (!result) { - return result.error(); - } +katana::Result*> +Node::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) { + KATANA_CHECKED(pg->MakeNodeIndex(column_name)); + return Node::GetIndex(pg, column_name); +} +template <> +katana::Result*> +Edge::GetIndex(katana::PropertyGraph* pg, const std::string& column_name) { for (const auto& index : pg->edge_indexes()) { if (index->column_name() == column_name) { return index.get(); @@ -53,6 +55,13 @@ Edge::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) { return KATANA_ERROR(katana::ErrorCode::NotFound, "Created index not 
found"); } +template <> +katana::Result*> +Edge::MakeIndex(katana::PropertyGraph* pg, const std::string& column_name) { + KATANA_CHECKED(pg->MakeEdgeIndex(column_name)); + return Edge::GetIndex(pg, column_name); +} + template <> size_t Node::num_entities(katana::PropertyGraph* pg) { @@ -79,6 +88,22 @@ Edge::AddProperties( return pg->AddEdgeProperties(properties); } +template <> +std::shared_ptr +Node::GetProperty(katana::PropertyGraph* pg, const std::string& column_name) { + auto prop_result = pg->GetNodeProperty(column_name); + KATANA_LOG_ASSERT(prop_result); + return prop_result.value()->chunk(0); +} + +template <> +std::shared_ptr +Edge::GetProperty(katana::PropertyGraph* pg, const std::string& column_name) { + auto prop_result = pg->GetEdgeProperty(column_name); + KATANA_LOG_ASSERT(prop_result); + return prop_result.value()->chunk(0); +} + template std::shared_ptr CreatePrimitiveProperty( @@ -200,11 +225,8 @@ TestPrimitiveIndex(size_t num_nodes, size_t line_width) { } template -void -TestStringIndex(size_t num_nodes, size_t line_width) { - using IndexType = katana::StringPropertyIndex; - using ArrayType = arrow::LargeStringArray; - +std::unique_ptr +MakeStringGraph(size_t num_nodes, size_t line_width) { LinePolicy policy{line_width}; std::unique_ptr g = @@ -230,6 +252,32 @@ TestStringIndex(size_t num_nodes, size_t line_width) { nonuniform_index_result, "Could not create index: {}", nonuniform_index_result.error()); + return g; +} + +template +std::unique_ptr +TestStringIndex( + std::unique_ptr g, size_t num_nodes, + size_t line_width) { + using IndexType = katana::StringPropertyIndex; + using ArrayType = arrow::LargeStringArray; + + if (!g) { + g = MakeStringGraph(num_nodes, line_width); + } + + auto uniform_index_result = + NodeOrEdge::GetIndex(g.get(), "uniform"); + KATANA_LOG_VASSERT( + uniform_index_result, "Could not get index: {}", + uniform_index_result.error()); + auto nonuniform_index_result = + NodeOrEdge::GetIndex(g.get(), "nonuniform"); + 
KATANA_LOG_VASSERT( + nonuniform_index_result, "Could not get index: {}", + nonuniform_index_result.error()); + auto* uniform_index = static_cast(uniform_index_result.value()); auto* nonuniform_index = static_cast(nonuniform_index_result.value()); @@ -253,8 +301,8 @@ TestStringIndex(size_t num_nodes, size_t line_width) { } // The non-uniform index starts at "aaaa" and increases by 2. - auto typed_prop = - std::static_pointer_cast(nonuniform_prop->column(0)->chunk(0)); + auto typed_prop = std::static_pointer_cast( + NodeOrEdge::GetProperty(g.get(), "nonuniform")); it = nonuniform_index->Find("aaaj"); KATANA_LOG_ASSERT(it == nonuniform_index->end()); it = nonuniform_index->LowerBound("aaaj"); @@ -263,6 +311,31 @@ TestStringIndex(size_t num_nodes, size_t line_width) { it = nonuniform_index->UpperBound("aaak"); KATANA_LOG_ASSERT(it != nonuniform_index->end()); KATANA_LOG_ASSERT(typed_prop->GetView(*it) == "aaam"); + + return g; +} + +std::unique_ptr +ReloadGraph(std::unique_ptr g) { + auto uri_res = katana::Uri::MakeRand("/tmp/propertyfilegraph"); + KATANA_LOG_ASSERT(uri_res); + std::string rdg_dir(uri_res.value().path()); + + auto write_result = g->Write(rdg_dir, "test command line"); + + if (!write_result) { + boost::filesystem::remove_all(rdg_dir); + KATANA_LOG_FATAL("writing result: {}", write_result.error()); + } + + katana::Result> make_result = + katana::PropertyGraph::Make(rdg_dir, tsuba::RDGLoadOptions()); + boost::filesystem::remove_all(rdg_dir); + if (!make_result) { + KATANA_LOG_FATAL("making result: {}", make_result.error()); + } + + return std::move(make_result.value()); } int @@ -274,8 +347,14 @@ main() { TestPrimitiveIndex(10, 3); TestPrimitiveIndex(10, 3); - TestStringIndex(10, 3); - TestStringIndex(10, 3); + auto node_g = TestStringIndex(nullptr, 10, 3); + auto edge_g = TestStringIndex(nullptr, 10, 3); + + node_g = ReloadGraph(std::move(node_g)); + edge_g = ReloadGraph(std::move(edge_g)); + + TestStringIndex(std::move(node_g), 10, 3); + 
TestStringIndex(std::move(edge_g), 10, 3); return 0; } diff --git a/libtsuba/include/tsuba/RDG.h b/libtsuba/include/tsuba/RDG.h index d578280040..a865b10617 100644 --- a/libtsuba/include/tsuba/RDG.h +++ b/libtsuba/include/tsuba/RDG.h @@ -230,6 +230,17 @@ class KATANA_EXPORT RDG { /// Remove all edge properties void DropEdgeProperties(); + // Set the lists of indexed node and edge column names to persist. The + // provided vectors are copied, not consumed. + void set_node_property_index_columns( + const std::vector& node_property_index_columns); + void set_edge_property_index_columns( + const std::vector& edge_property_index_columns); + + // Return the persisted lists of indexed node and edge column names. + const std::vector& node_property_index_columns(); + const std::vector& edge_property_index_columns(); + /// Remove topology data katana::Result DropTopology(); diff --git a/libtsuba/src/RDG.cpp b/libtsuba/src/RDG.cpp index 4c037fe30d..2148963ae9 100644 --- a/libtsuba/src/RDG.cpp +++ b/libtsuba/src/RDG.cpp @@ -215,6 +215,30 @@ tsuba::RDG::WritePartArrays(const katana::Uri& dir, tsuba::WriteGroup* desc) { return next_properties; } +void +tsuba::RDG::set_node_property_index_columns( + const std::vector& node_property_index_columns) { + core_->part_header().set_node_property_index_columns( + node_property_index_columns); +} + +void +tsuba::RDG::set_edge_property_index_columns( + const std::vector& edge_property_index_columns) { + core_->part_header().set_edge_property_index_columns( + edge_property_index_columns); +} + +const std::vector& +tsuba::RDG::node_property_index_columns() { + return core_->part_header().node_property_index_columns(); +} + +const std::vector& +tsuba::RDG::edge_property_index_columns() { + return core_->part_header().edge_property_index_columns(); +} + katana::Result tsuba::RDG::DoStoreTopology( RDGHandle handle, std::unique_ptr topology_ff, diff --git a/libtsuba/src/RDGPartHeader.cpp b/libtsuba/src/RDGPartHeader.cpp index 6adf228db1..35130b2de1 100644 ---
a/libtsuba/src/RDGPartHeader.cpp +++ b/libtsuba/src/RDGPartHeader.cpp @@ -33,6 +33,9 @@ const char* kEdgeEntityTypeIDDictionaryKey = const char* kNodeEntityTypeIDNameKey = "kg.v1.node_entity_type_id_name"; // Name maps from Atomic Edge Entity Type ID to set of string names for the Edge Entity Type ID const char* kEdgeEntityTypeIDNameKey = "kg.v1.edge_entity_type_id_name"; +// List of node and edge indexed columns +const char* kNodePropertyIndexColumnsKey = "kg.v1.node_property_index_columns"; +const char* kEdgePropertyIndexColumnsKey = "kg.v1.edge_property_index_columns"; // //constexpr std::string_view mirror_nodes_prop_name = "mirror_nodes"; @@ -288,6 +291,8 @@ tsuba::to_json(json& j, const tsuba::RDGPartHeader& header) { {kEdgeEntityTypeIDDictionaryKey, header.edge_entity_type_id_dictionary_}, {kNodeEntityTypeIDNameKey, header.node_entity_type_id_name_}, {kEdgeEntityTypeIDNameKey, header.edge_entity_type_id_name_}, + {kNodePropertyIndexColumnsKey, header.node_property_index_columns_}, + {kEdgePropertyIndexColumnsKey, header.edge_property_index_columns_}, }; } @@ -319,6 +324,16 @@ tsuba::from_json(const json& j, tsuba::RDGPartHeader& header) { j.at(kNodeEntityTypeIDNameKey).get_to(header.node_entity_type_id_name_); j.at(kEdgeEntityTypeIDNameKey).get_to(header.edge_entity_type_id_name_); } + + header.node_property_index_columns_ = {}; + if (auto it = j.find(kNodePropertyIndexColumnsKey); it != j.end()) { + it->get_to(header.node_property_index_columns_); + } + + header.edge_property_index_columns_ = {}; + if (auto it = j.find(kEdgePropertyIndexColumnsKey); it != j.end()) { + it->get_to(header.edge_property_index_columns_); + } } void diff --git a/libtsuba/src/RDGPartHeader.h b/libtsuba/src/RDGPartHeader.h index be2b0cc0ba..18a7948066 100644 --- a/libtsuba/src/RDGPartHeader.h +++ b/libtsuba/src/RDGPartHeader.h @@ -278,6 +278,24 @@ class KATANA_EXPORT RDGPartHeader { part_prop_info_list_ = std::move(part_prop_info_list); } + const std::vector& 
node_property_index_columns() const { + return node_property_index_columns_; + } + + void set_node_property_index_columns( + const std::vector& node_property_index_columns) { + node_property_index_columns_ = node_property_index_columns; + } + + const std::vector& edge_property_index_columns() const { + return edge_property_index_columns_; + } + + void set_edge_property_index_columns( + const std::vector& edge_property_index_columns) { + edge_property_index_columns_ = edge_property_index_columns; + } + const PartitionMetadata& metadata() const { return metadata_; } void set_metadata(const PartitionMetadata& metadata) { metadata_ = metadata; } @@ -501,6 +519,10 @@ class KATANA_EXPORT RDGPartHeader { std::vector node_prop_info_list_; std::vector edge_prop_info_list_; + /// Names of the columns from which property indexes are recreated on load + std::vector node_property_index_columns_; + std::vector edge_property_index_columns_; + /// Metadata filled in by CuSP, or from storage (meta partition file) PartitionMetadata metadata_;