Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nj Json persistence for property indexes #465

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
32 changes: 31 additions & 1 deletion libgalois/include/katana/PropertyGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,46 @@ class KATANA_EXPORT PropertyGraph {
/// The edge EntityTypeID for each edge's most specific type
EntityTypeIDArray edge_entity_type_ids_;

// List of node and edge indexes on this graph.
// List of node indexes on this graph.
std::vector<std::unique_ptr<PropertyIndex<GraphTopology::Node>>>
node_indexes_;
// Names of the columns the node indexes were built from, kept so they can be persisted to json
std::vector<std::string> node_property_indexes_column_name_;

// List of edge indexes on this graph.
std::vector<std::unique_ptr<PropertyIndex<GraphTopology::Edge>>>
edge_indexes_;
// Names of the columns the edge indexes were built from, kept so they can be persisted to json
std::vector<std::string> edge_property_indexes_column_name_;

PGViewCache pg_view_cache_;

friend class PropertyGraphRetractor;

/// Rebuild every node property index whose column name is recorded in the
/// RDG (the list that is persisted to json).
///
/// \returns success, or the first error reported by MakeNodeIndex.
katana::Result<void> recreate_node_property_indexes() {
  // Work on a private snapshot: MakeNodeIndex() appends to
  // node_property_indexes_column_name_ and rewrites the RDG's list on every
  // call, so iterating either of those containers directly would invalidate
  // the loop's iterators.
  std::vector<std::string> column_names =
      rdg_.node_property_indexes_column_name();

  // Start from an empty list so MakeNodeIndex() does not record each name a
  // second time on top of the loaded ones.
  node_property_indexes_column_name_.clear();

  for (const std::string& column_name : column_names) {
    auto result = MakeNodeIndex(column_name);
    if (!result) {
      return result.error();
    }
  }

  // Re-publish the complete list on both sides. The member is assigned first
  // because the RDG setter may consume the vector it is given.
  node_property_indexes_column_name_ = column_names;
  rdg_.set_node_property_indexes_column_name(column_names);

  return katana::ResultSuccess();
}
/// Rebuild every edge property index whose column name is recorded in the
/// RDG (the list that is persisted to json).
///
/// \returns success, or the first error reported by MakeEdgeIndex.
katana::Result<void> recreate_edge_property_indexes() {
  // Work on a private snapshot: MakeEdgeIndex() appends to
  // edge_property_indexes_column_name_ and rewrites the RDG's list on every
  // call, so iterating either of those containers directly would invalidate
  // the loop's iterators.
  std::vector<std::string> column_names =
      rdg_.edge_property_indexes_column_name();

  // Start from an empty list so MakeEdgeIndex() does not record each name a
  // second time on top of the loaded ones.
  edge_property_indexes_column_name_.clear();

  for (const std::string& column_name : column_names) {
    auto result = MakeEdgeIndex(column_name);
    if (!result) {
      return result.error();
    }
  }

  // Re-publish the complete list on both sides. The member is assigned first
  // because the RDG setter may consume the vector it is given.
  edge_property_indexes_column_name_ = column_names;
  rdg_.set_edge_property_indexes_column_name(column_names);

  return katana::ResultSuccess();
}

public:
/// PropertyView provides a uniform interface when you don't need to
/// distinguish operating on edge or node properties
Expand Down
32 changes: 30 additions & 2 deletions libgalois/src/PropertyGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ katana::PropertyGraph::Make(
katana::GraphTopology topo =
KATANA_CHECKED(MapTopology(rdg.topology_file_storage()));

std::unique_ptr<katana::PropertyGraph> property_graph;

if (rdg.IsEntityTypeIDsOutsideProperties()) {
KATANA_LOG_DEBUG("loading EntityType data from outside properties");

Expand All @@ -233,7 +235,7 @@ katana::PropertyGraph::Make(
EntityTypeManager edge_type_manager =
KATANA_CHECKED(rdg.edge_entity_type_manager());

return std::make_unique<PropertyGraph>(
property_graph = std::make_unique<PropertyGraph>(
std::move(rdg_file), std::move(rdg), std::move(topo),
std::move(node_type_ids), std::move(edge_type_ids),
std::move(node_type_manager), std::move(edge_type_manager));
Expand Down Expand Up @@ -262,11 +264,23 @@ katana::PropertyGraph::Make(
KATANA_ASSERT(topo.num_nodes() == node_type_ids.size());
KATANA_ASSERT(topo.num_edges() == edge_type_ids.size());

return std::make_unique<PropertyGraph>(
property_graph = std::make_unique<PropertyGraph>(
std::move(rdg_file), std::move(rdg), std::move(topo),
std::move(node_type_ids), std::move(edge_type_ids),
std::move(node_type_manager), std::move(edge_type_manager));
}

auto res = property_graph->recreate_node_property_indexes();
if (!res) {
return res.error();
}

res = property_graph->recreate_edge_property_indexes();
if (!res) {
return res.error();
}

return std::unique_ptr<katana::PropertyGraph>(std::move(property_graph));
}

katana::Result<std::unique_ptr<katana::PropertyGraph>>
Expand Down Expand Up @@ -956,6 +970,13 @@ katana::PropertyGraph::MakeNodeIndex(const std::string& column_name) {

node_indexes_.push_back(std::move(index));

// Save the column name the index was created from, for easy access during json load/store
node_property_indexes_column_name_.push_back(column_name);

// Hand the column names to the RDG so they are persisted to json; the index can then be recreated using recreate_node_property_indexes()
rdg_.set_node_property_indexes_column_name(
node_property_indexes_column_name_);

return katana::ResultSuccess();
}

Expand Down Expand Up @@ -985,6 +1006,13 @@ katana::PropertyGraph::MakeEdgeIndex(const std::string& column_name) {

edge_indexes_.push_back(std::move(index));

// Save the column name the index was created from, for easy access during json load/store
edge_property_indexes_column_name_.push_back(column_name);

// Hand the column names to the RDG so they are persisted to json; the index can then be recreated using recreate_edge_property_indexes()
rdg_.set_edge_property_indexes_column_name(
edge_property_indexes_column_name_);

return katana::ResultSuccess();
}

Expand Down
10 changes: 10 additions & 0 deletions libtsuba/include/tsuba/RDG.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,16 @@ class KATANA_EXPORT RDG {
/// Remove all edge properties
void DropEdgeProperties();

/// Replace the list of node (respectively edge) property column names that
/// this RDG keeps in its partition header so that it is persisted to json.
/// Called by the property graph whenever a property index is added.
/// NOTE(review): the original comment describes these as private; confirm
/// the access level of the enclosing section.
void set_node_property_indexes_column_name(
std::vector<std::string>& node_property_indexes_column_name);
void set_edge_property_indexes_column_name(
std::vector<std::string>& edge_property_indexes_column_name);

/// Access the stored node (respectively edge) property column names; the
/// property graph reads these to recreate its indexes after loading.
/// The returned reference is mutable and refers to the partition header's
/// own vector.
std::vector<std::string>& node_property_indexes_column_name();
std::vector<std::string>& edge_property_indexes_column_name();

/// Remove topology data
katana::Result<void> DropTopology();

Expand Down
23 changes: 23 additions & 0 deletions libtsuba/src/RDG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,29 @@ tsuba::RDG::WritePartArrays(const katana::Uri& dir, tsuba::WriteGroup* desc) {
return next_properties;
}

// Record the node property index column names in this RDG's partition
// header, from where they are persisted to json.
void
tsuba::RDG::set_node_property_indexes_column_name(
    std::vector<std::string>& node_property_indexes_column_name) {
  auto&& part_header = core_->part_header();
  part_header.set_node_property_indexes_column_name(
      node_property_indexes_column_name);
}
// Record the edge property index column names in this RDG's partition
// header, from where they are persisted to json.
void
tsuba::RDG::set_edge_property_indexes_column_name(
    std::vector<std::string>& edge_property_indexes_column_name) {
  auto&& part_header = core_->part_header();
  part_header.set_edge_property_indexes_column_name(
      edge_property_indexes_column_name);
}
// Expose the node property index column names held by the partition header;
// the property graph uses them to recreate its node indexes.
std::vector<std::string>&
tsuba::RDG::node_property_indexes_column_name() {
  auto&& part_header = core_->part_header();
  return part_header.node_property_indexes_column_name();
}
// Expose the edge property index column names held by the partition header;
// the property graph uses them to recreate its edge indexes.
std::vector<std::string>&
tsuba::RDG::edge_property_indexes_column_name() {
  auto&& part_header = core_->part_header();
  return part_header.edge_property_indexes_column_name();
}

katana::Result<void>
tsuba::RDG::DoStoreTopology(
RDGHandle handle, std::unique_ptr<FileFrame> topology_ff,
Expand Down
30 changes: 30 additions & 0 deletions libtsuba/src/RDGPartHeader.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,30 @@ class KATANA_EXPORT RDGPartHeader {
part_prop_info_list_ = std::move(part_prop_info_list);
}

const std::vector<std::string>& node_property_indexes_column_name() const {
return node_property_indexes_column_name_;
}
std::vector<std::string>& node_property_indexes_column_name() {
return node_property_indexes_column_name_;
}
/// Replace the stored node column names with a copy of the given list.
///
/// The argument is copied rather than moved from: it arrives as an lvalue
/// that the caller keeps using (the property graph keeps appending to the
/// vector it passes in), so moving out of it would silently empty the
/// caller's list.
void set_node_property_indexes_column_name(
    const std::vector<std::string>& node_property_indexes_column_name) {
  node_property_indexes_column_name_ = node_property_indexes_column_name;
}

const std::vector<std::string>& edge_property_indexes_column_name() const {
return edge_property_indexes_column_name_;
}
std::vector<std::string>& edge_property_indexes_column_name() {
return edge_property_indexes_column_name_;
}
/// Replace the stored edge column names with a copy of the given list.
///
/// The argument is copied rather than moved from: it arrives as an lvalue
/// that the caller keeps using (the property graph keeps appending to the
/// vector it passes in), so moving out of it would silently empty the
/// caller's list.
void set_edge_property_indexes_column_name(
    const std::vector<std::string>& edge_property_indexes_column_name) {
  edge_property_indexes_column_name_ = edge_property_indexes_column_name;
}

const PartitionMetadata& metadata() const { return metadata_; }
void set_metadata(const PartitionMetadata& metadata) { metadata_ = metadata; }

Expand Down Expand Up @@ -495,6 +519,12 @@ class KATANA_EXPORT RDGPartHeader {
std::vector<PropStorageInfo> node_prop_info_list_;
std::vector<PropStorageInfo> edge_prop_info_list_;

/// Node column names from which property indexes are created on startup.
/// NOTE(review): per the original comment, the json library ("nhomann",
/// presumably nlohmann/json) serializes a vector of strings without custom
/// code; explicit to/from json functions would be required if the element
/// type is ever changed from std::string to a custom one — confirm.
std::vector<std::string>
node_property_indexes_column_name_;
/// Edge column names from which property indexes are created on startup.
/// The same serialization note as for the node list applies.
std::vector<std::string>
edge_property_indexes_column_name_;

/// Metadata filled in by CuSP, or from storage (meta partition file)
PartitionMetadata metadata_;

Expand Down