#2201: temperedlb: implement basic memory information consumption, threshold variable for user

lifflander · lifflander · commit a77ec7e79f3c · 2024-01-25T14:55:02.000-08:00
diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc
@@ -274,6 +274,15 @@ Default: false
   instead of the processor-average load.
 )"
     },
+    {
+      "memory_threshold",
+      R"(
+Values: <double>
+Default: 0
+Description: The memory threshold TemperedLB should strictly stay under which is
+respected if memory information is present in the user-defined data.
+)"
+    }
   };
   return keys_help;
 }
@@ -378,6 +387,7 @@ void TemperedLB::inputParams(balance::ConfigEntry* config) {
   deterministic_ = config->getOrDefault<bool>("deterministic", deterministic_);
   rollback_      = config->getOrDefault<bool>("rollback", rollback_);
   target_pole_   = config->getOrDefault<bool>("targetpole", target_pole_);
+  mem_thresh_    = config->getOrDefault<double>("memory_threshold", mem_thresh_);
 
   balance::LBArgsEnumConverter<CriterionEnum> criterion_converter_(
     "criterion", "CriterionEnum", {
@@ -509,6 +519,98 @@ void TemperedLB::runLB(LoadType total_load) {
   }
 }
 
+void TemperedLB::readClustersMemoryData() {
+  // Copy per-object memory entries from the user-defined data into the maps
+  if (user_data_) {
+    for (auto const& [obj, data_map] : *user_data_) {
+      SharedIDType shared_id = -1;
+      BytesType shared_bytes = 0;
+      BytesType working_bytes = 0;
+      for (auto const& [key, variant] : data_map) {
+        if (key == "shared_id") {
+          // Because of how JSON is stored this is always a double, even though
+          // it should be an integer
+          if (double const* val = std::get_if<double>(&variant)) {
+            shared_id = static_cast<SharedIDType>(*val);
+          } else {
+            vtAbort("\"shared_id\" in variant does not match double");
+          }
+        } else if (key == "shared_bytes") {
+          if (BytesType const* val = std::get_if<BytesType>(&variant)) {
+            shared_bytes = *val;
+          } else {
+            vtAbort("\"shared_bytes\" in variant does not match double");
+          }
+        } else if (key == "task_working_bytes") {
+          if (BytesType const* val = std::get_if<BytesType>(&variant)) {
+            working_bytes = *val;
+          } else {
+            vtAbort("\"task_working_bytes\" in variant does not match double");
+          }
+        } else if (key == "rank_working_bytes") {
+          if (BytesType const* val = std::get_if<BytesType>(&variant)) {
+            rank_bytes_ = *val;
+          } else {
+            vtAbort("\"rank_working_bytes\" in variant does not match double");
+          }
+        }
+        // @todo: for now, skip "task_serialized_bytes" and
+        // "task_footprint_bytes"
+      }
+      // @todo: switch to debug print at some point
+      vt_print(
+        temperedlb, "obj={} shared_block={} bytes={}\n",
+        obj, shared_id, shared_bytes
+      );
+
+      // Skip the -1 placeholder so it is never counted as a shared block here
+      if (shared_id != -1) {
+        obj_shared_block_[obj] = shared_id;
+        shared_block_size_[shared_id] = shared_bytes;
+      }
+      obj_working_bytes_[obj] = working_bytes;
+      has_memory_data_ = true;
+    }
+  }
+}
+
+TemperedLB::BytesType TemperedLB::computeMemoryUsage() const {
+  // Sum bytes of the shared blocks mapped here; a block without a recorded
+  // size contributes nothing rather than dereferencing end()
+  BytesType total_shared_bytes = 0;
+  for (auto const& block_id : getSharedBlocksHere()) {
+    auto const iter = shared_block_size_.find(block_id);
+    total_shared_bytes += iter != shared_block_size_.end() ? iter->second : 0;
+  }
+
+  // Compute max object size
+  // @todo: Slight issue here that this will only count migratable objects
+  // (those contained in cur_objs), for our current use case this is not a
+  // problem, but it should include the max of non-migratable
+  BytesType max_object_working_bytes = 0;
+  for (auto const& [obj_id, _] : cur_objs_) {
+    auto const iter = obj_working_bytes_.find(obj_id);
+    if (iter != obj_working_bytes_.end()) {
+      max_object_working_bytes = std::max(max_object_working_bytes, iter->second);
+    } else {
+      vt_print(
+        temperedlb, "Warning: working bytes not found for object: {}\n", obj_id
+      );
+    }
+  }
+  return rank_bytes_ + total_shared_bytes + max_object_working_bytes;
+}
+
+std::set<TemperedLB::SharedIDType> TemperedLB::getSharedBlocksHere() const {
+  // Gather the distinct shared block IDs of the objects currently in cur_objs_
+  std::set<SharedIDType> result;
+  for (auto const& [obj_id, _] : cur_objs_) {
+    auto const found = obj_shared_block_.find(obj_id);
+    if (found != obj_shared_block_.end()) { result.insert(found->second); }
+  }
+  return result;
+}
+
 void TemperedLB::doLBStages(LoadType start_imb) {
   decltype(this->cur_objs_) best_objs;
   LoadType best_load = 0;
@@ -517,6 +619,9 @@ void TemperedLB::doLBStages(LoadType start_imb) {
 
   auto this_node = theContext()->getNode();
 
+  // Read in memory information if it's available before we do any trials
+  readClustersMemoryData();
+
   for (trial_ = 0; trial_ < num_trials_; ++trial_) {
     // Clear out data structures
     selected_.clear();
@@ -554,6 +659,13 @@ void TemperedLB::doLBStages(LoadType start_imb) {
         LoadType(this_new_load_)
       );
 
+      vt_print(
+        temperedlb,
+        "Current memory info: total memory usage={}, shared blocks here={}, "
+        "memory_threshold={}\n", computeMemoryUsage(), getSharedBlocksHere().size(),
+        mem_thresh_
+      );
+
       if (isOverloaded(this_new_load_)) {
         is_overloaded_ = true;
       } else if (isUnderloaded(this_new_load_)) {
diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h
@@ -68,6 +68,8 @@ struct TemperedLB : BaseLB {
   using ReduceMsgType  = vt::collective::ReduceNoneMsg;
   using QuantityType     = std::map<lb::StatisticQuantity, double>;
   using StatisticMapType = std::unordered_map<lb::Statistic, QuantityType>;
+  using SharedIDType     = int;
+  using BytesType        = double;
 
   TemperedLB() = default;
   TemperedLB(TemperedLB const&) = delete;
@@ -120,6 +122,27 @@ struct TemperedLB : BaseLB {
 
   void setupDone();
 
+  /**
+   * \brief Read the memory data from the user-defined json blocks into data
+   * structures
+   */
+  void readClustersMemoryData();
+
+  /**
+   * \brief Compute the memory usage for current assignment
+   *
+   * \return the total memory usage
+   */
+  BytesType computeMemoryUsage() const;
+
+  /**
+   * \brief Get the shared blocks that are located on this node with the current
+   * object assignment
+   *
+   * \return the set of shared block IDs located here
+   */
+  std::set<SharedIDType> getSharedBlocksHere() const;
+
 private:
   uint16_t f_                                       = 0;
   uint8_t k_max_                                    = 0;
@@ -184,6 +207,24 @@ struct TemperedLB : BaseLB {
   std::mt19937 gen_sample_;
   StatisticMapType stats;
   LoadType this_load                                = 0.0f;
+
+
+  //////////////////////////////////////////////////////////////////////////////
+  // All the memory info (may or may not be present)
+  //////////////////////////////////////////////////////////////////////////////
+
+  /// Whether we have memory information
+  bool has_memory_data_ = false;
+  /// Working bytes for this rank
+  BytesType rank_bytes_ = 0;
+  /// Shared ID for each object
+  std::unordered_map<ObjIDType, SharedIDType> obj_shared_block_;
+  /// Shared block size in bytes
+  std::unordered_map<SharedIDType, BytesType> shared_block_size_;
+  /// Working bytes for each object
+  std::unordered_map<ObjIDType, BytesType> obj_working_bytes_;
+  /// User-defined memory threshold
+  BytesType mem_thresh_ = 0;
 };
 
 }}}} /* end namespace vt::vrt::collection::lb */