Commit

fix
goliaro committed Jan 10, 2024
1 parent ec6165f commit a039c98
Showing 2 changed files with 25 additions and 4 deletions.
include/flexflow/model.h: 3 changes (2 additions, 1 deletion)
@@ -1114,14 +1114,15 @@ class FFModel {
   std::unordered_map<Op *, std::vector<std::pair<Op *, int>>>
       get_bwd_edge_map() const;

-  // Internal funcitons
+  // Internal functions
   Legion::IndexSpace get_or_create_task_is(ParallelConfig const &pc);
   Legion::IndexSpace get_or_create_task_is(MachineView const &view);
   Legion::IndexSpace get_or_create_task_is(Legion::Domain const &domain);
   Legion::IndexSpace get_or_create_task_is(const ParallelTensor);
   Legion::IndexSpace get_task_is(Legion::Domain const &domain) const;
   Legion::IndexSpace get_task_is(ParallelConfig const &pc) const;
   Legion::IndexSpace get_task_is(MachineView const &view) const;
+  bool is_mlp_block(int layer_idx) const;
   void create_operators_from_layers();
   Op *create_operator_from_layer(Layer *layer,
                                  std::vector<ParallelTensor> const &inputs);
src/runtime/model.cc: 26 changes (23 additions, 3 deletions)
@@ -3249,6 +3249,27 @@ Op *FFModel::create_operator_from_layer(
   }
 }

+bool FFModel::is_mlp_block(int layer_idx) const {
+  auto const &l = layers[layer_idx];
+  if (l->op_type != OP_LINEAR) {
+    return false;
+  }
+  // standard opt relu
+  if (layer_idx >= 2 && layers[layer_idx - 1]->op_type == OP_RELU &&
+      layers[layer_idx - 2]->op_type == OP_LINEAR) {
+    return true;
+  }
+  // mlp layer with relu embedded in first dense layer
+  long long value;
+  l->get_int_property("activation", value);
+  ActiMode activation = (ActiMode)value;
+  if (layer_idx >= 1 && layers[layer_idx - 1]->op_type == OP_LINEAR &&
+      activation == AC_MODE_RELU) {
+    return true;
+  }
+  return false;
+}
+
 void FFModel::create_operators_from_layers() {
   std::map<const Tensor, ParallelTensor> tensors_to_parallel_tensors;
   // for (auto const &l : layers) {
@@ -3294,9 +3315,8 @@ void FFModel::create_operators_from_layers() {
          (l->op_type == OP_INC_MULTIHEAD_SELF_ATTENTION ||
           l->op_type == OP_TREE_INC_MULTIHEAD_SELF_ATTENTION ||
           // mlp layer
-          (l->op_type == OP_LINEAR && layer_idx >= 1 &&
-           // layers[layer_idx - 1]->op_type == OP_RELU &&
-           layers[layer_idx - 1]->op_type == OP_LINEAR) ||
+          is_mlp_block(layer_idx) ||
+          // llama mlp layer
           (l->op_type == OP_LINEAR && layer_idx >= 2 &&
            layers[layer_idx - 1]->op_type == OP_GELU &&
            layers[layer_idx - 2]->op_type == OP_LINEAR) ||
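For context, the new FFModel::is_mlp_block helper accepts the Linear layer at layer_idx in two cases: it follows a separate Linear -> ReLU pair, or it follows a Linear layer and its own "activation" property is AC_MODE_RELU; the GELU-based llama MLP pattern remains a separate inline check in create_operators_from_layers. Below is a minimal standalone sketch of that pattern matching; the SimpleLayer struct, the OpType and ActiMode enums, and the main() driver are simplified stand-ins for illustration, not FlexFlow's actual types.

// Minimal sketch (not FlexFlow code) of the MLP-block detection added in this
// commit, using a simplified layer representation.
#include <cassert>
#include <vector>

enum OpType { OP_LINEAR, OP_RELU, OP_GELU, OP_EMBEDDING };
enum ActiMode { AC_MODE_NONE, AC_MODE_RELU };

struct SimpleLayer {
  OpType op_type;
  ActiMode activation; // activation property recorded on a dense (linear) layer
};

// Mirrors the committed logic: the layer at layer_idx must be Linear, and it is
// accepted either when it follows a Linear -> ReLU pair (two separate layers)
// or when it follows a Linear layer and its own activation property is ReLU.
bool is_mlp_block_sketch(std::vector<SimpleLayer> const &layers, int layer_idx) {
  if (layers[layer_idx].op_type != OP_LINEAR) {
    return false;
  }
  // Standard pattern: a separate ReLU between two Linear layers.
  if (layer_idx >= 2 && layers[layer_idx - 1].op_type == OP_RELU &&
      layers[layer_idx - 2].op_type == OP_LINEAR) {
    return true;
  }
  // Fused pattern: previous layer is Linear and this layer reports ReLU.
  if (layer_idx >= 1 && layers[layer_idx - 1].op_type == OP_LINEAR &&
      layers[layer_idx].activation == AC_MODE_RELU) {
    return true;
  }
  return false;
}

int main() {
  // Linear -> ReLU -> Linear: detected at index 2.
  std::vector<SimpleLayer> opt_style = {{OP_LINEAR, AC_MODE_NONE},
                                        {OP_RELU, AC_MODE_NONE},
                                        {OP_LINEAR, AC_MODE_NONE}};
  assert(is_mlp_block_sketch(opt_style, 2));

  // Linear -> Linear carrying a ReLU activation property: detected at index 1.
  std::vector<SimpleLayer> fused = {{OP_LINEAR, AC_MODE_NONE},
                                    {OP_LINEAR, AC_MODE_RELU}};
  assert(is_mlp_block_sketch(fused, 1));

  // Linear -> GELU -> Linear (the llama pattern) is not matched here; the
  // commit keeps it as a separate inline condition.
  std::vector<SimpleLayer> llama = {{OP_LINEAR, AC_MODE_NONE},
                                    {OP_GELU, AC_MODE_NONE},
                                    {OP_LINEAR, AC_MODE_NONE}};
  assert(!is_mlp_block_sketch(llama, 2));
  return 0;
}

Compiling and running the sketch (for example, g++ -std=c++17 mlp_block_sketch.cpp && ./a.out, with a hypothetical file name) should exit cleanly if the checks behave as described.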
