intel
diff --git a/‎cmake/onnxruntime_optimizer.cmake
+1 b/‎cmake/onnxruntime_optimizer.cmake
+1
diff --git a/‎include/onnxruntime/core/framework/execution_provider.h
+16 b/‎include/onnxruntime/core/framework/execution_provider.h
+16
diff --git a/‎include/onnxruntime/core/graph/indexed_sub_graph.h
+6 b/‎include/onnxruntime/core/graph/indexed_sub_graph.h
+6
diff --git a/‎onnxruntime/core/framework/compute_capability.h
+20 b/‎onnxruntime/core/framework/compute_capability.h
+20
diff --git a/‎onnxruntime/core/framework/execution_provider.cc
+1 b/‎onnxruntime/core/framework/execution_provider.cc
+1
@@ -9,6 +9,7 @@ if (onnxruntime_MINIMAL_BUILD)
   list(APPEND onnxruntime_optimizer_src_patterns
     "${ONNXRUNTIME_INCLUDE_DIR}/core/optimizer/graph_transformer.h"
     "${ONNXRUNTIME_ROOT}/core/optimizer/graph_transformer.cc"
+    "${ONNXRUNTIME_ROOT}/core/optimizer/graph_optimizer_registry.cc"
   )
 
   if (onnxruntime_EXTENDED_MINIMAL_BUILD)
 
@@ -20,6 +20,7 @@ struct ComputeCapability;
 class KernelRegistry;
 struct KernelCreateInfo;
 class Node;
+class GraphOptimizerRegistry;
 }  // namespace onnxruntime
 #else
 #include <memory>
@@ -129,10 +130,25 @@ class IExecutionProvider {
      and decide whether a node will be assigned to <*this> execution provider.
      For kernels registered in a kernel registry, `kernel_lookup` must be used
      to find a matching kernel for this EP.
+
+     The graph_optimizer_registry is designed for enabling L2+ graph optimizations tailored for EPs.
+     These optimizations are applied after the graph partitioner assigns ComputeCapability to the EP
+     and before EP's "Compile" or fusion.
+
+     Steps to use graph_optimizer_registry and create the optimization ComputeCapability:
+     1. Lookup Optimizer: The EP calls provider bridge API to lookup pre-defined optimizer by name and get selection function.
+        - Example: g_host->GetOptimizerByName(optimizer_name, graph_optimizer_registry, selection_func)
+     2. Run Selection Function: The EP executes the selection function to obtain the selection ComputeCapability.
+        - ComputeCapability.optimization_func would be set by the optimizer to the function that does the optimization.
+     3. Create Optimization ComputeCapability: The EP uses the selection ComputeCapability to create the optimization ComputeCapability.
+     4. Return ComputeCapability: The EP returns the final ComputeCapability, with nodes_to_optimize set to the optimization ComputeCapability.
+
+     Note: For more detailed implementations of using graph_optimizer_registry, please refer to TensorRT EP.
   */
   virtual std::vector<std::unique_ptr<ComputeCapability>>
   GetCapability(const onnxruntime::GraphViewer& graph_viewer,
                 const IKernelLookup& kernel_lookup,
+                const GraphOptimizerRegistry& graph_optimizer_registry,
                 IResourceAccountant* resource_accountant = nullptr) const;
 
   /**
 
@@ -72,6 +72,12 @@ struct IndexedSubGraph {
     return meta_def_.get();
   }
 
+  /** Gets a mutable pointer to the meta definition needed to represent this subgraph as a FunctionProto.
+  Callers may modify the MetaDef in place through the returned pointer.
+  NOTE(review): the pointer comes from meta_def_.get(), so ownership presumably stays with this
+  IndexedSubGraph - confirm against the declaration of meta_def_.
+  @returns Pointer to the MetaDef instance if it has been set, nullptr if not. */
+  MetaDef* GetMutableMetaDef() {
+    return meta_def_.get();
+  }
+
   // Check if the accounting is enabled for the current EP
   bool IsAccountingEnabled() const {
     return resource_accountant != nullptr &&
 
@@ -2,8 +2,11 @@
 // Licensed under the MIT License.
 
 #pragma once
+#include <functional>
 #include "core/common/common.h"
 #include "core/graph/indexed_sub_graph.h"
+#include "core/graph/graph.h"
+#include "core/optimizer/graph_optimizer_registry.h"
 
 namespace onnxruntime {
 // A structure encodes a subgraph and the method to run it.
@@ -21,5 +24,22 @@ struct ComputeCapability {
 
   ComputeCapability(std::unique_ptr<IndexedSubGraph> t_sub_graph)
       : sub_graph(std::move(t_sub_graph)) {}
+
+  // Optional function to optimize this ComputeCapability.
+  // This will be called by ORT once the ComputeCapability is assigned to the EP.
+  std::function<Status(Graph&,
+                       const ComputeCapability& /* this_optimization*/,
+                       ComputeCapability& /* cc_to_update */,
+                       const GraphOptimizerRegistry&)>
+      optimization_func;
+
+  // Optional ComputeCapability instances for sets of nodes within this ComputeCapability that should be optimized.
+  // When an optimization is applied, ORT will update this ComputeCapability to reflect the changes made.
+  // IndexedSubGraph.nodes:
+  //  - updated based on RemoveNode/AddNode calls
+  // IndexedSubGraph.MetaDef (if present):
+  //  - inputs and outputs will be unchanged
+  //  - constant_initializers MAY change if we constant fold an initializer during optimization
+  std::vector<std::unique_ptr<ComputeCapability>> nodes_to_optimize;
 };
 }  // namespace onnxruntime
@@ -14,6 +14,7 @@ namespace onnxruntime {
 std::vector<std::unique_ptr<ComputeCapability>>
 IExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph,
                                   const IKernelLookup& kernel_lookup,
+                                  const GraphOptimizerRegistry&,
                                   IResourceAccountant*) const {
   std::vector<std::unique_ptr<ComputeCapability>> result;
   for (const auto& node : graph.Nodes()) {
Original file line number	Diff line number	Diff line change
`@@ -9,6 +9,7 @@ if (onnxruntime_MINIMAL_BUILD)`
`9`	`9`	`list(APPEND onnxruntime_optimizer_src_patterns`
`10`	`10`	`"${ONNXRUNTIME_INCLUDE_DIR}/core/optimizer/graph_transformer.h"`
`11`	`11`	`"${ONNXRUNTIME_ROOT}/core/optimizer/graph_transformer.cc"`
	`12`	`+ "${ONNXRUNTIME_ROOT}/core/optimizer/graph_optimizer_registry.cc"`
`12`	`13`	`)`
`13`	`14`
`14`	`15`	`if (onnxruntime_EXTENDED_MINIMAL_BUILD)`