
Commit a6cdf62

Authored by jatinwadhwa921, sushraja-msft, skottmckay, seungtaek94, and co63oc
Rebasing with msft commits (#607)
* **Fix flash attention for GQA (Phi4) (microsoft#23850)** — Fixes GQA for Flash Attention on Nvidia GPUs. The root cause appears to be the `k_start + capped_sg_id < seq_causal_length` check. Either (a) `seq_causal_length` varies per lane, so the check becomes non-uniform control flow that interacts badly with `subgroupShuffle`, or (b) the check itself is incorrect and wipes out values of v based on the source lane's `seq_causal_length`, when in fact v needs to be causal with respect to the lane that will multiply it with qkt. qkt is already causal because earlier values of qk for out-of-bounds k are set to `min_value`, and exp of such large negative scores is 0. The fix removes that causal check and relies on qk being wiped out earlier; the documentation of GQA's causality behavior is missing, so it is unclear which of the two reasons is the true one. Previously, prompts with sequence length greater than 16 but less than 32, or around 1k, would break with Phi 4, while smaller prompts worked. Tested on Intel Alderlake and Nvidia 4070. (A minimal sketch of the masking argument follows the commit message.)
* **Model Builder API (microsoft#23223)** — Supports creating a model programmatically using the ORT C or C++ API, and augmenting an existing model to add nodes.
* **Fix typo: change `Upample` to `Upsample` (microsoft#23838)** — Fixed misspelled function names for the Upsample CUDA kernel, changing `Upample` to `Upsample` across the relevant functions to keep names consistent and prevent confusion.
* **[doc] Fix typos in csharp/src/Microsoft.ML.OnnxRuntime/ (microsoft#23848)**
* **Quant tool: Consistent `get_qdq_config` and `get_qnn_qdq_config` behavior (microsoft#23856)**
* **Change the logic to generate the default ep context file name (microsoft#23788)** — Applies to all EPs: replace `.onnx` with `_ctx.onnx` instead of directly appending the extra string `_ctx.onnx` to the existing model path. In QNN EP, also make the context binary `.bin` file name shorter by removing `QNNExecutionProvider_` from it. (See the filename sketch after the commit message.)
* **Make Nuget QNN package pipeline 1ES compliant (microsoft#23805)** — Makes [QNN_Nuget_Windows](https://aiinfra.visualstudio.com/Lotus/_build?definitionId=1234) 1ES compliant.
* **[js/common] allows using Uint16Array as data for float16 tensor (microsoft#23827)** — Resolves microsoft#23817.
* **[js/webgpu] Reland the optimization of ConvTranspose (microsoft#23858)** — Fixes the errors in the ConvTranspose optimization and adds tests to ensure the correctness of the implementation.
* **[OpenVINO] Fix a build warning (microsoft#23877)** — Fixes a warning with `std::move` usage; possibly allows building without the `--compile_no_warning_as_error` flag.
* **Change gsl::byte to std::byte (microsoft#23872)** — Needed for compatibility with the latest GSL library; without this fix the build fails with:

  ```
  onnxruntime\core\providers\cpu\controlflow\loop.cc(247): error C4996: 'gsl::byte': Use std::byte instead.
  ```
* **Allow using extended minimal build for several EPs (microsoft#23834)** — Background: from code search, the CANN, CUDA, DML, JS, ROCM, and WebGPU EPs all use `onnxruntime::GetCpuPreferredNodes()` in their `GetCapability()` methods. However, the source file that implements it is excluded when minimal build is ON (https://github.com/microsoft/onnxruntime/blob/6df0973e58ba5399fcaa98686f70ed9a9e59aaef/cmake/onnxruntime_framework.cmake#L38-L42), which means none of those EPs can compile in a minimal build. Solution: the excluded file `core/framework/fallback_cpu_capability.cc` cannot build in a basic minimal build because some of its dependencies are missing, but in extended minimal build mode all of its dependencies are available. This PR loosens the restriction and allows the file to compile in an extended minimal build, after which those EPs compile there as well.
* **Add dawn to ThirdPartyNotices (microsoft#23876)** — Adds `dawn` to ThirdPartyNotices.
* **Enable QNN EP weight sharing generation using public API (microsoft#23702)** — Uses the public API instead of internal interfaces, so that users can integrate weight-sharing generation into their own toolchains. The change shares the QnnBackendManager across ORT sessions when `ep.share_ep_contexts` is enabled, and adds an extra option to end the sharing so that the shared QnnBackendManager can be removed from the singleton at the right time. Also renames the tool from `onnxruntime_qnn_ctx_gen` to `ep_weight_sharing_ctx_gen`, so that it can be shared by other EPs.
* **[QNN-EP]: Fix inference failures while running with htp_shared_memory (microsoft#23892)** — When the `enable_htp_shared_memory` feature is used, the address of the buffer passed to `rpcmem_free` is incorrect, so the RPC buffers are never freed, leading to memory exhaustion. With the `enable_htp_shared_memory_allocator` feature for QNN in GenAI extensions, this caused inference failures during the second prompt; since GenAI memory demands are higher, it surfaces sooner in GenAI use cases. Co-authored-by: Ashish Garg <ashigarg@qti.qualcomm.com>
* **Fix enable_pix_capture build for WebGPU (microsoft#23857)** — The build option `--enable_pix_capture` was broken; this fixes the problem. Co-authored-by: wp <webgraphics@intel.com>
* **[WebGPU-EP Native] Add ReduceMean (microsoft#23860)**
* **[WebGPU EP] introduce BiasAdd contrib op (microsoft#23861)** — Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* **Dynamo export and improve benchmark script for SAM2 encoder (microsoft#23887)** — Adds dynamo export for the SAM2 image encoder and verifies the fp32 ONNX model with the CPU EP (to avoid error messages from the TRT EP). Updates the benchmark script to output ORT profiling, output the torch-compiled code and a unique kernel name for each compiled kernel, add an option for nightly package installation, and uninstall existing ort packages before installing. The node metadata of the dynamo-exported model can help map nodes in the ONNX model back to the PyTorch modeling script. Graph optimization is not yet done on dynamo-exported models, so this is experimental for now. Motivation: support profiling of torch-compiled CUDA kernels.
* **[js/web] improve workaround for bundlers (microsoft#23902)** — Improves the workaround for bundlers in onnxruntime-web. Specifically: uses the workaround (xenova@9c50aa2) suggested by @xenova in huggingface/transformers.js#1161 (comment), and uses `url > "file:" && url < "file;"` instead of `url.startsWith("file:")` so that minifiers can remove dead code correctly. This removes unnecessary dependencies of files parsed from `new URL("ort.bundle.min.js", import.meta.url)` in Vite, and lets webpack/terser optimize code like `if("file://filepath.js".startsWith("file:")) {do_sth1(); } else {do_sth2();}` into `do_sth1()`. Resolves huggingface/transformers.js#1161. (A sketch of the string-ordering trick follows the commit message.)
* **[webgpu] Restore MatMulNBits workgroup size for Phi-3.5 (microsoft#23349)** — Restores the MatMulNBits workgroup size from (8, 8, 1) back to (16, 8, 1) to resolve a performance regression observed on Intel iGPUs during token generation (M=1). Signed-off-by: Jianhui Dai <jianhui.j.dai@intel.com>
* **[webgpu] support Pad operator (microsoft#23141)**
* **[WebNN] Accept Float16Array for float16 data type if it is available (microsoft#23894)** — Float16Array is now shipping and the WebNN Chromium implementation has accepted it, so the WebNN EP should allow it as well.
* **Ensure that the 'cmake_minimum_required' is version 3.5 or greater (microsoft#23888)** — CMake 4.0 release candidate 2 is available, and it cannot compile all of OnnxRuntime out of the box: portions of the codebase specify a `cmake_minimum_required` version of 3.0, and CMake 4.0 has removed support for compatibility with CMake < 3.5, so the following error is reported:

  ```
  CMake Error at winml_sdk_helpers.cmake:4 (cmake_minimum_required):
    Compatibility with CMake < 3.5 has been removed from CMake.

    Update the VERSION argument <min> value.  Or, use the <min>...<max> syntax
    to tell CMake that the project requires at least <min> but has been updated
    to work with policies introduced by <max> or earlier.

    Or, add -DCMAKE_POLICY_VERSION_MINIMUM=3.5 to try configuring anyway.
  ```

  Since CMake 3.5 appears to have shipped in 2016, it seems reasonable to set that as a minimum version to fix the error.
  The root CMakeLists.txt does ask for a minimum version of 3.28, so we could snap to that, but this proposes a minimally sufficient fix. Motivation: being able to build with the latest CMake, when it ships, reduces the barrier to entry to building OnnxRuntime and allows it to leverage the latest tooling.
* **WebGPU: Remove deprecated subgroups-f16 from WebGPU native and JS EP (microsoft#23898)** — Removes the deprecated subgroups-f16 from the WebGPU native and JS EPs, and also removes the unused deviceInfo in the WebGPU JS EP.
* **[JSEP/WebGPU] Fixed error in softmax dispatch. (microsoft#23906)** — Fixed an error in the softmax dispatch so that the LlaMA model produces expected results.
* **enable WebGPU EP in WebAssembly build (microsoft#23913)** — First step in migrating the webgpu backend of onnxruntime-web from JSEP-based to WebGPU EP-based: enables building the WebGPU EP in a wasm build (i.e. `--build_wasm` `--use_webgpu` `--use_jsep`). The old build flags keep their previous behavior.
* **Adding OpenVINO Windows CI Pipeline (microsoft#23919)** — Enables an OpenVINO Windows CI pipeline. This includes downloading the OpenVINO toolkit for Windows from an external source, setting up OpenVINO environment variables, building the ONNX Runtime OpenVINO Execution Provider, and running unit tests. This is required to run checks on precommit and commit in the ONNX Runtime project, ensuring the code is tested with the OpenVINO toolkit on Windows and improving the reliability and compatibility of the project.
* **[WebGPU EP] SoftMax Implementation (microsoft#23538)** — Increases coverage for WebGPU ops.
* **Exclude MAUI projects from GPU C# packaging builds (microsoft#23923)** — Uses the 'desktop only' solution in GPU C# packaging builds; MAUI support is not needed for those builds.
* **Support all block sizes that are multiples of 32 for DP4A (microsoft#23907)** — The DP4A shader actually supports all block sizes that are multiples of 32; this relaxes the restriction and makes a small tweak to support sizes other than 32. Also moves the shader to a separate file for maintainability. Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
* **Example custom op with output type inferencing (microsoft#23916)** — Adds an example of a custom op that is required to do type inference for the output type for the model load to work; also acts as an example of how to override an ONNX op with a custom implementation. See microsoft#23891.
* **Enabling L2+ Optimizations for EPs (microsoft#23517)** — Some requirements to modify the graph are specific to the EP/hardware. ORT has a hardcoded EP list for optimizations, but that can't scale and is hard to extend to enable EP custom optimizations.
  This is a prototype enabling L2+ optimizations for EPs (the original overview was provided by @skottmckay), along with the TRT EP implementation of the ConstantFoldingDQ optimization. Signatures for the selection and optimization functions:

  ````
  - Selection:    std::function<std::vector<std::unique_ptr<ComputeCapability>>(const GraphViewer&, const KeyValueConfig&)>
  - Optimization: std::function<Status(const Graph&, const ComputeCapability& this_optimization, ComputeCapability& cc_to_update)>
  ````

  In GetCapability, the EP calls a (new) provider bridge API to look up a pre-defined optimizer by name and get its selection function; `ComputeCapability.optimize_func` (the optimization function) is set by the optimizer to the function that does the optimization; the EP has to update the returned ComputeCapability to include the optimization ComputeCapability in `nodes_to_optimize`, so that ORT can later perform the optimization/transformation accordingly. In the GraphPartitioner, after assigning the ComputeCapability to the EP and prior to Compile, if the ComputeCapability has `nodes_to_optimize`, ORT iterates that list; the optimization function is called with a mutable Graph instance, the ComputeCapability for the individual optimization, and the overall ComputeCapability so it can be updated.
* **fix binplace file in web pipeline (microsoft#23930)**
* **Updated run_CIs_for_external_pr.py to support the Windows OpenVINO CI pipeline (microsoft#23931)**
* **Fix ConvInteger handling of optional inputs. (microsoft#23935)** — Need to check `Exists()` and not just the number of inputs. See microsoft#23927.
* **Updated ov version in pipeline (#595) (microsoft#23882)** — Updates the OpenVINO version used in the pipeline from 2024.5.0 to 2025.0.0. Co-authored-by: jatinwadhwa921 <110383850+jatinwadhwa921@users.noreply.github.com>
* **[AIX] External data handling (microsoft#23859)** — On big-endian systems, model tensor data coming from an external file was not handled properly; this was found during the debugging of microsoft/onnxruntime-genai#1104. This PR performs the endianness conversion of data loaded from an external file on big-endian systems. (A byte-swap sketch follows the commit message.)
* **Create a packaging pipeline for a custom nuget package (microsoft#23918)**
* **Fix license in example test code. (microsoft#23936)**
* **replace usage of gsl::narrow and gsl::narrow_cast in WebGPU EP (microsoft#23926)** — `gsl::narrow` does not work in a no-exception build: use `onnxruntime::narrow` if necessary, or change to `static_cast` if it is obviously safe. The changes also apply to usage of `gsl::narrow_cast`, which does not apply checks. (See the narrowing sketch after the commit message.)
* **VCPKG improvement: set VCPKG_OSX_DEPLOYMENT_TARGET (microsoft#23933)** — Sets VCPKG_OSX_DEPLOYMENT_TARGET for macOS targets and enables VCPKG in more pipelines.
* **Allow using a different version of flatbuffers when building with vcpkg (microsoft#23946)** — Users no longer need to pin flatbuffers' version, which provides more flexibility in the build process. Also deletes utf8_range from the dependencies, because it is an indirect dependency of protobuf, which is already included in the build process.
* **Make python package pipeline 1ES compliant (microsoft#23800)** — Makes the [Python packaging pipeline](https://aiinfra.visualstudio.com/530acbc4-21bc-487d-8cd8-348ff451d2ff/_build?definitionId=841) 1ES compliant. Checklist: make Onnxruntime-QNNEP-Windows-2022-CPU stateless.
* **Delete ROCM Nuget Publishing Pipeline (microsoft#23948)**
* **Bump SixLabors.ImageSharp from 2.1.9 to 2.1.10 in /csharp/sample/Microsoft.ML.OnnxRuntime.FasterRcnnSample (microsoft#23924)** — Dependabot bump of [SixLabors.ImageSharp](https://github.com/SixLabors/ImageSharp) from 2.1.9 to 2.1.10. The v2.1.10 release backports SixLabors/ImageSharp#2859 (via SixLabors/ImageSharp#2890) and SixLabors/ImageSharp#2701 (via SixLabors/ImageSharp#2891) to the 2.1.x line; full changelog: https://github.com/SixLabors/ImageSharp/compare/v2.1.9...v2.1.10. Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

---------

Signed-off-by: Jianhui Dai <jianhui.j.dai@intel.com>
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: Sushanth Rajasankar <44513542+sushraja-msft@users.noreply.github.com>
Co-authored-by: Scott McKay <skottmckay@gmail.com>
Co-authored-by: Seungtaek Kim <seungtaek.kim.94@gmail.com>
Co-authored-by: co63oc <co63oc@users.noreply.github.com>
Co-authored-by: Jambay Kinley <jambaykinley@microsoft.com>
Co-authored-by: Hector Li <hecli@microsoft.com>
Co-authored-by: Jian Chen <cjian@microsoft.com>
Co-authored-by: Yulong Wang <7679871+fs-eire@users.noreply.github.com>
Co-authored-by: Jiajia Qin <jiajiaqin@microsoft.com>
Co-authored-by: Alessio Soldano <services@soldano.it>
Co-authored-by: Changming Sun <chasun@microsoft.com>
Co-authored-by: Ashish Garg <quic_ashigarg@quicinc.com>
Co-authored-by: Ashish Garg <ashigarg@qti.qualcomm.com>
Co-authored-by: Jie Chen <jie.a.chen@intel.com>
Co-authored-by: wp <webgraphics@intel.com>
Co-authored-by: Satya Kumar Jandhyala <satya.k.jandhyala@gmail.com>
Co-authored-by: Prathik Rao <prathik.rao@gmail.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Tianlei Wu <tlwu@microsoft.com>
Co-authored-by: Jianhui Dai <jianhui.j.dai@intel.com>
Co-authored-by: xhcao <xinghua.cao@intel.com>
Co-authored-by: Wanming Lin <wanming.lin@intel.com>
Co-authored-by: Mark Schofield <mschofie@microsoft.com>
Co-authored-by: jiangzhaoming <zhaoming.jiang@microsoft.com>
Co-authored-by: Yi-Hong Lyu <yilyu@microsoft.com>
Co-authored-by: vraspar <vrajang@outlook.com>
Co-authored-by: Chi Lo <54722500+chilo-ms@users.noreply.github.com>
Co-authored-by: saurabh <saurabh1.kale@intel.com>
Co-authored-by: Ranjit Ranjan <165394499+ranjitshs@users.noreply.github.com>
Co-authored-by: Baiju Meswani <bmeswani@microsoft.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
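A minimal C++ sketch of the masking argument behind the GQA flash-attention fix (microsoft#23850). This is illustrative only, with made-up values, not the actual WGSL kernel: once out-of-range qk scores are forced to a large negative value, softmax assigns them zero weight, so the matching v values drop out of the weighted sum without any separate causal check on v.

```cpp
// Illustrative sketch (hypothetical values, not the WGSL shader from the PR):
// masking qk alone is enough to make the output causal.
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const int seq_len = 8;
  const int query_pos = 3;  // causal: keys at positions > 3 must not contribute
  std::vector<float> qk(seq_len), v(seq_len);
  for (int k = 0; k < seq_len; ++k) {
    qk[k] = 0.1f * k;  // stand-in attention scores
    v[k] = 1.0f + k;   // stand-in values
  }
  // Apply causality once, on qk only (mirrors setting qk to min_value).
  for (int k = query_pos + 1; k < seq_len; ++k) qk[k] = -1e30f;

  // Softmax over qk; masked positions get weight exp(-huge) == 0.
  float max_qk = qk[0], sum = 0.0f, out = 0.0f;
  for (float s : qk) max_qk = std::fmax(max_qk, s);
  std::vector<float> w(seq_len);
  for (int k = 0; k < seq_len; ++k) { w[k] = std::exp(qk[k] - max_qk); sum += w[k]; }
  for (int k = 0; k < seq_len; ++k) out += (w[k] / sum) * v[k];

  // out mixes only v[0..query_pos]; no second causal check on v is needed.
  std::printf("output = %f\n", out);
  return 0;
}
```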
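The default EP context naming change (microsoft#23788) boils down to replacing the `.onnx` extension rather than appending to the full path. A hypothetical helper sketching the idea (not ORT's actual code):

```cpp
// Sketch of the naming rule: "model.onnx" -> "model_ctx.onnx" (replace the
// extension) instead of "model.onnx_ctx.onnx" (append to the whole path).
#include <cassert>
#include <string>

std::string default_ctx_name(const std::string& model_path) {
  const std::string ext = ".onnx";
  if (model_path.size() >= ext.size() &&
      model_path.compare(model_path.size() - ext.size(), ext.size(), ext) == 0) {
    return model_path.substr(0, model_path.size() - ext.size()) + "_ctx.onnx";
  }
  return model_path + "_ctx.onnx";  // fallback for other extensions
}

int main() {
  assert(default_ctx_name("model.onnx") == "model_ctx.onnx");
  assert(default_ctx_name("dir/phi4.onnx") == "dir/phi4_ctx.onnx");
  return 0;
}
```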
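The `url > "file:" && url < "file;"` condition from microsoft#23902 relies only on lexicographic string ordering, so the property can be checked in any language; here is a C++ sketch (the real check is JavaScript in onnxruntime-web). Because `;` is the ASCII character immediately after `:`, any longer string starting with `"file:"` sorts strictly between `"file:"` and `"file;"`.

```cpp
// C++ demonstration of the string-ordering trick; the point of the JS version
// is that, unlike startsWith(), plain comparisons stay statically analyzable
// for minifiers doing dead-code elimination.
#include <cassert>
#include <string>

bool is_file_url(const std::string& url) {
  return url > "file:" && url < "file;";
}

int main() {
  assert(is_file_url("file:///home/user/model.onnx"));
  assert(!is_file_url("https://example.com/ort.bundle.min.js"));
  assert(!is_file_url("file:"));  // bare scheme compares equal, not greater
  return 0;
}
```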
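On the `gsl::narrow` replacement (microsoft#23926): `gsl::narrow` throws `gsl::narrowing_error` on a lossy cast, which is unusable when exceptions are disabled. A simplified stand-in in the spirit of `onnxruntime::narrow` (assumption: the real helper is more thorough, e.g. about signed/unsigned edge cases) aborts instead of throwing:

```cpp
// Simplified sketch: detect lossy narrowing via a round-trip cast and abort,
// which works in builds compiled without exception support.
#include <cstdlib>

template <typename To, typename From>
To narrow(From value) {
  const To result = static_cast<To>(value);
  if (static_cast<From>(result) != value) std::abort();  // lossy cast
  return result;
}

int main() {
  const int ok = narrow<int>(42LL);  // fits, returns 42
  (void)ok;
  // narrow<int>(1LL << 40);         // would abort: 2^40 does not fit in int
  return 0;
}
```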
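The AIX fix (microsoft#23859) is a byte-order issue: ONNX external tensor data is stored little-endian, so a big-endian host has to byte-swap each element after loading. An illustrative sketch of the idea (not ORT's implementation):

```cpp
// Reverse the bytes of each element in place: little-endian file bytes become
// host-order values on a big-endian machine.
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <cstring>

void swap_elements(unsigned char* data, std::size_t count, std::size_t elem_size) {
  for (std::size_t i = 0; i < count; ++i) {
    unsigned char* elem = data + i * elem_size;
    std::reverse(elem, elem + elem_size);
  }
}

int main() {
  // 1.0f is stored little-endian on disk as 00 00 80 3F.
  unsigned char raw[4] = {0x00, 0x00, 0x80, 0x3F};
  swap_elements(raw, 1, sizeof(float));  // on a BE host: now 3F 80 00 00
  float value;
  std::memcpy(&value, raw, sizeof(value));
  std::printf("%f\n", value);  // prints 1.000000 on a big-endian host
  return 0;
}
```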
1 parent bd32f51 · commit a6cdf62

274 files changed: +10,047 −3,285 lines

ThirdPartyNotices.txt (+35)

```diff
@@ -6045,3 +6045,38 @@ https://github.com/intel/neural-speed
 terms, and open source software license terms. These separate license terms
 govern your use of the third party programs as set forth in the
 "THIRD-PARTY-PROGRAMS" file.
+
+_____
+
+dawn
+
+https://dawn.googlesource.com/dawn
+
+BSD 3-Clause License
+
+Copyright 2017-2023 The Dawn & Tint Authors
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
```

cmake/deps.txt (-1)

```diff
@@ -53,7 +53,6 @@ re2;https://github.com/google/re2/archive/refs/tags/2024-07-02.zip;646e1728269cd
 safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac
 tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381
 cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.5.1.zip;e49b2b964163d27765a5002d210a2f3c73771835
-utf8_range;https://github.com/protocolbuffers/utf8_range/archive/72c943dea2b9240cd09efde15191e144bc7c7d38.zip;9925739c9debc0efa2adcb194d371a35b6a03156
 extensions;https://github.com/microsoft/onnxruntime-extensions/archive/c24b7bab0c12f53da76d0c31b03b9f0f8ec8f3b4.zip;239063aee4946a9af147b473a4c3da78ba7413b4
 composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/204da9c522cebec5220bba52cd3542ebcaf99e7a.zip;1827348efd47831c13074245274d41b7cae8a557
 directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
```

cmake/external/onnxruntime_external_deps.cmake (+28, -26)

```diff
@@ -107,23 +107,6 @@ if(onnxruntime_USE_MIMALLOC)
   FetchContent_MakeAvailable(mimalloc)
 endif()
 
-#Protobuf depends on utf8_range
-onnxruntime_fetchcontent_declare(
-    utf8_range
-    URL ${DEP_URL_utf8_range}
-    URL_HASH SHA1=${DEP_SHA1_utf8_range}
-    EXCLUDE_FROM_ALL
-    FIND_PACKAGE_ARGS NAMES utf8_range
-)
-
-set(utf8_range_ENABLE_TESTS OFF CACHE BOOL "Build test suite" FORCE)
-set(utf8_range_ENABLE_INSTALL OFF CACHE BOOL "Configure installation" FORCE)
-
-# The next line will generate an error message "fatal: not a git repository", but it is ok. It is from flatbuffers
-onnxruntime_fetchcontent_makeavailable(utf8_range)
-# protobuf's cmake/utf8_range.cmake has the following line
-include_directories(${utf8_range_SOURCE_DIR})
-
 # Download a protoc binary from Internet if needed
 if(NOT ONNX_CUSTOM_PROTOC_EXECUTABLE AND NOT onnxruntime_USE_VCPKG)
   # This part of code is only for users' convenience. The code couldn't handle all cases. Users always can manually
@@ -304,7 +287,7 @@ if(NOT TARGET Boost::mp11)
     EXCLUDE_FROM_ALL
     FIND_PACKAGE_ARGS NAMES Boost
   )
-onnxruntime_fetchcontent_makeavailable(mp11)
+  onnxruntime_fetchcontent_makeavailable(mp11)
   if(NOT TARGET Boost::mp11)
     add_library(Boost::mp11 ALIAS Boost::headers)
   endif()
@@ -442,6 +425,9 @@ target_include_directories(safeint_interface INTERFACE ${safeint_SOURCE_DIR})
 
 
 # Flatbuffers
+if(onnxruntime_USE_VCPKG)
+  find_package(flatbuffers REQUIRED)
+else()
 # We do not need to build flatc for iOS or Android Cross Compile
 if (CMAKE_SYSTEM_NAME STREQUAL "iOS" OR CMAKE_SYSTEM_NAME STREQUAL "Android" OR CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
   set(FLATBUFFERS_BUILD_FLATC OFF CACHE BOOL "FLATBUFFERS_BUILD_FLATC" FORCE)
@@ -492,6 +478,7 @@ namespace std { using ::getenv; }
   endif()
 endif()
 endif()
+endif()
 
 # ONNX
 if (NOT onnxruntime_USE_FULL_PROTOBUF)
@@ -672,17 +659,10 @@ if (onnxruntime_USE_WEBGPU)
 
   # disable things we don't use
   set(DAWN_DXC_ENABLE_ASSERTS_IN_NDEBUG OFF)
-  set(DAWN_ENABLE_DESKTOP_GL OFF CACHE BOOL "" FORCE)
-  set(DAWN_ENABLE_OPENGLES OFF CACHE BOOL "" FORCE)
-  set(DAWN_SUPPORTS_GLFW_FOR_WINDOWING OFF CACHE BOOL "" FORCE)
-  set(DAWN_USE_GLFW OFF CACHE BOOL "" FORCE)
-  set(DAWN_USE_WINDOWS_UI OFF CACHE BOOL "" FORCE)
   set(DAWN_USE_X11 OFF CACHE BOOL "" FORCE)
 
   set(TINT_BUILD_TESTS OFF CACHE BOOL "" FORCE)
   set(TINT_BUILD_CMD_TOOLS OFF CACHE BOOL "" FORCE)
-  set(TINT_BUILD_GLSL_WRITER OFF CACHE BOOL "" FORCE)
-  set(TINT_BUILD_GLSL_VALIDATOR OFF CACHE BOOL "" FORCE)
   set(TINT_BUILD_IR_BINARY OFF CACHE BOOL "" FORCE)
   set(TINT_BUILD_SPV_READER OFF CACHE BOOL "" FORCE) # don't need. disabling is a large binary size saving
   set(TINT_BUILD_WGSL_WRITER ON CACHE BOOL "" FORCE) # needed to create cache key. runtime error if not enabled.
@@ -732,7 +712,29 @@ if (onnxruntime_USE_WEBGPU)
     # # if we need to apply patches in the future, we can uncomment the following line.
     #
     # The dawn.patch contains the following changes:
-    # - https://dawn-review.googlesource.com/c/dawn/+/225514
+    #
+    # - (public) CMake fix to support Emscripten v4.0.3+
+    #   This change allows Dawn to find the file "gen_struct_info.py" in the correct location.
+    #   https://dawn-review.googlesource.com/c/dawn/+/225514
+    #
+    # - (public) Fix emwgpu C++ implementation for buffer destroy
+    #   In native implementation, wgpuBufferRelease will trigger the buffer destroy (if refcount decreased to 0). But
+    #   in emwgpu implementation, the buffer destroy won't happen. This change fixes the bug.
+    #   https://dawn-review.googlesource.com/c/dawn/+/226315
+    #
+    # - (private) Allow "external" buffer in emwgpu C++ implementation
+    #   This change allows WGPUBufferImpl to destroy the buffer when the refcount decreased to 0 only for non-external
+    #   buffer.
+    #   "external buffer" means the GPUBuffer instance created in JavaScript and imported to C++ by `importJsBuffer`.
+    #
+    # - (private) Remove hard-coded CMAKE_OSX_DEPLOYMENT_TARGET in Dawn's CMake files
+    #   https://github.com/microsoft/onnxruntime/pull/23729
+    #
+    # - (private) Fix external ref count for "external" device in emwgpu C++ implementation
+    #   This change fixes the incorrect external ref count for class WGPUDeviceImpl when used with "external" device.
+    #   "external device" means the GPUDevice instance created in JavaScript and imported to C++ by `importJsDevice`.
+    #
+    #
     PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn.patch
     EXCLUDE_FROM_ALL
   )
```

cmake/nuget_helpers.cmake (+1, -1)

```diff
@@ -1,7 +1,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
-cmake_minimum_required(VERSION 3.0)
+cmake_minimum_required(VERSION 3.5)
 
 # Determines the version of a native nuget package from the root packages.config.
 #
```

cmake/onnxruntime_framework.cmake (+1, -4)

```diff
@@ -36,10 +36,7 @@ elseif(onnxruntime_ENABLE_TRITON)
 endif()
 
 if (onnxruntime_MINIMAL_BUILD)
-  set(onnxruntime_framework_src_exclude
-    "${ONNXRUNTIME_ROOT}/core/framework/fallback_cpu_capability.h"
-    "${ONNXRUNTIME_ROOT}/core/framework/fallback_cpu_capability.cc"
-  )
+  set(onnxruntime_framework_src_exclude)
 
   # custom ops support must be explicitly enabled in a minimal build. exclude if not.
   if (NOT onnxruntime_MINIMAL_BUILD_CUSTOM_OPS)
```

cmake/onnxruntime_optimizer.cmake (+1)

```diff
@@ -9,6 +9,7 @@ if (onnxruntime_MINIMAL_BUILD)
   list(APPEND onnxruntime_optimizer_src_patterns
     "${ONNXRUNTIME_INCLUDE_DIR}/core/optimizer/graph_transformer.h"
     "${ONNXRUNTIME_ROOT}/core/optimizer/graph_transformer.cc"
+    "${ONNXRUNTIME_ROOT}/core/optimizer/graph_optimizer_registry.cc"
   )
 
   if (onnxruntime_EXTENDED_MINIMAL_BUILD)
```

cmake/onnxruntime_providers_js.cmake (+5, -1)

```diff
@@ -1,6 +1,10 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
+if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD)
+  message(FATAL_ERROR "JSEP can not be used in a basic minimal build. Please build with '--minimal_build extended'")
+endif()
+
 add_compile_definitions(USE_JSEP=1)
 
 file(GLOB_RECURSE onnxruntime_providers_js_cc_srcs
@@ -18,4 +22,4 @@
   onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers Boost::mp11 Eigen3::Eigen
 )
 
-add_dependencies(onnxruntime_providers_js ${onnxruntime_EXTERNAL_DEPENDENCIES})
+add_dependencies(onnxruntime_providers_js ${onnxruntime_EXTERNAL_DEPENDENCIES})
```

cmake/onnxruntime_python.cmake (+1, -1)

```diff
@@ -1029,7 +1029,7 @@ if (onnxruntime_USE_QNN)
     add_custom_command(
       TARGET onnxruntime_pybind11_state POST_BUILD
       COMMAND ${CMAKE_COMMAND} -E copy
-        $<TARGET_FILE:onnxruntime_qnn_ctx_gen>
+        $<TARGET_FILE:ep_weight_sharing_ctx_gen>
         $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/
     )
     if (EXISTS "${onnxruntime_QNN_HOME}/Qualcomm AI Hub Proprietary License.pdf")
```

cmake/onnxruntime_session.cmake (+1)

```diff
@@ -22,6 +22,7 @@ endif()
 if (onnxruntime_MINIMAL_BUILD)
   set(onnxruntime_session_src_exclude
     "${ONNXRUNTIME_ROOT}/core/session/provider_bridge_ort.cc"
+    "${ONNXRUNTIME_ROOT}/core/session/model_builder_c_api.cc"
   )
 
   list(REMOVE_ITEM onnxruntime_session_srcs ${onnxruntime_session_src_exclude})
```

cmake/onnxruntime_unittests.cmake (+26, -17)

```diff
@@ -236,14 +236,14 @@ function(AddTest)
       )
     endif()
     # Set test timeout to 3 hours.
-    set_tests_properties(${_UT_TARGET} PROPERTIES TIMEOUT 7200)
+    set_tests_properties(${_UT_TARGET} PROPERTIES TIMEOUT 10800)
   else()
     add_test(NAME ${_UT_TARGET}
       COMMAND ${_UT_TARGET} ${TEST_ARGS}
       WORKING_DIRECTORY $<TARGET_FILE_DIR:${_UT_TARGET}>
     )
    # Set test timeout to 3 hours.
-    set_tests_properties(${_UT_TARGET} PROPERTIES TIMEOUT 7200)
+    set_tests_properties(${_UT_TARGET} PROPERTIES TIMEOUT 10800)
   endif()
 endif()
 endfunction(AddTest)
@@ -503,6 +503,7 @@ set (onnxruntime_shared_lib_test_SRC
 
 if (NOT onnxruntime_MINIMAL_BUILD)
   list(APPEND onnxruntime_shared_lib_test_SRC ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_inference.cc)
+  list(APPEND onnxruntime_shared_lib_test_SRC ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_model_builder_api.cc)
 endif()
 
 if(onnxruntime_RUN_ONNX_TESTS)
@@ -1288,31 +1289,34 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
 
 if(onnxruntime_USE_QNN)
   #qnn ctx generator
-  set(onnxruntime_qnn_ctx_gen_src_dir ${TEST_SRC_DIR}/qnn_ctx_gen)
-  set(onnxruntime_qnn_ctx_gen_src_patterns
-    "${onnxruntime_qnn_ctx_gen_src_dir}/*.cc"
-    "${onnxruntime_qnn_ctx_gen_src_dir}/*.h")
+  set(ep_weight_sharing_ctx_gen_src_dir ${TEST_SRC_DIR}/ep_weight_sharing_ctx_gen)
+  set(ep_weight_sharing_ctx_gen_src_patterns
+    "${ep_weight_sharing_ctx_gen_src_dir}/*.cc"
+    "${ep_weight_sharing_ctx_gen_src_dir}/*.h")
 
-  file(GLOB onnxruntime_qnn_ctx_gen_src CONFIGURE_DEPENDS
-    ${onnxruntime_qnn_ctx_gen_src_patterns}
+  file(GLOB ep_weight_sharing_ctx_gen_src CONFIGURE_DEPENDS
+    ${ep_weight_sharing_ctx_gen_src_patterns}
   )
-  onnxruntime_add_executable(onnxruntime_qnn_ctx_gen ${onnxruntime_qnn_ctx_gen_src})
-  target_include_directories(onnxruntime_qnn_ctx_gen PRIVATE ${onnx_test_runner_src_dir} ${ONNXRUNTIME_ROOT}
-    ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir}
-    ${CMAKE_CURRENT_BINARY_DIR})
+  onnxruntime_add_executable(ep_weight_sharing_ctx_gen ${ep_weight_sharing_ctx_gen_src})
+  target_include_directories(ep_weight_sharing_ctx_gen PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR})
   if (WIN32)
-    target_compile_options(onnxruntime_qnn_ctx_gen PRIVATE ${disabled_warnings})
+    target_compile_options(ep_weight_sharing_ctx_gen PRIVATE ${disabled_warnings})
     if (NOT DEFINED SYS_PATH_LIB)
       set(SYS_PATH_LIB shlwapi)
     endif()
   endif()
 
-  if(WIN32)
-    target_link_libraries(onnxruntime_qnn_ctx_gen PRIVATE debug dbghelp advapi32)
+  if (onnxruntime_BUILD_SHARED_LIB)
+    set(ep_weight_sharing_ctx_gen_libs onnxruntime_common onnxruntime ${onnxruntime_EXTERNAL_LIBRARIES} ${GETOPT_LIB_WIDE})
+    target_link_libraries(ep_weight_sharing_ctx_gen PRIVATE ${ep_weight_sharing_ctx_gen_libs})
+    if (WIN32)
+      target_link_libraries(ep_weight_sharing_ctx_gen PRIVATE debug dbghelp advapi32)
+    endif()
+  else()
+    target_link_libraries(ep_weight_sharing_ctx_gen PRIVATE onnxruntime_session ${onnxruntime_test_providers_libs} ${onnxruntime_EXTERNAL_LIBRARIES} ${GETOPT_LIB_WIDE})
   endif()
-  target_link_libraries(onnxruntime_qnn_ctx_gen PRIVATE onnx_test_runner_common onnxruntime_test_utils onnxruntime_common onnxruntime_graph onnxruntime_session onnxruntime_providers onnxruntime_framework onnxruntime_util onnxruntime_mlas onnxruntime_optimizer onnxruntime_flatbuffers onnx_test_data_proto ${onnxruntime_test_providers_libs} ${onnxruntime_EXTERNAL_LIBRARIES} ${GETOPT_LIB_WIDE} ${SYS_PATH_LIB} ${CMAKE_DL_LIBS})
 
-  set_target_properties(onnxruntime_qnn_ctx_gen PROPERTIES FOLDER "ONNXRuntimeTest")
+  set_target_properties(ep_weight_sharing_ctx_gen PROPERTIES FOLDER "ONNXRuntimeTest")
 endif()
 
 # shared lib
@@ -1359,14 +1363,19 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
     LIBS ${onnxruntime_shared_lib_test_LIBS}
     DEPENDS ${all_dependencies}
   )
+
+  target_include_directories(onnxruntime_shared_lib_test PRIVATE ${ONNXRUNTIME_ROOT})
+
   if (onnxruntime_USE_CUDA)
     target_include_directories(onnxruntime_shared_lib_test PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
    target_sources(onnxruntime_shared_lib_test PRIVATE ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/cuda_ops.cu)
   endif()
+
   if (onnxruntime_USE_ROCM)
     target_include_directories(onnxruntime_shared_lib_test PRIVATE ${onnxruntime_ROCM_HOME}/include)
     target_compile_definitions(onnxruntime_shared_lib_test PRIVATE __HIP_PLATFORM_AMD__)
   endif()
+
   if (CMAKE_SYSTEM_NAME STREQUAL "Android")
     target_sources(onnxruntime_shared_lib_test PRIVATE
       "${ONNXRUNTIME_ROOT}/core/platform/android/cxa_demangle.cc"
```

cmake/onnxruntime_webassembly.cmake (+28, -9)

```diff
@@ -211,10 +211,14 @@ else()
   target_link_libraries(onnxruntime_webassembly PRIVATE tensorboard)
 endif()
 
+set(onnxruntime_webassembly_script_deps "${ONNXRUNTIME_ROOT}/wasm/pre.js")
+
+set(EXPORTED_FUNCTIONS "_malloc,_free")
 if (onnxruntime_USE_JSEP)
-  set(EXPORTED_FUNCTIONS "_malloc,_free,_JsepOutput,_JsepGetNodeName")
-else()
-  set(EXPORTED_FUNCTIONS "_malloc,_free")
+  string(APPEND EXPORTED_FUNCTIONS ",_JsepOutput,_JsepGetNodeName")
+endif()
+if (onnxruntime_USE_WEBGPU)
+  string(APPEND EXPORTED_FUNCTIONS ",_wgpuBufferRelease,_wgpuCreateInstance")
 endif()
 
 if (onnxruntime_ENABLE_WEBASSEMBLY_MEMORY64)
@@ -312,13 +316,15 @@ else()
     target_compile_options(noexcep_operators PRIVATE ${SMEMORY_FLAG} -Wno-experimental)
   endif()
   target_link_options(onnxruntime_webassembly PRIVATE
-    --post-js "${ONNXRUNTIME_ROOT}/wasm/js_post_js_64.js"
+    "SHELL:--post-js \"${ONNXRUNTIME_ROOT}/wasm/js_post_js_64.js\""
   )
+  list(APPEND onnxruntime_webassembly_script_deps "${ONNXRUNTIME_ROOT}/wasm/js_post_js_64.js")
 else ()
   set(MAXIMUM_MEMORY "4294967296")
   target_link_options(onnxruntime_webassembly PRIVATE
-    --post-js "${ONNXRUNTIME_ROOT}/wasm/js_post_js.js"
+    "SHELL:--post-js \"${ONNXRUNTIME_ROOT}/wasm/js_post_js.js\""
   )
+  list(APPEND onnxruntime_webassembly_script_deps "${ONNXRUNTIME_ROOT}/wasm/js_post_js.js")
 endif ()
 
 target_link_options(onnxruntime_webassembly PRIVATE
@@ -372,7 +378,6 @@ jsepDownload:_pp_")
     "SHELL:-s SIGNATURE_CONVERSIONS='${SIGNATURE_CONVERSIONS}'"
   )
 endif ()
-set_target_properties(onnxruntime_webassembly PROPERTIES LINK_DEPENDS ${ONNXRUNTIME_ROOT}/wasm/pre.js)
 
 if (onnxruntime_USE_JSEP)
   # NOTE: "-s ASYNCIFY=1" is required for JSEP to work with WebGPU
@@ -382,10 +387,8 @@ jsepDownload:_pp_")
   target_compile_definitions(onnxruntime_webassembly PRIVATE USE_JSEP=1)
   target_link_options(onnxruntime_webassembly PRIVATE
     "SHELL:--pre-js \"${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js\""
-    "SHELL:-s ASYNCIFY=1"
-    "SHELL:-s ASYNCIFY_STACK_SIZE=65536"
   )
-  set_target_properties(onnxruntime_webassembly PROPERTIES LINK_DEPENDS ${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js)
+  list(APPEND onnxruntime_webassembly_script_deps "${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js")
 
   if (onnxruntime_ENABLE_WEBASSEMBLY_MEMORY64)
     target_link_options(onnxruntime_webassembly PRIVATE
@@ -397,6 +400,20 @@ jsepDownload:_pp_")
 
 if (onnxruntime_USE_WEBGPU)
   target_compile_definitions(onnxruntime_webassembly PRIVATE USE_WEBGPU=1)
+  target_link_options(onnxruntime_webassembly PRIVATE
+    "SHELL:--post-js \"${ONNXRUNTIME_ROOT}/wasm/post-webgpu.js\""
+  )
+  list(APPEND onnxruntime_webassembly_script_deps "${ONNXRUNTIME_ROOT}/wasm/post-webgpu.js")
+endif()
+
+if (onnxruntime_USE_JSEP OR onnxruntime_USE_WEBGPU OR onnxruntime_USE_WEBNN)
+  # if any of the above is enabled, we need to use the asyncify library
+  target_link_options(onnxruntime_webassembly PRIVATE
+    "SHELL:--pre-js \"${ONNXRUNTIME_ROOT}/wasm/pre-async.js\""
+    "SHELL:-s ASYNCIFY=1"
+    "SHELL:-s ASYNCIFY_STACK_SIZE=65536"
+  )
+  list(APPEND onnxruntime_webassembly_script_deps "${ONNXRUNTIME_ROOT}/wasm/pre-async.js")
 endif()
 
 if (onnxruntime_EMSCRIPTEN_SETTINGS)
@@ -458,6 +475,8 @@ jsepDownload:_pp_")
   )
 endif()
 
+set_target_properties(onnxruntime_webassembly PROPERTIES LINK_DEPENDS "${onnxruntime_webassembly_script_deps}")
+
 set(target_name_list ort)
 
 if (onnxruntime_ENABLE_TRAINING_APIS)
```
