Skip to content

Commit 39c09d4

Browse files
jjsjann123 authored
pytorchmergebot committed
Revert "Revert "Nvfuser code removal (pytorch#111093)"" (pytorch#111604)
This reverts commit 715dfce. The original PR pytorch#111093 is reverted due to broken internal build. Pull Request resolved: pytorch#111604 Approved by: https://github.com/davidberard98
1 parent ce48d36 commit 39c09d4

12 files changed: +4 −188 lines changed

.ci/pytorch/test.sh

+1-1
Original file line number | Diff line number | Diff line change
@@ -615,7 +615,7 @@ test_libtorch_jit() {
615615

616616
# Run jit and lazy tensor cpp tests together to finish them faster
617617
if [[ "$BUILD_ENVIRONMENT" == *cuda* && "$TEST_CONFIG" != *nogpu* ]]; then
618-
LTC_TS_CUDA=1 python test/run_test.py --cpp --verbose -i cpp/test_jit cpp/nvfuser_tests cpp/test_lazy
618+
LTC_TS_CUDA=1 python test/run_test.py --cpp --verbose -i cpp/test_jit cpp/test_lazy
619619
else
620620
# CUDA tests have already been skipped when CUDA is not available
621621
python test/run_test.py --cpp --verbose -i cpp/test_jit cpp/test_lazy -k "not CUDA"

CMakeLists.txt

-16
Original file line number | Diff line number | Diff line change
@@ -197,9 +197,6 @@ option(USE_TSAN "Use Thread Sanitizer" OFF)
197197
option(USE_CUDA "Use CUDA" ON)
198198
cmake_dependent_option(
199199
BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF)
200-
cmake_dependent_option(
201-
BUILD_NVFUSER "Build NVFUSER" ON
202-
"USE_CUDA OR USE_ROCM" OFF)
203200
cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF)
204201
option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF)
205202
cmake_dependent_option(
@@ -1206,19 +1203,6 @@ if(BUILD_JNI)
12061203
add_subdirectory(android/pytorch_android)
12071204
endif()
12081205

1209-
if(NOT USE_CUDA AND NOT USE_ROCM)
1210-
set(BUILD_NVFUSER OFF CACHE BOOL "BUILD nvfuser" FORCE)
1211-
endif()
1212-
1213-
if(BUILD_NVFUSER)
1214-
if(DEFINED ENV{NVFUSER_SOURCE_DIR})
1215-
add_subdirectory($ENV{NVFUSER_SOURCE_DIR} nvfuser)
1216-
else()
1217-
add_subdirectory(third_party/nvfuser nvfuser)
1218-
endif()
1219-
add_compile_definitions(BUILD_NVFUSER)
1220-
endif()
1221-
12221206
include(cmake/Summary.cmake)
12231207
caffe2_print_configuration_summary()
12241208

build_variables.bzl

-1
Original file line number | Diff line number | Diff line change
@@ -255,7 +255,6 @@ core_sources_full_mobile_no_backend_interface_xplat = [
255255
"torch/csrc/jit/passes/constant_propagation.cpp",
256256
"torch/csrc/jit/passes/restore_mutation.cpp",
257257
"torch/csrc/jit/passes/create_autodiff_subgraphs.cpp",
258-
"torch/csrc/jit/passes/cuda_graph_fuser.cpp",
259258
"torch/csrc/jit/passes/dead_code_elimination.cpp",
260259
"torch/csrc/jit/passes/eliminate_no_ops.cpp",
261260
"torch/csrc/jit/passes/remove_redundant_profiles.cpp",

setup.py

-44
Original file line number | Diff line number | Diff line change
@@ -189,9 +189,6 @@
189189
# NCCL_INCLUDE_DIR
190190
# specify where nccl is installed
191191
#
192-
# NVFUSER_SOURCE_DIR
193-
# specify nvfuser root directory
194-
#
195192
# NVTOOLSEXT_PATH (Windows only)
196193
# specify where nvtoolsext is installed
197194
#
@@ -632,11 +629,6 @@ def run(self):
632629
else:
633630
report("-- Not using ITT")
634631

635-
if cmake_cache_vars["BUILD_NVFUSER"]:
636-
report("-- Building nvfuser")
637-
else:
638-
report("-- Not Building nvfuser")
639-
640632
# Do not use clang to compile extensions if `-fstack-clash-protection` is defined
641633
# in system CFLAGS
642634
c_flags = str(os.getenv("CFLAGS", ""))
@@ -736,22 +728,6 @@ def build_extensions(self):
736728
os.makedirs(dst_dir)
737729
self.copy_file(src, dst)
738730

739-
# Copy nvfuser extension
740-
for i, ext in enumerate(self.extensions):
741-
if ext.name != "nvfuser._C":
742-
continue
743-
fullname = self.get_ext_fullname(ext.name)
744-
filename = self.get_ext_filename(fullname)
745-
fileext = os.path.splitext(filename)[1]
746-
src = os.path.join(os.path.dirname(filename), "nvfuser" + fileext)
747-
dst = os.path.join(os.path.realpath(self.build_lib), filename)
748-
if os.path.exists(src):
749-
report(f"Copying {ext.name} from {src} to {dst}")
750-
dst_dir = os.path.dirname(dst)
751-
if not os.path.exists(dst_dir):
752-
os.makedirs(dst_dir)
753-
self.copy_file(src, dst)
754-
755731
setuptools.command.build_ext.build_ext.build_extensions(self)
756732

757733
def get_outputs(self):
@@ -1011,8 +987,6 @@ def make_relative_rpath_args(path):
1011987
excludes.extend(["caffe2", "caffe2.*"])
1012988
if not cmake_cache_vars["BUILD_FUNCTORCH"]:
1013989
excludes.extend(["functorch", "functorch.*"])
1014-
if not cmake_cache_vars["BUILD_NVFUSER"]:
1015-
excludes.extend(["nvfuser", "nvfuser.*"])
1016990
packages = find_packages(exclude=excludes)
1017991
C = Extension(
1018992
"torch._C",
@@ -1046,10 +1020,6 @@ def make_relative_rpath_args(path):
10461020
extensions.append(
10471021
Extension(name="functorch._C", sources=[]),
10481022
)
1049-
if cmake_cache_vars["BUILD_NVFUSER"]:
1050-
extensions.append(
1051-
Extension(name="nvfuser._C", sources=[]),
1052-
)
10531023

10541024
cmdclass = {
10551025
"bdist_wheel": wheel_concatenate,
@@ -1312,8 +1282,6 @@ def main():
13121282
"include/torch/csrc/jit/tensorexpr/*.h",
13131283
"include/torch/csrc/jit/tensorexpr/operators/*.h",
13141284
"include/torch/csrc/jit/codegen/cuda/*.h",
1315-
"include/torch/csrc/jit/codegen/cuda/ops/*.h",
1316-
"include/torch/csrc/jit/codegen/cuda/scheduler/*.h",
13171285
"include/torch/csrc/onnx/*.h",
13181286
"include/torch/csrc/profiler/*.h",
13191287
"include/torch/csrc/profiler/orchestration/*.h",
@@ -1355,18 +1323,6 @@ def main():
13551323
"utils/model_dump/code.js",
13561324
"utils/model_dump/*.mjs",
13571325
]
1358-
if get_cmake_cache_vars()["BUILD_NVFUSER"]:
1359-
torch_package_data.extend(
1360-
[
1361-
"share/cmake/nvfuser/*.cmake",
1362-
"include/nvfuser/*.h",
1363-
"include/nvfuser/kernel_db/*.h",
1364-
"include/nvfuser/multidevice/*.h",
1365-
"include/nvfuser/ops/*.h",
1366-
"include/nvfuser/python_frontend/*.h",
1367-
"include/nvfuser/scheduler/*.h",
1368-
]
1369-
)
13701326

13711327
if get_cmake_cache_vars()["BUILD_CAFFE2"]:
13721328
torch_package_data.extend(

torch/csrc/jit/codegen/cuda/interface.cpp

+1-34
Original file line number | Diff line number | Diff line change
@@ -14,43 +14,10 @@ namespace jit {
1414
namespace fuser {
1515
namespace cuda {
1616

17-
class LoadingNvfuserLibrary {
18-
public:
19-
#ifdef USE_CUDA
20-
LoadingNvfuserLibrary() {
21-
std::string library_name;
22-
if (const char* path = std::getenv("TORCH_NVFUSER_LIBRARY_PATH")) {
23-
library_name = path;
24-
}
25-
#if defined(_WIN32)
26-
library_name += "nvfuser_codegen.dll";
27-
#elif defined(__APPLE__)
28-
library_name += "libnvfuser_codegen.dylib";
29-
#else
30-
library_name += "libnvfuser_codegen.so";
31-
#endif
32-
try {
33-
// NOTE: we need to refactor this to a lazy load instead. We could end up
34-
// with double de-allocation with our python API loading the library.
35-
// Leaking the handle should solve the problem for now
36-
nvfuserLib_ = std::make_shared<at::DynamicLibrary>(
37-
library_name.c_str(), nullptr, true);
38-
} catch (const c10::DynamicLibraryError& e) {
39-
#if defined(BUILD_NVFUSER) || !defined(NDEBUG)
40-
TORCH_WARN_ONCE("Loading nvfuser library failed with: ", e.msg());
41-
#endif
42-
}
43-
}
44-
45-
#endif // USE_CUDA
46-
std::shared_ptr<at::DynamicLibrary> nvfuserLib_;
47-
};
48-
49-
static LoadingNvfuserLibrary loading_nvfuser_library_;
50-
5117
static std::atomic<bool> cuda_fusion_guard_mode{true};
5218

5319
bool isEnabled() {
20+
TORCH_WARN_ONCE("torch::jit::fuser::cuda::isEnabled() is deprecated");
5421
return false;
5522
}
5623

torch/csrc/jit/passes/autocast.cpp

-1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77
#include <c10/util/Optional.h>
88
#include <torch/csrc/jit/ir/ir.h>
99
#include <torch/csrc/jit/jit_log.h>
10-
#include <torch/csrc/jit/passes/cuda_graph_fuser.h>
1110
#include <torch/csrc/jit/passes/quantization/helper.h>
1211

1312
#include <stack>

torch/csrc/jit/passes/cuda_graph_fuser.cpp

-21
This file was deleted.

torch/csrc/jit/passes/cuda_graph_fuser.h

-42
This file was deleted.

torch/csrc/jit/passes/tensorexpr_fuser.cpp

-5
Original file line number | Diff line number | Diff line change
@@ -857,11 +857,6 @@ class TensorExprFuser {
857857
if (device->is_cpu()) {
858858
return canFuseOnCPU();
859859
} else if (device->is_cuda()) {
860-
#ifndef C10_MOBILE
861-
if (fuser::cuda::isEnabled()) {
862-
return false;
863-
}
864-
#endif
865860
return canFuseOnGPU();
866861
} else if (device->is_xpu()) {
867862
return false;

torch/csrc/jit/python/init.cpp

-1
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@
2727
#include <torch/csrc/jit/passes/constant_propagation.h>
2828
#include <torch/csrc/jit/passes/create_autodiff_subgraphs.h>
2929
#include <torch/csrc/jit/passes/create_functional_graphs.h>
30-
#include <torch/csrc/jit/passes/cuda_graph_fuser.h>
3130
#include <torch/csrc/jit/passes/dbr_quantization/remove_redundant_aliases.h>
3231
#include <torch/csrc/jit/passes/dead_code_elimination.h>
3332
#include <torch/csrc/jit/passes/decompose_ops.h>

torch/csrc/jit/runtime/profiling_graph_executor_impl.cpp

-8
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@
1414
#include <torch/csrc/jit/passes/constant_pooling.h>
1515
#include <torch/csrc/jit/passes/constant_propagation.h>
1616
#include <torch/csrc/jit/passes/create_autodiff_subgraphs.h>
17-
#include <torch/csrc/jit/passes/cuda_graph_fuser.h>
1817
#include <torch/csrc/jit/passes/dead_code_elimination.h>
1918
#include <torch/csrc/jit/passes/decompose_ops.h>
2019
#include <torch/csrc/jit/passes/graph_fuser.h>
@@ -646,13 +645,6 @@ const ExecutionPlan& ProfilingGraphExecutorImpl::getOptimizedPlanFor(
646645
// before any other pass that could insert `prim::iprofile_value` node on
647646
// `aten::_grad_sum_to_size` input.
648647
InsertProfileNodesForSpecializeAutogradZero(pr_.get());
649-
// `InsertProfileNodesForCUDAFuser` inserts profile node for non-tensor
650-
// value
651-
#ifndef C10_MOBILE
652-
if (torch::jit::fuser::cuda::isEnabled()) {
653-
torch::jit::fuser::cuda::InsertProfileNodesForCUDAFuser(pr_.get());
654-
}
655-
#endif
656648
GRAPH_DUMP("Profiled Graph: ", pr_->graph());
657649
profiling_plan_ = ExecutionPlan(pr_->graph(), function_name_);
658650
// fall-through

torch/csrc/jit/runtime/profiling_record.cpp

+2-14
Original file line number | Diff line number | Diff line change
@@ -207,13 +207,7 @@ void ProfilingRecord::insertShapeProfile(
207207
}
208208

209209
static bool needsProfiledInputs(Node* n) {
210-
if (tensorexpr::isSupported(n) ||
211-
#ifndef C10_MOBILE
212-
(fuser::cuda::isEnabled() && fuser::cuda::profileNode(n))
213-
#else
214-
false
215-
#endif
216-
) {
210+
if (tensorexpr::isSupported(n)) {
217211
return true;
218212
}
219213

@@ -244,13 +238,7 @@ static bool needsProfiledInputs(Node* n) {
244238
}
245239

246240
static bool needsProfiledOutput(Node* n) {
247-
if (tensorexpr::isSupported(n) ||
248-
#ifndef C10_MOBILE
249-
(fuser::cuda::isEnabled() && fuser::cuda::profileNode(n))
250-
#else
251-
false
252-
#endif
253-
) {
241+
if (tensorexpr::isSupported(n)) {
254242
return true;
255243
}
256244

0 commit comments

Comments (0)