[Environment Variable][2/N] Use thread-safe setenv wrapper (pytorch#124485)

cyyever · pytorchmergebot · commit 6327a7188052 · 2024-10-04T07:30:51.000Z
This is a follow-up to pytorch#119449: it replaces direct setenv/_putenv calls with the thread-safe c10::utils::set_env wrapper.

Pull Request resolved: pytorch#124485
Approved by: https://github.com/eqy
diff --git a/aten/src/ATen/cuda/detail/CUDAHooks.cpp b/aten/src/ATen/cuda/detail/CUDAHooks.cpp
@@ -14,6 +14,7 @@
 #include <ATen/detail/CUDAHooksInterface.h>
 #include <ATen/native/cuda/CuFFTPlanCache.h>
 #include <c10/util/Exception.h>
+#include <c10/util/env.h>
 #include <c10/cuda/CUDACachingAllocator.h>
 #include <c10/cuda/CUDAFunctions.h>
 #include <c10/util/irange.h>
@@ -79,19 +80,6 @@ struct _Initializer {
 } initializer;
 } // anonymous namespace
 
-// Sets the CUDA_MODULE_LOADING environment variable
-// if it's not set by the user.
-void maybe_set_cuda_module_loading(const std::string &def_value) {
-  auto value = std::getenv("CUDA_MODULE_LOADING");
-  if (!value) {
-#ifdef _WIN32
-    auto env_var = "CUDA_MODULE_LOADING=" + def_value;
-    _putenv(env_var.c_str());
-#else
-    setenv("CUDA_MODULE_LOADING", def_value.c_str(), 1);
-#endif
-  }
-}
 
 // NB: deleter is dynamic, because we need it to live in a separate
 // compilation unit (alt is to have another method in hooks, but
@@ -102,7 +90,9 @@ void CUDAHooks::initCUDA() const {
   // have a chance to enable vitals.
   at::vitals::VitalsAPI.setVital("CUDA", "used", "true", /* force = */ true);
 
-  maybe_set_cuda_module_loading("LAZY");
+  // Sets the CUDA_MODULE_LOADING environment variable
+  // if it's not set by the user.
+  c10::utils::set_env("CUDA_MODULE_LOADING", "LAZY", false);
   const auto num_devices = c10::cuda::device_count_ensure_non_zero();
   c10::cuda::CUDACachingAllocator::init(num_devices);
   at::cuda::detail::init_p2p_access_cache(num_devices);
diff --git a/aten/src/ATen/test/vitals.cpp b/aten/src/ATen/test/vitals.cpp
@@ -3,6 +3,7 @@
 
 #include <ATen/ATen.h>
 #include <ATen/core/Vitals.h>
+#include <c10/util/env.h>
 #include <c10/util/irange.h>
 #include <cstdlib>
 
@@ -15,11 +16,7 @@ TEST(Vitals, Basic) {
   std::streambuf* sbuf = std::cout.rdbuf();
   std::cout.rdbuf(buffer.rdbuf());
   {
-#ifdef _WIN32
-    _putenv("TORCH_VITAL=1");
-#else
-    setenv("TORCH_VITAL", "1", 1);
-#endif
+    c10::utils::set_env("TORCH_VITAL", "1");
     TORCH_VITAL_DEFINE(Testing);
     TORCH_VITAL(Testing, Attribute0) << 1;
     TORCH_VITAL(Testing, Attribute1) << "1";
@@ -44,11 +41,7 @@ TEST(Vitals, MultiString) {
   std::streambuf* sbuf = std::cout.rdbuf();
   std::cout.rdbuf(buffer.rdbuf());
   {
-#ifdef _WIN32
-    _putenv("TORCH_VITAL=1");
-#else
-    setenv("TORCH_VITAL", "1", 1);
-#endif
+    c10::utils::set_env("TORCH_VITAL", "1");
     TORCH_VITAL_DEFINE(Testing);
     TORCH_VITAL(Testing, Attribute0) << 1 << " of " << 2;
     TORCH_VITAL(Testing, Attribute1) << 1;
@@ -69,15 +62,7 @@ TEST(Vitals, OnAndOff) {
     std::streambuf* sbuf = std::cout.rdbuf();
     std::cout.rdbuf(buffer.rdbuf());
     {
-#ifdef _WIN32
-      if (i) {
-        _putenv("TORCH_VITAL=1");
-      } else {
-        _putenv("TORCH_VITAL=0");
-      }
-#else
-      setenv("TORCH_VITAL", i ? "1" : "", 1);
-#endif
+      c10::utils::set_env("TORCH_VITAL", i ? "1" : "0");
       TORCH_VITAL_DEFINE(Testing);
       TORCH_VITAL(Testing, Attribute0) << 1;
     }
@@ -100,11 +85,7 @@ TEST(Vitals, APIVitals) {
   std::streambuf* sbuf = std::cout.rdbuf();
   std::cout.rdbuf(buffer.rdbuf());
   {
-#ifdef _WIN32
-    _putenv("TORCH_VITAL=1");
-#else
-    setenv("TORCH_VITAL", "1", 1);
-#endif
+    c10::utils::set_env("TORCH_VITAL", "1");
     APIVitals api_vitals;
     rvalue = api_vitals.setVital("TestingSetVital", "TestAttr", "TestValue");
   }
diff --git a/test/cpp/api/dispatch.cpp b/test/cpp/api/dispatch.cpp
@@ -1,14 +1,13 @@
 #include <gtest/gtest.h>
 
 #include <ATen/native/Pow.h>
+#include <c10/util/env.h>
 #include <c10/util/irange.h>
 #include <test/cpp/api/support.h>
 #include <torch/torch.h>
 #include <torch/types.h>
 #include <torch/utils.h>
 #include <cstdlib>
-#include <iostream>
-#include <type_traits>
 #include <vector>
 
 struct DispatchTest : torch::test::SeedingFixture {};
@@ -18,11 +17,7 @@ TEST_F(DispatchTest, TestAVX2) {
   const std::vector<int> result{1, 4, 27, 256};
   const auto vals_tensor = torch::tensor(ints);
   const auto pows_tensor = torch::tensor(ints);
-#ifdef _WIN32
-  _putenv("ATEN_CPU_CAPABILITY=avx2");
-#else
-  setenv("ATEN_CPU_CAPABILITY", "avx2", 1);
-#endif
+  c10::utils::set_env("ATEN_CPU_CAPABILITY", "avx2");
   const auto actual_pow_avx2 = vals_tensor.pow(pows_tensor);
   for (const auto i : c10::irange(4)) {
     ASSERT_EQ(result[i], actual_pow_avx2[i].item<int>());
@@ -34,11 +29,7 @@ TEST_F(DispatchTest, TestAVX512) {
   const std::vector<int> result{1, 4, 27, 256};
   const auto vals_tensor = torch::tensor(ints);
   const auto pows_tensor = torch::tensor(ints);
-#ifdef _WIN32
-  _putenv("ATEN_CPU_CAPABILITY=avx512");
-#else
-  setenv("ATEN_CPU_CAPABILITY", "avx512", 1);
-#endif
+  c10::utils::set_env("ATEN_CPU_CAPABILITY", "avx512");
   const auto actual_pow_avx512 = vals_tensor.pow(pows_tensor);
   for (const auto i : c10::irange(4)) {
     ASSERT_EQ(result[i], actual_pow_avx512[i].item<int>());
@@ -50,11 +41,7 @@ TEST_F(DispatchTest, TestDefault) {
   const std::vector<int> result{1, 4, 27, 256};
   const auto vals_tensor = torch::tensor(ints);
   const auto pows_tensor = torch::tensor(ints);
-#ifdef _WIN32
-  _putenv("ATEN_CPU_CAPABILITY=default");
-#else
-  setenv("ATEN_CPU_CAPABILITY", "default", 1);
-#endif
+  c10::utils::set_env("ATEN_CPU_CAPABILITY", "default");
   const auto actual_pow_default = vals_tensor.pow(pows_tensor);
   for (const auto i : c10::irange(4)) {
     ASSERT_EQ(result[i], actual_pow_default[i].item<int>());
diff --git a/test/test_cuda.py b/test/test_cuda.py
@@ -5364,6 +5364,11 @@ def test_cuda_autocast_deprecated_warning(self):
             with torch.cuda.amp.autocast():
                 _ = torch.ones(10)
 
+    def test_cuda_module_loading_env(self):
+        torch.cuda.init()
+        val = os.environ.get("CUDA_MODULE_LOADING", "")
+        self.assertEqual(val, "LAZY")
+
 
 instantiate_parametrized_tests(TestCuda)
 instantiate_parametrized_tests(TestCudaMallocAsync)