Fix build break on Windows

fo40225 · fo40225 · commit 36087db59ac2 · 2021-01-24T14:16:21.000+08:00
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -14,6 +14,7 @@ load(
     "tf_cc_test",
     "tf_copts",
     "tf_cuda_library",
+    "if_not_windows",
 )
 load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
 load(
@@ -605,8 +606,9 @@ cc_library(
         "@com_google_absl//absl/strings:str_format",
         "@com_google_absl//absl/types:optional",
         "@com_google_absl//absl/types:span",
+    ] + if_not_windows([
         "@nvtx_archive//:nvtx",
-    ] + if_cuda_is_configured([
+    ]) + if_cuda_is_configured([
         "//tensorflow/stream_executor/cuda:cuda_stream",
         "//tensorflow/core/platform/default/build_config:cublas_plugin",
         "//tensorflow/core/platform/default/build_config:cudnn_plugin",
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
@@ -3287,9 +3287,10 @@ tf_cuda_library(
         "//third_party/eigen3",
         "//tensorflow/core/grappler/utils:functions",
         "//tensorflow/core/profiler/lib:traceme",
-        "@nvtx_archive//:nvtx",
         "//tensorflow/core/profiler/internal:traceme_recorder",
-    ] + mkl_deps(),
+    ] + if_not_windows([
+        "@nvtx_archive//:nvtx",
+    ]) + mkl_deps(),
     alwayslink = 1,
 )
 
diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD
@@ -3,6 +3,7 @@ load(
     "tf_cc_test",
     "tf_copts",
     "tf_cuda_library",
+    "if_not_windows",
 )
 load(
     "//third_party/mkl:build_defs.bzl",
@@ -203,9 +204,10 @@ tf_cuda_library(
             "//tensorflow/core:protos_all_cc",
             "//tensorflow/core/profiler/lib:traceme",
             "//tensorflow/core/grappler/optimizers:meta_optimizer",
-            "@nvtx_archive//:nvtx",
         ],
-    }),
+    }) + if_not_windows([
+        "@nvtx_archive//:nvtx",
+    ]),
 )
 
 tf_cc_test(
diff --git a/tensorflow/core/kernels/non_max_suppression_op.cu.cc b/tensorflow/core/kernels/non_max_suppression_op.cu.cc
@@ -149,27 +149,6 @@ __device__ EIGEN_STRONG_INLINE void ClearBit(T* bit_mask, int bit) {
   atomicAnd(bit_mask + bin, ~(T(1) << (bit & kRemainderMask)));
 }
 
-__global__ void FlipBoxes(Box* boxes, const int* num_batch_boxes,
-                          const int* box_strides, const int batch_size) {
-  // for (int b = 0; b < batch_size; ++b) {
-  // int box_offset = box_strides[b];
-  for (const int y : CudaGridRangeY(batch_size)) {
-    int box_offset = box_strides[y];
-    Box* curr_boxes = boxes + box_offset;
-    // if (threadIdx.x == 0) {
-    //   printf(" FBx batch=%d, box_offset=%d, num_batch_boxes=%d boxes@ %p \n",
-    //   y,
-    //          box_offset, num_batch_boxes[y],curr_boxes);
-    // }
-
-    for (int i : GpuGridRangeX(num_batch_boxes[y])) {
-      Flipped<true>(curr_boxes[i]);
-    }
-  }
-  // }
-}
-
-
 // Produce a global bitmask (result_mask) of selected boxes from bitmask
 // generated by NMSKernel Abort early if max_boxes boxes are selected.
 // Bitmask is num_boxes*bit_mask_len bits indicating whether to keep or
diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc
@@ -570,10 +570,4 @@ Status ReadTextOrBinaryProto(Env* env, const string& fname,
   return ReadBinaryProto(env, fname, proto);
 }
 
-int setenv(const char* name, const char* value, int overwrite) {
-  return ::setenv(name, value, overwrite);
-}
-
-int unsetenv(const char* name) { return ::unsetenv(name); }
-
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/nvtx.h b/tensorflow/core/platform/nvtx.h
@@ -16,7 +16,11 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_PLATFORM_NVTX_H_
 #define TENSORFLOW_CORE_PLATFORM_NVTX_H_
 
+#ifdef _WIN32
+#include "cuda/include/nvtx3/nvToolsExt.h"
+#else
 #include "third_party/nvtx3/nvToolsExt.h"
+#endif
 
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/attr_value_util.h"
diff --git a/tensorflow/core/platform/posix/env.cc b/tensorflow/core/platform/posix/env.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include <fcntl.h>
 #include <fnmatch.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/time.h>
@@ -258,4 +259,10 @@ void PosixEnv::GetLocalTempDirectories(std::vector<string>* list) {
   }
 }
 
+int setenv(const char* name, const char* value, int overwrite) {
+  return ::setenv(name, value, overwrite);
+}
+
+int unsetenv(const char* name) { return ::unsetenv(name); }
+
 }  // namespace tensorflow
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
@@ -73,7 +73,6 @@ class AlgorithmDesc;
 
 class StreamExecutor;
 class ScratchAllocator;
-enum BatchNormalizationKind;
 
 // Convert a type to the corresponding QuantizedActivationMode.
 template <typename ElementType>
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
@@ -531,7 +531,10 @@ def lib_name(base_name, cpu_value, version = None, static = False):
             return "lib%s.a" % base_name
         return "lib%s.so%s" % (base_name, version)
     elif cpu_value == "Windows":
-        return "%s.lib" % base_name
+        if base_name == "nvToolsExt":
+            return "lib/x64/nvToolsExt64_1.lib"
+        else:
+            return "%s.lib" % base_name
     elif cpu_value == "Darwin":
         if static:
             return "lib%s.a" % base_name
@@ -669,7 +672,7 @@ def _find_libs(repository_ctx, cuda_config):
             "nvToolsExt",
             repository_ctx,
             cpu_value,
-            cuda_config.config["cuda_library_dir"],
+            cuda_config.nvToolsExt_path,
             "1",
         ),
         "cupti": _find_cuda_lib(
@@ -762,6 +765,11 @@ def _get_cuda_config(repository_ctx):
         cufft_version = cuda_version
         cusparse_version = cuda_version
 
+    if cpu_value == "Windows":
+        nvToolsExt_path = repository_ctx.os.environ.get("NVTOOLSEXT_PATH", "C:/Program Files/NVIDIA Corporation/NvToolsExt/")
+    else:
+        nvToolsExt_path = toolkit_path
+
     return struct(
         cuda_toolkit_path = toolkit_path,
         cuda_version = cuda_version,
@@ -775,6 +783,7 @@ def _get_cuda_config(repository_ctx):
         compute_capabilities = compute_capabilities(repository_ctx),
         cpu_value = cpu_value,
         config = config,
+        nvToolsExt_path=nvToolsExt_path,
     )
 
 def _tpl(repository_ctx, tpl, substitutions = {}, out = None):