diff --git a/csrc/npu/NPUBlockHandle.h b/csrc/npu/NPUBlockHandle.h
deleted file mode 100644
index 6319b9c5..00000000
--- a/csrc/npu/NPUBlockHandle.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#pragma once
-#include "npu/core/NPUMacros.h"
-
-namespace c10_npu {
-namespace NPUCachingAllocator {
-// for torch2.X graph mode(non-raw malloc)
-// We only expose a Block data of hgandle to the user for storing the
-// application. The user does not need to perceive the actual data structure,
-// but can query and release the data through handle.
-
-/// @ingroup torch_npu
-/// @brief Malloc Block from DeviceCachingAllocator
-/// @param [in] size: size used for memory malloc
-/// @param [in] stream: stream used for memory malloc
-/// @return void*: block handle to the memory block
-C10_NPU_API void* MallocBlock(size_t size, void* stream, int device = -1);
-
-/// @ingroup torch_npu
-/// @brief Free Block according to handle
-/// @param [in] handle: the block handle to free
-/// @return void
-C10_NPU_API void FreeBlock(void* handle);
-
-/// @ingroup torch_npu
-/// @brief Get device memory address according to block handle
-/// @param [in] handle: the block handle to query address
-/// @return void*: the device memory address managed by block
-C10_NPU_API void* GetBlockPtr(const void* handle);
-
-/// @ingroup torch_npu
-/// @brief Get device memory size according to handle
-/// @param [in] handle: the block handle to query size
-/// @return size: the device memory size managed by block
-C10_NPU_API size_t GetBlockSize(const void* handle);
-} // namespace NPUCachingAllocator
-} // namespace c10_npu
diff --git a/csrc/npu/NPUCachingAllocator.cpp b/csrc/npu/NPUCachingAllocator.cpp
index 9c90dd52..8a3b2071 100644
--- a/csrc/npu/NPUCachingAllocator.cpp
+++ b/csrc/npu/NPUCachingAllocator.cpp
@@ -13,14 +13,14 @@
 #include <c10/util/flat_hash_map.h>
 #include <c10/util/irange.h>
 
-#include "Memory.h"
-#include "NPUBlockHandle.h"
 #include "csrc/npu/CachingAllocatorHelper.h"
 #include "csrc/npu/NPUCachingAllocator.h"
-#include "npu/acl/include/acl/acl_base.h"
-#include "npu/core/NPUGuard.h"
+#include "csrc/npu/NPUFunctions.h"
+#include "csrc/npu/Memory.h"
+
 #include "npu/core/interface/AsyncTaskQueueInterface.h"
 #include "npu/core/sys_ctrl/npu_sys_ctrl.h"
+#include "npu/acl/include/acl/acl_base.h"
 
 namespace c10_npu {
 namespace NPUCachingAllocator {
@@ -128,7 +128,7 @@ struct BlockPool {
 };
 
 struct Block {
-  int device; // npu
+  int device;
   void* stream; // allocation stream
   stream_set stream_uses; // streams on which the block was used
   size_t size; // block size in bytes
@@ -2259,46 +2259,6 @@ void local_raw_delete(void* ptr) {
   caching_allocator.free(ptr);
 }
 
-void* MallocBlock(size_t size, void* stream, int device) {
-  if (device == -1) {
-    NPU_CHECK_ERROR(c10_npu::GetDevice(&device));
-  }
-  if ((device < 0) ||
-      (device > static_cast<int>(caching_allocator.device_allocator.size()))) {
-    return nullptr;
-  }
-  AT_ASSERT(
-      caching_allocator.device_allocator[device],
-      PTA_ERROR(ErrCode::NOT_FOUND));
-  AT_ASSERT(stream, PTA_ERROR(ErrCode::NOT_FOUND));
-  auto block =
-      caching_allocator.device_allocator[device]->malloc(device, size, stream);
-  AT_ASSERT(block, PTA_ERROR(ErrCode::NOT_FOUND));
-  return reinterpret_cast<void*>(block);
-}
-
-void FreeBlock(void* handle) {
-  Block* block = reinterpret_cast<Block*>(handle);
-  AT_ASSERT(block, PTA_ERROR(ErrCode::PTR));
-  caching_allocator.assertValidDevice(block->device);
-  AT_ASSERT(
-      caching_allocator.device_allocator[block->device],
-      PTA_ERROR(ErrCode::NOT_FOUND));
-  caching_allocator.device_allocator[block->device]->free(block);
-}
-
-void* GetBlockPtr(const void* handle) {
-  const Block* block = reinterpret_cast<const Block*>(handle);
-  AT_ASSERT(block, PTA_ERROR(ErrCode::PTR));
-  return block->ptr;
-}
-
-size_t GetBlockSize(const void* handle) {
-  const Block* block = reinterpret_cast<const Block*>(handle);
-  AT_ASSERT(block, PTA_ERROR(ErrCode::PTR));
-  return block->size;
-}
-
 struct BackendStaticInitializer {
   BackendStaticInitializer() {
     allocator.store(&caching_allocator);
diff --git a/csrc/npu/NPUCachingAllocator.h b/csrc/npu/NPUCachingAllocator.h
index 2ff287cd..1dc5c508 100644
--- a/csrc/npu/NPUCachingAllocator.h
+++ b/csrc/npu/NPUCachingAllocator.h
@@ -19,11 +19,12 @@ C10_NPU_API std::mutex* getFreeMutex();
 // block inside of already allocated area.
 class FreeMemoryCallback {
  public:
-  virtual ~FreeMemoryCallback(){};
+  virtual ~FreeMemoryCallback() = default;
   virtual bool Execute() = 0;
 };
 
 C10_DECLARE_REGISTRY(FreeNPUMemoryCallbacksRegistry, FreeMemoryCallback);
+
 #define REGISTER_FREE_MEMORY_CALLBACK(name, ...) \
   C10_REGISTER_CLASS(FreeNPUMemoryCallbacksRegistry, name, __VA_ARGS__);