-// Copyright (C) 2018-2021 Intel Corporation
+// Copyright (C) 2018-2023 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
-
 #include "cuda_async_infer_request.hpp"
-
-#include <threading/ie_cpu_streams_executor.hpp>
-
-#include "cuda_executable_network.hpp"
 #include "cuda_itt.hpp"
 #include "cuda_thread_pool.hpp"
 
 namespace ov {
 namespace nvidia_gpu {
 
-CudaAsyncInferRequest::CudaAsyncInferRequest(const CudaInferRequest::Ptr& inferRequest,
-                                             const InferenceEngine::ITaskExecutor::Ptr& cpuTaskExecutor,
-                                             const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
-                                             const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor)
-    : AsyncInferRequestThreadSafeDefault(inferRequest, cpuTaskExecutor, callbackExecutor), _inferRequest(inferRequest) {
+CudaAsyncInferRequest::CudaAsyncInferRequest(const CudaInferRequest::Ptr& request,
+                                             const std::shared_ptr<ov::threading::ITaskExecutor>& task_executor,
+                                             const std::shared_ptr<ov::threading::ITaskExecutor>& wait_executor,
+                                             const std::shared_ptr<ov::threading::ITaskExecutor>& callback_executor)
+    : ov::IAsyncInferRequest(request, task_executor, callback_executor),
+      request_(request) {
     // In current implementation we have CPU only tasks and no needs in 2 executors
     // So, by default single stage pipeline is created.
     // This stage executes InferRequest::Infer() using cpuTaskExecutor.
     // But if remote asynchronous device is used the pipeline can by splitted tasks that are executed by cpuTaskExecutor
     // and waiting tasks. Waiting tasks can lock execution thread so they use separate threads from other executor.
     constexpr const auto remoteDevice = true;
 
-    auto cudaThreadPool = std::dynamic_pointer_cast<CudaThreadPool>(waitExecutor);
+    auto cuda_thread_pool = std::dynamic_pointer_cast<CudaThreadPool>(wait_executor);
     if (remoteDevice) {
-        _pipeline = {{cpuTaskExecutor,
+        m_pipeline = {{task_executor,
                       [this] {
-                          OV_ITT_SCOPED_TASK(itt::domains::nvidia_gpu, "CudaAsyncInferRequest::Preprocessing");
-                          _inferRequest->inferPreprocess();
+                          OV_ITT_SCOPED_TASK(itt::domains::nvidia_gpu, "CudaAsyncInferRequest::infer_preprocess");
+                          request_->infer_preprocess();
                       }},
-                     {waitExecutor,
-                      [this, cudaThreadPool] {
-                          auto& threadContext = cudaThreadPool->GetThreadContext();
+                     {wait_executor,
+                      [this, cuda_thread_pool] {
+                          auto& threadContext = cuda_thread_pool->get_thread_context();
                           {
-                              OV_ITT_SCOPED_TASK(itt::domains::nvidia_gpu, "CudaAsyncInferRequest::StartPipeline");
-                              _inferRequest->startPipeline(threadContext);
+                              OV_ITT_SCOPED_TASK(itt::domains::nvidia_gpu, "CudaAsyncInferRequest::start_pipeline");
+                              request_->start_pipeline(threadContext);
                           }
                           {
-                              OV_ITT_SCOPED_TASK(itt::domains::nvidia_gpu, "CudaAsyncInferRequest::WaitPipeline");
-                              _inferRequest->waitPipeline(threadContext);
+                              OV_ITT_SCOPED_TASK(itt::domains::nvidia_gpu, "CudaAsyncInferRequest::wait_pipeline");
+                              request_->wait_pipeline(threadContext);
                           }
                       }},
-                     {cpuTaskExecutor, [this] {
-                          OV_ITT_SCOPED_TASK(itt::domains::nvidia_gpu, "CudaAsyncInferRequest::Postprocessing");
-                          _inferRequest->inferPostprocess();
+                     {task_executor, [this] {
+                          OV_ITT_SCOPED_TASK(itt::domains::nvidia_gpu, "CudaAsyncInferRequest::infer_postprocess");
+                          request_->infer_postprocess();
                       }}};
     }
 }
 
-void CudaAsyncInferRequest::Cancel() {
-    InferenceEngine::AsyncInferRequestThreadSafeDefault::Cancel();
-    _inferRequest->Cancel();
+CudaAsyncInferRequest::~CudaAsyncInferRequest() {
+    ov::IAsyncInferRequest::stop_and_wait();
 }
 
-void CudaAsyncInferRequest::Infer_ThreadUnsafe() { StartAsync_ThreadUnsafe(); }
+void CudaAsyncInferRequest::cancel() {
+    ov::IAsyncInferRequest::cancel();
+    request_->cancel();
+}
 
+void CudaAsyncInferRequest::infer_thread_unsafe() {
+    start_async_thread_unsafe();
+}
 }  // namespace nvidia_gpu
 }  // namespace ov
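
The constructor above builds m_pipeline as an ordered list of {executor, task} stages: preprocess and postprocess run on the CPU task executor, while the blocking start/wait stage runs on the dedicated wait executor backed by the CUDA thread pool. The sketch below is not part of the commit; it is a minimal, self-contained illustration of that stage-pipeline idea under stated assumptions. The TaskExecutor type, the executor names, and the inline run() call are hypothetical stand-ins for ov::threading::ITaskExecutor and the plugin's asynchronous scheduling, used only to show how the stages pair with executors.

// Illustrative sketch only (not from the commit): models a pipeline of
// {executor, task} stages similar in shape to m_pipeline above.
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

// Hypothetical minimal executor: a real one would schedule the task on its
// own thread(s); this one simply runs it inline and tags the output.
struct TaskExecutor {
    explicit TaskExecutor(std::string name) : name_(std::move(name)) {}
    void run(const std::function<void()>& task) const {
        std::cout << "[" << name_ << "] ";
        task();
    }
    std::string name_;
};

using Stage = std::pair<std::shared_ptr<TaskExecutor>, std::function<void()>>;

int main() {
    auto cpu_executor = std::make_shared<TaskExecutor>("cpu_task_executor");
    auto wait_executor = std::make_shared<TaskExecutor>("cuda_wait_executor");

    // Three stages mirroring the constructor above: CPU-side preprocess,
    // a blocking start/wait stage on the separate wait executor, and
    // CPU-side postprocess.
    std::vector<Stage> pipeline = {
        {cpu_executor, [] { std::cout << "infer_preprocess\n"; }},
        {wait_executor, [] { std::cout << "start_pipeline + wait_pipeline\n"; }},
        {cpu_executor, [] { std::cout << "infer_postprocess\n"; }},
    };

    // A real async request chains the stages so each one schedules the next;
    // running them sequentially keeps the sketch simple.
    for (const auto& [executor, task] : pipeline) {
        executor->run(task);
    }
    return 0;
}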