Skip to content

Commit 82d1962

Browse files
Assign BCS at first blit enqueue
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
1 parent 4f7a225 commit 82d1962

File tree

8 files changed

+91
-22
lines changed

8 files changed

+91
-22
lines changed

opencl/source/command_queue/command_queue.cpp

+34-12
Original file line numberDiff line numberDiff line change
@@ -73,19 +73,22 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr
7373
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
7474
auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
7575

76-
bool bcsAllowed = hwInfoConfig->isBlitterFullySupported(hwInfo) &&
77-
hwHelper.isSubDeviceEngineSupported(hwInfo, device->getDeviceBitfield(), aub_stream::EngineType::ENGINE_BCS);
76+
bcsAllowed = hwInfoConfig->isBlitterFullySupported(hwInfo) &&
77+
hwHelper.isSubDeviceEngineSupported(hwInfo, device->getDeviceBitfield(), aub_stream::EngineType::ENGINE_BCS);
7878

7979
if (bcsAllowed || device->getDefaultEngine().commandStreamReceiver->peekTimestampPacketWriteEnabled()) {
8080
timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
8181
deferredTimestampPackets = std::make_unique<TimestampPacketContainer>();
8282
}
83-
if (bcsAllowed) {
84-
auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice();
85-
auto &selectorCopyEngine = neoDevice.getSelectorCopyEngine();
86-
auto bcsEngineType = EngineHelpers::getBcsEngineType(hwInfo, device->getDeviceBitfield(), selectorCopyEngine, internalUsage);
87-
bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)] = neoDevice.tryGetEngine(bcsEngineType, EngineUsage::Regular);
88-
bcsEngineTypes.push_back(bcsEngineType);
83+
84+
auto deferCmdQBcsInitialization = hwInfo.featureTable.ftrBcsInfo.count() > 1u;
85+
86+
if (DebugManager.flags.DeferCmdQBcsInitialization.get() != -1) {
87+
deferCmdQBcsInitialization = DebugManager.flags.DeferCmdQBcsInitialization.get();
88+
}
89+
90+
if (!deferCmdQBcsInitialization) {
91+
this->initializeBcsEngine(internalUsage);
8992
}
9093
}
9194

@@ -181,7 +184,8 @@ CommandStreamReceiver &CommandQueue::getGpgpuCommandStreamReceiver() const {
181184
return *gpgpuEngine->commandStreamReceiver;
182185
}
183186

184-
CommandStreamReceiver *CommandQueue::getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const {
187+
CommandStreamReceiver *CommandQueue::getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) {
188+
initializeBcsEngine(isSpecial());
185189
const EngineControl *engine = this->bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)];
186190
if (engine == nullptr) {
187191
return nullptr;
@@ -190,7 +194,8 @@ CommandStreamReceiver *CommandQueue::getBcsCommandStreamReceiver(aub_stream::Eng
190194
}
191195
}
192196

193-
CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() const {
197+
CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() {
198+
initializeBcsEngine(isSpecial());
194199
for (const EngineControl *engine : this->bcsEngines) {
195200
if (engine != nullptr) {
196201
return engine->commandStreamReceiver;
@@ -199,7 +204,8 @@ CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() const {
199204
return nullptr;
200205
}
201206

202-
CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelectionArgs &args) const {
207+
CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelectionArgs &args) {
208+
initializeBcsEngine(isSpecial());
203209
if (isCopyOnly) {
204210
return *getBcsCommandStreamReceiver(bcsEngineTypes[0]);
205211
}
@@ -267,6 +273,21 @@ CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelec
267273
return *selectedCsr;
268274
}
269275

276+
// Binds a blitter (BCS) engine to this queue on first use.
//
// The body runs only while `bcsAllowed` is set and `bcsInitialized` is still
// false, so repeated calls (the BCS getters call this on every access) are
// cheap no-ops once the first call has gone through.
//
// internalUsage is forwarded unchanged to EngineHelpers::getBcsEngineType();
// how it influences the choice of engine is decided there, not here.
void CommandQueue::initializeBcsEngine(bool internalUsage) {
277+
if (bcsAllowed && !bcsInitialized) {
278+
auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice();
279+
auto &selectorCopyEngine = neoDevice.getSelectorCopyEngine();
280+
// Engine type is picked from the hardware info, the device bitfield and the
// device-level copy-engine selector.
auto bcsEngineType = EngineHelpers::getBcsEngineType(device->getHardwareInfo(), device->getDeviceBitfield(), selectorCopyEngine, internalUsage);
281+
// The slot may end up null: the result of tryGetEngine() is checked below.
bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)] = neoDevice.tryGetEngine(bcsEngineType, EngineUsage::Regular);
282+
bcsEngineTypes.push_back(bcsEngineType);
283+
// Marked as initialized before the null check, so a failed lookup is not
// retried on later calls.
bcsInitialized = true;
284+
if (bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)]) {
285+
// Only a successfully acquired engine gets its OS context and direct
// submission initialized.
bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)]->osContext->ensureContextInitialized();
286+
bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)]->commandStreamReceiver->initDirectSubmission();
287+
}
288+
}
289+
}
290+
270291
Device &CommandQueue::getDevice() const noexcept {
271292
return device->getDevice();
272293
}
@@ -280,7 +301,7 @@ volatile uint32_t *CommandQueue::getHwTagAddress() const {
280301
return getGpgpuCommandStreamReceiver().getTagAddress();
281302
}
282303

283-
bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) const {
304+
bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) {
284305
DEBUG_BREAK_IF(getHwTag() == CompletionStamp::notReady);
285306

286307
if (getGpgpuCommandStreamReceiver().testTaskCountReady(getHwTagAddress(), gpgpuTaskCount)) {
@@ -1028,6 +1049,7 @@ void CommandQueue::overrideEngine(aub_stream::EngineType engineType, EngineUsage
10281049
timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
10291050
deferredTimestampPackets = std::make_unique<TimestampPacketContainer>();
10301051
isCopyOnly = true;
1052+
bcsInitialized = true;
10311053
} else {
10321054
gpgpuEngine = &device->getEngine(engineType, engineUsage);
10331055
}

opencl/source/command_queue/command_queue.h

+7-4
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
202202

203203
volatile uint32_t *getHwTagAddress() const;
204204

205-
bool isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) const;
205+
bool isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState);
206206

207207
bool isWaitForTimestampsEnabled() const;
208208
virtual bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, uint32_t taskCount) = 0;
@@ -225,9 +225,10 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
225225
void initializeGpgpu() const;
226226
void initializeGpgpuInternals() const;
227227
MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const;
228-
MOCKABLE_VIRTUAL CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const;
229-
CommandStreamReceiver *getBcsForAuxTranslation() const;
230-
MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(const CsrSelectionArgs &args) const;
228+
MOCKABLE_VIRTUAL CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType);
229+
CommandStreamReceiver *getBcsForAuxTranslation();
230+
MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(const CsrSelectionArgs &args);
231+
void initializeBcsEngine(bool internalUsage);
231232
Device &getDevice() const noexcept;
232233
ClDevice &getClDevice() const { return *device; }
233234
Context &getContext() const { return *context; }
@@ -413,6 +414,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
413414
bool perfCountersEnabled = false;
414415

415416
bool isCopyOnly = false;
417+
bool bcsAllowed = false;
418+
bool bcsInitialized = false;
416419

417420
LinearStream *commandStream = nullptr;
418421

opencl/test/unit_test/command_queue/command_queue_tests.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1067,7 +1067,7 @@ struct WaitUntilCompletionTests : public ::testing::Test {
10671067

10681068
MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw<Family>(context, device, nullptr, false){};
10691069

1070-
CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const override {
1070+
CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) override {
10711071
return bcsCsrToReturn;
10721072
}
10731073

opencl/test/unit_test/command_queue/command_queue_tests_pvc_and_later.cpp

+45-5
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,46 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, givenAdditionalBcsWhenCreatingCommandQue
8484
EXPECT_EQ(1u, queue->countBcsEngines());
8585
}
8686

87+
// With DeferCmdQBcsInitialization forced to 1, a freshly constructed
// MockCommandQueue must report zero BCS engines: nothing has asked for a
// blitter yet, so none has been assigned.
HWTEST2_F(CommandQueuePvcAndLaterTests, givenDeferCmdQBcsInitializationEnabledWhenCreateCommandQueueThenBcsCountIsZero, IsAtLeastXeHpcCore) {
88+
// presumably restores the debug flags when it goes out of scope — confirm
DebugManagerStateRestore restorer;
89+
// NOTE(review): the flag is declared as int32_t; the unsigned literal is
// converted implicitly.
DebugManager.flags.DeferCmdQBcsInitialization.set(1u);
90+
91+
HardwareInfo hwInfo = *defaultHwInfo;
92+
// presumably sets the nine lowest bits, i.e. nine copy engines — TODO confirm
hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9);
93+
hwInfo.capabilityTable.blitterOperationsSupported = true;
94+
MockDevice *device = MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0);
95+
MockClDevice clDevice{device};
96+
cl_device_id clDeviceId = static_cast<cl_device_id>(&clDevice);
97+
ClDeviceVector clDevices{&clDeviceId, 1u};
98+
cl_int retVal{};
99+
auto context = std::unique_ptr<Context>{Context::create<Context>(nullptr, clDevices, nullptr, nullptr, retVal)};
100+
EXPECT_EQ(CL_SUCCESS, retVal);
101+
102+
auto queue = std::make_unique<MockCommandQueue>(*context);
103+
104+
// No BCS getter has been called on the queue, so no engine should be bound.
EXPECT_EQ(0u, queue->countBcsEngines());
105+
}
106+
107+
// With DeferCmdQBcsInitialization forced to 0, the queue constructor calls
// initializeBcsEngine() itself, so a freshly constructed MockCommandQueue must
// already report at least one BCS engine.
HWTEST2_F(CommandQueuePvcAndLaterTests, givenDeferCmdQBcsInitializationDisabledWhenCreateCommandQueueThenBcsIsInitialized, IsAtLeastXeHpcCore) {
108+
// presumably restores the debug flags when it goes out of scope — confirm
DebugManagerStateRestore restorer;
109+
// NOTE(review): the flag is declared as int32_t; the unsigned literal is
// converted implicitly.
DebugManager.flags.DeferCmdQBcsInitialization.set(0u);
110+
111+
HardwareInfo hwInfo = *defaultHwInfo;
112+
// presumably sets the nine lowest bits, i.e. nine copy engines — TODO confirm
hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9);
113+
hwInfo.capabilityTable.blitterOperationsSupported = true;
114+
MockDevice *device = MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0);
115+
MockClDevice clDevice{device};
116+
cl_device_id clDeviceId = static_cast<cl_device_id>(&clDevice);
117+
ClDeviceVector clDevices{&clDeviceId, 1u};
118+
cl_int retVal{};
119+
auto context = std::unique_ptr<Context>{Context::create<Context>(nullptr, clDevices, nullptr, nullptr, retVal)};
120+
EXPECT_EQ(CL_SUCCESS, retVal);
121+
122+
auto queue = std::make_unique<MockCommandQueue>(*context);
123+
124+
// Deferral is off, so construction alone must have bound an engine.
EXPECT_NE(0u, queue->countBcsEngines());
125+
}
126+
87127
HWTEST2_F(CommandQueuePvcAndLaterTests, givenQueueWithMainBcsIsReleasedWhenNewQueueIsCreatedThenMainBcsCanBeUsedAgain, IsAtLeastXeHpcCore) {
88128
HardwareInfo hwInfo = *defaultHwInfo;
89129
hwInfo.featureTable.ftrBcsInfo = maxNBitValue(9);
@@ -102,9 +142,9 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, givenQueueWithMainBcsIsReleasedWhenNewQu
102142
auto queue4 = std::make_unique<MockCommandQueue>(*context);
103143

104144
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, queue1->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getOsContext().getEngineType());
105-
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, queue2->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1)->getOsContext().getEngineType());
106-
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, queue3->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2)->getOsContext().getEngineType());
107-
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, queue4->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1)->getOsContext().getEngineType());
145+
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, queue2->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2)->getOsContext().getEngineType());
146+
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, queue3->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1)->getOsContext().getEngineType());
147+
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, queue4->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2)->getOsContext().getEngineType());
108148

109149
// Releasing main BCS. Next creation should be able to grab it
110150
queue1.reset();
@@ -114,7 +154,7 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, givenQueueWithMainBcsIsReleasedWhenNewQu
114154
// Releasing link BCS. Shouldn't change anything
115155
queue2.reset();
116156
queue2 = std::make_unique<MockCommandQueue>(*context);
117-
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, queue2->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2)->getOsContext().getEngineType());
157+
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, queue2->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1)->getOsContext().getEngineType());
118158
}
119159

120160
HWTEST2_F(CommandQueuePvcAndLaterTests, givenCooperativeEngineUsageHintAndCcsWhenCreatingCommandQueueThenCreateQueueWithCooperativeEngine, IsAtLeastXeHpcCore) {
@@ -491,10 +531,10 @@ HWTEST2_F(BcsCsrSelectionCommandQueueTests, givenMultipleEnginesInQueueWhenSelec
491531
aub_stream::ENGINE_BCS7,
492532
aub_stream::ENGINE_BCS8,
493533
});
494-
EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1), &queue->selectCsrForBuiltinOperation(args));
495534
EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2), &queue->selectCsrForBuiltinOperation(args));
496535
EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1), &queue->selectCsrForBuiltinOperation(args));
497536
EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2), &queue->selectCsrForBuiltinOperation(args));
537+
EXPECT_EQ(queue->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS1), &queue->selectCsrForBuiltinOperation(args));
498538
}
499539
}
500540

opencl/test/unit_test/event/event_tests.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ TEST(Event, givenBcsCsrSetInEventWhenPeekingBcsTaskCountThenReturnCorrectTaskCou
144144
new MockClDevice{MockDevice::createWithNewExecutionEnvironment<MockAlignedMallocManagerDevice>(&hwInfo)}};
145145
MockContext context{device.get()};
146146
MockCommandQueue queue{context};
147+
queue.initializeBcsEngine(false);
147148
queue.updateBcsTaskCount(queue.bcsEngines[0]->getEngineType(), 19);
148149
Event event{&queue, CL_COMMAND_READ_BUFFER, 0, 0};
149150

opencl/test/unit_test/mocks/mock_command_queue.h

+1
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
257257
// Constructs the mock by forwarding to BaseClass (last argument fixed to
// false — presumably the internal-usage flag; confirm against the base
// constructor) and then requests BCS setup right away, so tests using this
// mock do not depend on the deferred-initialization path.
MockCommandQueueHw(Context *context,
258258
ClDevice *device,
259259
cl_queue_properties *properties) : BaseClass(context, device, properties, false) {
260+
// initializeBcsEngine() does nothing unless bcsAllowed is set and no earlier
// call has already run.
this->initializeBcsEngine(false);
260261
}
261262

262263
void clearBcsEngines() {

opencl/test/unit_test/test_files/igdrcl.config

+1
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,7 @@ MakeIndirectAllocationsResidentAsPack = -1
383383
MakeEachAllocationResident = -1
384384
AssignBCSAtEnqueue = -1
385385
DeferCmdQGpgpuInitialization = -1
386+
DeferCmdQBcsInitialization = -1
386387
ReuseKernelBinaries = -1
387388
EnableChipsetUniqueUUID = -1
388389
ForceSimdMessageSizeInWalker = -1

shared/source/debug_settings/debug_variables_base.inl

+1
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, MakeIndirectAllocationsResidentAsPack, -1, "-1:
268268
DECLARE_DEBUG_VARIABLE(int32_t, MakeEachAllocationResident, -1, "-1: default, 0: disabled, 1: bind every allocation at creation time, 2: bind all created allocations in flush")
269269
DECLARE_DEBUG_VARIABLE(int32_t, AssignBCSAtEnqueue, -1, "-1: default, 0:disabled, 1: enabled.")
270270
DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQGpgpuInitialization, -1, "-1: default, 0:disabled, 1: enabled.")
271+
DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQBcsInitialization, -1, "-1: default, 0:disabled, 1: enabled.")
271272
DECLARE_DEBUG_VARIABLE(int32_t, ReuseKernelBinaries, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver reuses kernel binaries.")
272273

273274
/*DIRECT SUBMISSION FLAGS*/

0 commit comments

Comments
 (0)