-
Notifications
You must be signed in to change notification settings - Fork 351
/
Copy pathmhw_render.h
332 lines (296 loc) · 13.5 KB
/
mhw_render.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
/*
* Copyright (c) 2014-2021, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//!
//! \file mhw_render.h
//! \brief MHW interface for constructing commands for the render engine
//! \details Implements the functionalities common across all platforms for MHW_RENDER
//!
#ifndef __MHW_RENDER_H__
#define __MHW_RENDER_H__
#include "mos_os.h"
#include "mhw_state_heap.h"
#include "mhw_render_itf.h"
//!
//! \brief Render engine limits and sizing constants
//! \details NOTE(review): units (bytes, entries, registers) are not stated in
//!          this header - confirm against the code that consumes each value.
//!
#define MHW_RENDER_ENGINE_SSH_SURFACES_PER_BT_MAX           256
#define MHW_RENDER_ENGINE_SAMPLERS_MAX                      16
#define MHW_RENDER_ENGINE_SAMPLERS_AVS_MAX                  8
// The "PALOAD" spelling is historical; it is kept so existing users still build.
#define MHW_RENDER_ENGINE_MEDIA_PALOAD_SIZE_MAX             512
// Correctly spelled alias of the macro above; prefer this name in new code.
#define MHW_RENDER_ENGINE_MEDIA_PAYLOAD_SIZE_MAX            MHW_RENDER_ENGINE_MEDIA_PALOAD_SIZE_MAX
#define MHW_RENDER_ENGINE_URB_SIZE_MAX                      2048
#define MHW_RENDER_ENGINE_URB_ENTRIES_MAX                   128
#define MHW_RENDER_ENGINE_INTERFACE_DESCRIPTOR_ENTRIES_MAX  64
#define MHW_RENDER_ENGINE_EU_INDEX_MAX                      12
#define MHW_RENDER_ENGINE_SIZE_REGISTERS_PER_THREAD         0x1800
#define MHW_RENDER_ENGINE_NUMBER_OF_THREAD_UNIT             32
// 1024 divided by the thread unit above, i.e. 32 with the current values.
#define MHW_RENDER_ENGINE_MAX_NUMBER_OF_THREAD              (1024 / MHW_RENDER_ENGINE_NUMBER_OF_THREAD_UNIT)
// Also used as the element count of MHW_VFE_SCOREBOARD::ScoreboardDelta.
#define MHW_MAX_DEPENDENCY_COUNT                            8
//!
//! \brief   L3 cache register values
//! \details Every value defaults to 0. The destructor is virtual, so the type
//!          can be derived from and destroyed through a base pointer.
//!
typedef struct _MHW_RENDER_ENGINE_L3_CACHE_SETTINGS
{
    uint32_t dwCntlReg  = 0;
    uint32_t dwCntlReg2 = 0;
    uint32_t dwCntlReg3 = 0;
    uint32_t dwSqcReg1  = 0;
    uint32_t dwSqcReg4  = 0;
    uint32_t dwLra1Reg  = 0;

    virtual ~_MHW_RENDER_ENGINE_L3_CACHE_SETTINGS() {}
} MHW_RENDER_ENGINE_L3_CACHE_SETTINGS, *PMHW_RENDER_ENGINE_L3_CACHE_SETTINGS;
//!
//! \brief   L3 cache configuration
//! \details Holds a *_Register / *_Setting pair per L3 register plus two flags.
//!          All integers default to 0 and both flags default to false.
//!          NOTE(review): *_Register presumably holds the register offset and
//!          *_Setting the value to program - confirm against the users.
//!
typedef struct _MHW_RENDER_ENGINE_L3_CACHE_CONFIG
{
    uint32_t dwL3CacheCntlReg_Register  = 0;
    uint32_t dwL3CacheCntlReg_Setting   = 0;

    uint32_t dwL3CacheCntlReg2_Register = 0;
    uint32_t dwL3CacheCntlReg2_Setting  = 0;

    uint32_t dwL3CacheCntlReg3_Register = 0;
    uint32_t dwL3CacheCntlReg3_Setting  = 0;

    uint32_t dwL3CacheSqcReg1_Register  = 0;
    uint32_t dwL3CacheSqcReg1_Setting   = 0;

    uint32_t dwL3CacheSqcReg4_Register  = 0;
    uint32_t dwL3CacheSqcReg4_Setting   = 0;

    uint32_t dwL3LRA1Reg_Register       = 0;
    uint32_t dwL3LRA1Reg_Setting        = 0;

    bool     bL3CachingEnabled          = false;
    bool     bL3LRA1Reset               = false;
} MHW_RENDER_ENGINE_L3_CACHE_CONFIG, *PMHW_RENDER_ENGINE_L3_CACHE_CONFIG;
//!
//! \brief Shift amounts used for render engine addresses
//! \details NOTE(review): presumably the number of low address bits dropped
//!          (64-byte and 4KB alignment respectively) - confirm with the users.
//!
typedef enum _MHW_RENDER_ENGINE_ADDRESS_SHIFT
{
    MHW_RENDER_ENGINE_KERNEL_POINTER_SHIFT     = 6,
    MHW_RENDER_ENGINE_STATE_BASE_ADDRESS_SHIFT = 12
} MHW_RENDER_ENGINE_ADDRESS_SHIFT;
//!
//! \brief VFE slice disable selection
//! \details Enumerator values are spelled out; they match the implicit
//!          0, 1, 2 numbering.
//!
typedef enum _MHW_VFE_SLICE_DISABLE
{
    MHW_VFE_SLICE_ALL           = 0,
    MHW_VFE_SLICE0_SUBSLICE_ALL = 1,
    MHW_VFE_SLICE0_SUBSLICE0    = 2
} MHW_VFE_SLICE_DISABLE;
//!
//! \brief Media walker mode
//! \details NOTE(review): the TRI and HEX notes both name BDW GT2 but with
//!          different slice counts - confirm which SKU HEX refers to.
//!
typedef enum _MHW_WALKER_MODE
{
    MHW_WALKER_MODE_NOT_SET  = -1,
    MHW_WALKER_MODE_DISABLED = 0,
    MHW_WALKER_MODE_SINGLE   = 1,   //!< dual = 0, repel = 1
    MHW_WALKER_MODE_DUAL     = 2,   //!< dual = 1, repel = 0
    MHW_WALKER_MODE_TRI      = 3,   //!< applies in BDW GT2 which has 1 slice and 3 sampler/VME per slice
    MHW_WALKER_MODE_QUAD     = 4,   //!< applies in HSW GT3 which has 2 slices and 2 sampler/VME per slice
    MHW_WALKER_MODE_HEX      = 6,   //!< applies in BDW GT2 which has 2 slices and 3 sampler/VME per slice
    MHW_WALKER_MODE_OCT      = 8    //!< may apply in future Gen media architectures
} MHW_WALKER_MODE;
//!
//! \brief Emit-local mode
//! \details Values form the bit patterns 0b000, 0b001, 0b011 and 0b111.
//!          NOTE(review): presumably one bit per emitted local ID dimension
//!          (X, Y, Z) - confirm against the command definition.
//!
typedef enum _MHW_EMIT_LOCAL_MODE
{
    MHW_EMIT_LOCAL_NONE = 0x0,
    MHW_EMIT_LOCAL_X    = 0x1,
    MHW_EMIT_LOCAL_XY   = 0x3,
    MHW_EMIT_LOCAL_XYZ  = 0x7
} MHW_EMIT_LOCAL_MODE;
//!
//! \brief   Structure to capture HW capabilities
//! \details Plain data: fourteen 32-bit values with no default initializers,
//!          so the creator must fill in every field.
//!
typedef struct _MHW_RENDER_ENGINE_CAPS
{
    uint32_t dwMaxUnormSamplers;                //!< Max UNORM Sampler States supported
    uint32_t dwMaxAVSSamplers;                  //!< Max AVS Sampler States supported
    uint32_t dwMaxBTIndex;                      //!< Max Binding Table index per Binding Table
    uint32_t dwMaxThreads;                      //!< Max Threads supported
    uint32_t dwMaxMediaPayloadSize;             //!< Max Media payload size
    uint32_t dwMaxURBSize;                      //!< Max URB Size
    uint32_t dwMaxURBEntries;                   //!< Max URB Entries
    uint32_t dwMaxURBEntryAllocationSize;       //!< Max URB Entry Allocation Size
    uint32_t dwMaxCURBEAllocationSize;          //!< NOTE(review): presumably max CURBE allocation size - confirm
    uint32_t dwMaxInterfaceDescriptorEntries;   //!< Max Interface Descriptor Entries
    uint32_t dwMaxSubslice;                     //!< Max number of subslice
    uint32_t dwMaxEUIndex;                      //!< Max EU index (sometimes != number of EU)
    uint32_t dwNumThreadsPerEU;                 //!< Num threads per EU
    uint32_t dwSizeRegistersPerThread;          //!< Size of all registers per thread (for ASM Debug)
} MHW_RENDER_ENGINE_CAPS, *PMHW_RENDER_ENGINE_CAPS;
//!
//! \brief   State base address parameters
//! \details Groups four resource pointers with their sizes, six mocs4* values,
//!          an L1 cache configuration value and two flags. No member has a
//!          default initializer.
//!          NOTE(review): the mocs4* fields presumably carry memory object
//!          control state per heap - confirm against the command builder.
//!
typedef struct _MHW_STATE_BASE_ADDR_PARAMS
{
    // Resources
    PMOS_RESOURCE presGeneralState;
    PMOS_RESOURCE presDynamicState;
    PMOS_RESOURCE presIndirectObjectBuffer;
    PMOS_RESOURCE presInstructionBuffer;

    // Sizes, one per resource above
    uint32_t      dwGeneralStateSize;
    uint32_t      dwDynamicStateSize;
    uint32_t      dwIndirectObjectBufferSize;
    uint32_t      dwInstructionBufferSize;

    // Cache control values
    uint32_t      mocs4InstructionCache;
    uint32_t      mocs4GeneralState;
    uint32_t      mocs4DynamicState;
    uint32_t      mocs4SurfaceState;
    uint32_t      mocs4IndirectObjectBuffer;
    uint32_t      mocs4StatelessDataport;
    uint32_t      l1CacheConfig;

    // Flags
    bool          bDynamicStateRenderTarget;
    bool          addressDis;
} MHW_STATE_BASE_ADDR_PARAMS, *PMHW_STATE_BASE_ADDR_PARAMS;
//!
//! \brief   One scoreboard delta
//! \details Two 4-bit fields declared on uint8_t, so each holds 0..15.
//!
typedef struct _MHW_VFE_SCOREBOARD_DELTA
{
    uint8_t x : 4;  //!< 4-bit x component
    uint8_t y : 4;  //!< 4-bit y component
} MHW_VFE_SCOREBOARD_DELTA, *PMHW_VFE_SCOREBOARD_DELTA;
//!
//! \brief   VFE scoreboard description
//! \details The first anonymous struct packs the control bits into 32 bits
//!          (8 + 4 + 18 unnamed + 1 + 1). The anonymous union then overlays
//!          the MHW_MAX_DEPENDENCY_COUNT deltas with two 32-bit words so the
//!          same storage can be read either per delta or as raw values.
//!
typedef struct _MHW_VFE_SCOREBOARD
{
    // Control bits
    struct
    {
        uint32_t ScoreboardMask   : 8;
        uint32_t ScoreboardColor  : 4;
        uint32_t                  : 18;  // unnamed padding up to bit 30
        uint32_t ScoreboardType   : 1;
        uint32_t ScoreboardEnable : 1;
    };

    // Dependency deltas, also reachable as raw words through Value[]
    union
    {
        MHW_VFE_SCOREBOARD_DELTA ScoreboardDelta[MHW_MAX_DEPENDENCY_COUNT];
        struct
        {
            uint32_t Value[2];
        };
    };
} MHW_VFE_SCOREBOARD, *PMHW_VFE_SCOREBOARD;
//!
//! \brief   VFE parameters
//! \details Every member has a default: integers are 0, the slice selection is
//!          MHW_VFE_SLICE_ALL, the scoreboard is value-initialized and the
//!          kernel state pointer is null. The destructor is virtual, so the
//!          type can be derived from and destroyed through a base pointer.
//!
struct MHW_VFE_PARAMS
{
    uint32_t              dwDebugCounterControl     = 0;    //!< Debug Counter Control
    uint32_t              dwMaximumNumberofThreads  = 0;
    uint32_t              dwNumberofURBEntries      = 0;
    uint32_t              dwCURBEAllocationSize     = 0;
    uint32_t              dwURBEntryAllocationSize  = 0;
    uint32_t              dwPerThreadScratchSpace   = 0;
    uint32_t              dwScratchSpaceBasePointer = 0;
    MHW_VFE_SLICE_DISABLE eVfeSliceDisable          = MHW_VFE_SLICE_ALL;
    MHW_VFE_SCOREBOARD    Scoreboard                = {};
    PMHW_KERNEL_STATE     pKernelState              = nullptr;

    virtual ~MHW_VFE_PARAMS() {}
};
typedef MHW_VFE_PARAMS *PMHW_VFE_PARAMS;
//!
//! \brief   CURBE load parameters
//! \details Plain data with no default initializers.
//!          NOTE(review): how bOldInterface selects between pKernelState and
//!          the explicit length/address pair is not visible here - confirm
//!          with the command builder.
//!
typedef struct _MHW_CURBE_LOAD_PARAMS
{
    PMHW_KERNEL_STATE pKernelState;
    bool              bOldInterface;
    uint32_t          dwCURBETotalDataLength;
    uint32_t          dwCURBEDataStartAddress;
} MHW_CURBE_LOAD_PARAMS, *PMHW_CURBE_LOAD_PARAMS;
//!
//! \brief   Interface descriptor load parameters
//! \details Plain data with no default initializers: a kernel state pointer
//!          followed by four 32-bit values.
//!
typedef struct _MHW_ID_LOAD_PARAMS
{
    PMHW_KERNEL_STATE pKernelState;
    uint32_t          dwNumKernelsLoaded;
    uint32_t          dwIdIdx;
    uint32_t          dwInterfaceDescriptorStartOffset;
    uint32_t          dwInterfaceDescriptorLength;
} MHW_ID_LOAD_PARAMS, *PMHW_ID_LOAD_PARAMS;
//!
//! \brief   SIP state parameters
//! \details Plain data with no default initializers: one flag and one 32-bit
//!          base value.
//!
typedef struct _MHW_SIP_STATE_PARAMS
{
    bool     bSipKernel;
    uint32_t dwSipBase;
} MHW_SIP_STATE_PARAMS, *PMHW_SIP_STATE_PARAMS;
//!
//! \brief   Pair of 16-bit walker coordinates
//! \details The anonymous union overlays the two 16-bit fields with a single
//!          32-bit word, so the pair can be accessed per field or as a whole
//!          through value.
//!
typedef struct _MHW_WALKER_XY
{
    union
    {
        struct
        {
            uint32_t x : 16;
            uint32_t y : 16;
        };
        uint32_t value;     //!< both fields viewed as one 32-bit word
    };
} MHW_WALKER_XY, *PMHW_WALKER_XY;
//!
//! \brief   Palette parameters
//! \details Plain data with no default initializers. The struct only stores
//!          the data pointer; nothing here states who owns the pointed-to
//!          memory.
//!
typedef struct _MHW_PALETTE_PARAMS
{
    int32_t  iPaletteID;    //!< Palette ID
    int32_t  iNumEntries;   //!< Palette entries in use
    void    *pPaletteData;  //!< Palette data
} MHW_PALETTE_PARAMS, *PMHW_PALETTE_PARAMS;
//!
//! \brief   Chroma key parameters
//! \details Plain data with no default initializers: an index and a low/high
//!          pair, each 32 bits wide.
//!
typedef struct _MHW_CHROMAKEY_PARAMS
{
    uint32_t dwIndex;   //!< Chroma Key Index
    uint32_t dwLow;     //!< Chroma Key Low
    uint32_t dwHigh;    //!< Chroma Key High
} MHW_CHROMAKEY_PARAMS, *PMHW_CHROMAKEY_PARAMS;
//!
//! \brief   Media walker parameters
//! \details IMPORTANT - changes in this structure must be ported to
//!          CM_HAL_WALKER_PARAMS in cm_common.h.
//!          The leading bit-fields fill two 32-bit words:
//!          5 + 1 + 8 + 1 + 8 + 2 + 2 + 5 bits, then 24 bits + 8 unnamed bits.
//!          No member has a default initializer.
//!
typedef struct _MHW_WALKER_PARAMS
{
    // First 32-bit word of bit-fields
    uint32_t        InterfaceDescriptorOffset : 5;
    uint32_t        CmWalkerEnable            : 1;
    uint32_t        ColorCountMinusOne        : 8;
    uint32_t        UseScoreboard             : 1;
    uint32_t        ScoreboardMask            : 8;
    uint32_t        MidLoopUnitX              : 2;
    uint32_t        MidLoopUnitY              : 2;
    uint32_t        MiddleLoopExtraSteps      : 5;

    // Second 32-bit word of bit-fields
    uint32_t        GroupIdLoopSelect         : 24;
    uint32_t                                  : 8;

    // Inline data
    uint32_t        InlineDataLength;
    uint8_t        *pInlineData;

    // Loop execution counts
    uint32_t        dwLocalLoopExecCount;
    uint32_t        dwGlobalLoopExecCount;

    MHW_WALKER_MODE WalkerMode;

    // Walker geometry, each a 16-bit x / 16-bit y pair
    MHW_WALKER_XY   BlockResolution;
    MHW_WALKER_XY   LocalStart;
    MHW_WALKER_XY   LocalEnd;
    MHW_WALKER_XY   LocalOutLoopStride;
    MHW_WALKER_XY   LocalInnerLoopUnit;
    MHW_WALKER_XY   GlobalResolution;
    MHW_WALKER_XY   GlobalStart;
    MHW_WALKER_XY   GlobalOutlerLoopStride;     // spelling is part of the interface; do not rename
    MHW_WALKER_XY   GlobalInnerLoopUnit;

    bool            bAddMediaFlush;
    bool            bRequestSingleSlice;

    // Indirect data
    uint32_t        IndirectDataLength;
    uint32_t        IndirectDataStartAddress;
} MHW_WALKER_PARAMS, *PMHW_WALKER_PARAMS;
//!
//! \brief   GPGPU walker parameters
//! \details The leading bit-fields fill one 32-bit word (5 + 1 + 26 unnamed
//!          bits). No member has a default initializer.
//!
typedef struct _MHW_GPGPU_WALKER_PARAMS
{
    // One 32-bit word of bit-fields
    uint32_t                InterfaceDescriptorOffset : 5;
    uint32_t                GpGpuEnable               : 1;
    uint32_t                                          : 26;

    // Thread dimensions
    uint32_t                ThreadWidth;
    uint32_t                ThreadHeight;
    uint32_t                ThreadDepth;

    // Group dimensions
    uint32_t                GroupWidth;
    uint32_t                GroupHeight;
    uint32_t                GroupDepth;

    // Group start coordinates
    uint32_t                GroupStartingX;
    uint32_t                GroupStartingY;
    uint32_t                GroupStartingZ;

    uint32_t                SLMSize;

    // Indirect data
    uint32_t                IndirectDataLength;
    uint32_t                IndirectDataStartAddress;

    uint32_t                BindingTableID;
    uint32_t                ForcePreferredSLMZero;

    // Inline data
    bool                    isEmitInlineParameter;
    uint32_t                inlineDataLength;
    uint8_t                *inlineData;

    // Local ID generation
    bool                    isGenerateLocalID;
    MHW_EMIT_LOCAL_MODE     emitLocal;

    bool                    hasBarrier;

    // Inline data parameter list and its size
    PMHW_INLINE_DATA_PARAMS inlineDataParamBase;
    uint32_t                inlineDataParamSize;
} MHW_GPGPU_WALKER_PARAMS, *PMHW_GPGPU_WALKER_PARAMS;
//!
//! \brief   Media object parameters
//! \details Plain data with no default initializers. The struct only stores
//!          the inline data pointer and its size; nothing here states who owns
//!          the pointed-to memory.
//!
typedef struct _MHW_MEDIA_OBJECT_PARAMS
{
    uint32_t            dwInterfaceDescriptorOffset;

    // Destination selection
    uint32_t            dwHalfSliceDestinationSelect;
    uint32_t            dwSliceDestinationSelect;

    // Indirect data
    uint32_t            dwIndirectLoadLength;
    uint32_t            dwIndirectDataStartAddress;

    // Inline data
    void               *pInlineData;
    uint32_t            dwInlineDataSize;

    bool                bForceDestination;
    MHW_VFE_SCOREBOARD  VfeScoreboard;
} MHW_MEDIA_OBJECT_PARAMS, *PMHW_MEDIA_OBJECT_PARAMS;
#endif // __MHW_RENDER_H__