@@ -196,26 +196,26 @@ def wrapper(module, target_op, *args, **kwargs):
196
196
extension = [
197
197
ModuleExtension (
198
198
PagedAttention ,
199
- extension = lambda module : 'PagedAttentionPlaceholder ' ,
199
+ extension = lambda module : 'PagedAttentionExtension ' ,
200
200
replacer = lambda module , * args , ** kwargs : args [0 ],
201
201
wrapper = wrapper
202
- )
202
+ ),
203
+ 'libuser_ov_extensions.so'
203
204
]
204
205
)
205
206
206
- for input_name , input_data , input_tensor in zip (input_names , flatten_input , ov_model .inputs ):
207
+ for input_data , input_tensor in zip (flatten_input , ov_model .inputs ):
207
208
if input_tensor .element_type .is_dynamic ():
208
209
input_tensor .get_node ().set_element_type (ov_dtype_maping [input_data .dtype ])
209
210
if input_tensor .partial_shape .rank .is_dynamic :
210
211
input_tensor .get_node ().set_partial_shape (ov .PartialShape ([- 1 ]* input_data .ndim ))
211
- #input_tensor.get_tensor().set_names({input_name})
212
212
213
213
for out_name , out in zip (output_names , ov_model .outputs ):
214
214
out .get_tensor ().set_names ({out_name })
215
215
ov_model .validate_nodes_and_infer_types ()
216
216
#ov.save_model(ov_model, "vllm_openvino_model.xml")
217
217
print ('>>>>>>>>>>>>> OV MODEL CONVERTED' )
218
- print (ov_model )
218
+ # print(ov_model)
219
219
ov_compiled = ov .compile_model (ov_model )
220
220
221
221
from functools import partial
@@ -243,6 +243,7 @@ def wrapper(*args, **kwargs):
243
243
inputs .append (input_metadata .block_tables )
244
244
#for input in inputs:
245
245
# print(f'{input.dtype} with shape {input.shape}' if isinstance(input, torch.Tensor) else type(input))
246
+ #print('input_metadata.slot_mapping:', input_metadata.slot_mapping)
246
247
result = ov_compiled (inputs , share_outputs = False )
247
248
#print(f'result: {type(result)}')
248
249
return torch .from_numpy (result [0 ])
0 commit comments