From 00be2445e091824ac4a784222bc4cc9cbb75ae1c Mon Sep 17 00:00:00 2001 From: eaidova Date: Mon, 3 Feb 2025 21:45:47 +0400 Subject: [PATCH] fix stateful seq2seq model inference performance --- optimum/intel/openvino/modeling_seq2seq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/intel/openvino/modeling_seq2seq.py b/optimum/intel/openvino/modeling_seq2seq.py index 983f1f6850..ccc5bb1b44 100644 --- a/optimum/intel/openvino/modeling_seq2seq.py +++ b/optimum/intel/openvino/modeling_seq2seq.py @@ -668,7 +668,7 @@ def forward( logits = torch.from_numpy(self.request.get_tensor("logits").data).to(self.device) self._past_length += input_ids.shape[1] - out_past_key_values = () + out_past_key_values = ((),) if not self.stateful: # Tuple of length equal to : number of layer * number of past_key_value per decoder layer (2 corresponds to the