|
226 | 226 | "sequence_length = len(initial_prompt_tokens[0])\n",
|
227 | 227 | "position_ids = np.arange(0, sequence_length).reshape(initial_prompt_tokens.shape)\n",
|
228 | 228 | "\n",
|
229 |
| - "output = ov_model({\"input_ids\": initial_prompt_tokens, \"position_ids\": position_ids})\n", |
230 |
| - "logits = output[\"logits\"]\n", |
| 229 | + "infer_request = ov_model.create_infer_request()\n", |
| 230 | + "infer_request.set_tensors({\"input_ids\": ov.Tensor(initial_prompt_tokens), \"position_ids\": ov.Tensor(position_ids)})\n", |
| 231 | + "infer_request.infer()\n", |
| 232 | + "logits = infer_request.get_tensor(\"logits\").data\n", |
| 233 | + "\n", |
231 | 234 | "curr_token_ids = np.argmax(logits[:, -1, :], axis=1).reshape([1, 1])\n",
|
232 | 235 | "\n",
|
233 | 236 | "MAX_TOKENS_GENERATED = 256\n",
|
|
245 | 248 | " curr_position_ids = np.ndarray([1, 1], dtype=np.int64)\n",
|
246 | 249 | " curr_position_ids[0][0] = next_position_id \n",
|
247 | 250 | " next_position_id += 1\n",
|
248 |
| - " curr_generated_output = ov_model({\"input_ids\": curr_token_ids, \"position_ids\": curr_position_ids})\n", |
249 |
| - " curr_logits = curr_generated_output[\"logits\"]\n", |
| 251 | + " \n", |
| 252 | + " infer_request.set_tensors({\"input_ids\": ov.Tensor(curr_token_ids), \"position_ids\": ov.Tensor(curr_position_ids)})\n", |
| 253 | + " infer_request.infer()\n", |
| 254 | + " curr_logits = infer_request.get_tensor(\"logits\").data\n", |
| 255 | + " \n", |
250 | 256 | " curr_token_ids = np.argmax(curr_logits[:, -1, :], axis=1).reshape([1, 1])\n",
|
251 | 257 | " last_token_id = curr_token_ids[0][0]\n",
|
252 | 258 | "\n",
|
253 |
| - "ov_model.create_infer_request().reset_state()" |
| 259 | + "infer_request.reset_state()" |
254 | 260 | ]
|
255 | 261 | },
|
256 | 262 | {
|
|
0 commit comments