|
177 | 177 | "\n",
|
178 | 178 | "quantizer = OVQuantizer.from_pretrained(model)\n",
|
179 | 179 | "\n",
|
| 180 | + "\n", |
180 | 181 | "def preprocess_function(examples, tokenizer):\n",
|
181 | 182 | " return tokenizer(examples[\"sentence\"], padding=\"max_length\", max_length=384, truncation=True)\n",
|
182 | 183 | "\n",
|
| 184 | + "\n", |
183 | 185 | "calibration_dataset = quantizer.get_calibration_dataset(\n",
|
184 | 186 | " \"glue\",\n",
|
185 | 187 | " dataset_config_name=\"sst2\",\n",
|
|
194 | 196 | "tokenizer.save_pretrained(int8_ptq_model_path)"
|
195 | 197 | ]
|
196 | 198 | },
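The elided hunk above is where the actual post-training quantization runs. For orientation, a minimal sketch of the usual `optimum-intel` call, reusing the `quantizer`, `calibration_dataset`, and `int8_ptq_model_path` names from this cell; treat the exact arguments as an assumption, not the notebook's verbatim code:

```python
# Hypothetical sketch (illustration only): run post-training static
# quantization against the calibration dataset built above, saving the
# INT8 model to the same directory the tokenizer is saved into.
quantizer.quantize(
    calibration_dataset=calibration_dataset,
    save_directory=int8_ptq_model_path,
)
```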
|
197 |
| - { |
198 |
| - "cell_type": "code", |
199 |
| - "execution_count": null, |
200 |
| - "metadata": {}, |
201 |
| - "outputs": [], |
202 |
| - "source": [] |
203 |
| - }, |
204 | 199 | {
|
205 | 200 | "cell_type": "markdown",
|
206 | 201 | "metadata": {},
|
|
216 | 211 | "source": [
|
217 | 212 | "from transformers import Pipeline\n",
|
218 | 213 | "import torch.nn.functional as F\n",
|
219 |
| - "import torch \n", |
| 214 | + "import torch\n", |
| 215 | + "\n", |
220 | 216 | "\n",
|
221 | 217 | "# copied from the model card\n",
|
222 | 218 | "def mean_pooling(model_output, attention_mask):\n",
|
223 |
| - " token_embeddings = model_output[0] #First element of model_output contains all token embeddings\n", |
| 219 | + " token_embeddings = model_output[0] # First element of model_output contains all token embeddings\n", |
224 | 220 | " input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()\n",
|
225 | 221 | " return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)\n",
|
226 | 222 | "\n",
|
|
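Since `mean_pooling` indexes `model_output[0]` and broadcasts the attention mask over the hidden dimension, a quick toy-tensor check (made-up shapes, illustration only) makes the expected behavior concrete:

```python
# Toy shape check for mean_pooling (made-up tensors, illustration only).
import torch

token_embeddings = torch.randn(2, 5, 8)          # (batch, seq_len, hidden)
attention_mask = torch.tensor([[1, 1, 1, 0, 0],  # padded positions contribute nothing
                               [1, 1, 1, 1, 1]])
pooled = mean_pooling((token_embeddings,), attention_mask)  # model_output[0] is indexed
print(pooled.shape)  # torch.Size([2, 8]): one pooled embedding per sentence
```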
230 | 226 | "        # we don't have any hyperparameters to sanitize\n",
|
231 | 227 | " preprocess_kwargs = {}\n",
|
232 | 228 | " return preprocess_kwargs, {}, {}\n",
|
233 |
| - " \n", |
| 229 | + "\n", |
234 | 230 | " def preprocess(self, inputs):\n",
|
235 | 231 | " encoded_inputs = self.tokenizer(inputs, padding=True, truncation=True, return_tensors=\"pt\")\n",
|
236 | 232 | " return encoded_inputs\n",
|
|
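The rest of this cell is elided by the diff. A custom `transformers.Pipeline` subclass also needs `_forward` and `postprocess`; a hypothetical sketch of how they typically look for sentence embeddings (mean pooling followed by L2 normalization, per the model-card recipe), with the class name guessed since its definition is elided above:

```python
# Hypothetical sketch of the elided methods (illustration only).
class SentenceEmbeddingPipeline(Pipeline):
    # ... _sanitize_parameters and preprocess as in the cell above ...

    def _forward(self, model_inputs):
        outputs = self.model(**model_inputs)
        return {"outputs": outputs, "attention_mask": model_inputs["attention_mask"]}

    def postprocess(self, model_outputs):
        # pool token embeddings, then L2-normalize the sentence embedding
        embeddings = mean_pooling(model_outputs["outputs"], model_outputs["attention_mask"])
        return F.normalize(embeddings, p=2, dim=1)
```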
283 | 279 | "from datasets import load_dataset\n",
|
284 | 280 | "from evaluate import load\n",
|
285 | 281 | "\n",
|
286 |
| - "eval_dataset = load_dataset(\"glue\",\"stsb\",split=\"validation\")\n", |
| 282 | + "eval_dataset = load_dataset(\"glue\", \"stsb\", split=\"validation\")\n", |
287 | 283 | "metric = load(\"glue\", \"stsb\")"
|
288 | 284 | ]
|
289 | 285 | },
|
|
315 | 311 | }
|
316 | 312 | ],
|
317 | 313 | "source": [
|
318 |
| - "def compute_sentence_similarity(sentence_1, sentence_2,pipeline):\n", |
| 314 | + "def compute_sentence_similarity(sentence_1, sentence_2, pipeline):\n", |
319 | 315 | " embedding_1 = pipeline(sentence_1)\n",
|
320 | 316 | " embedding_2 = pipeline(sentence_2)\n",
|
321 | 317 | " # compute cosine similarity between two sentences\n",
|
322 | 318 | " return torch.nn.functional.cosine_similarity(embedding_1, embedding_2, dim=1)\n",
|
323 | 319 | "\n",
|
324 | 320 | "\n",
|
325 | 321 | "def evaluate_stsb(example):\n",
|
326 |
| - " default = compute_sentence_similarity(example[\"sentence1\"], example[\"sentence2\"], vanilla_emb)\n", |
327 |
| - " quantized = compute_sentence_similarity(example[\"sentence1\"], example[\"sentence2\"], q8_emb)\n", |
328 |
| - " return {\n", |
329 |
| - " \"reference\": (example[\"label\"] - 1) / (5 - 1), # rescale to [0,1]\n", |
330 |
| - " \"default\": float(default),\n", |
331 |
| - " \"quantized\": float(quantized),\n", |
332 |
| - " }\n", |
| 322 | + " default = compute_sentence_similarity(example[\"sentence1\"], example[\"sentence2\"], vanilla_emb)\n", |
| 323 | + " quantized = compute_sentence_similarity(example[\"sentence1\"], example[\"sentence2\"], q8_emb)\n", |
| 324 | + " return {\n", |
| 325 | + " \"reference\": (example[\"label\"] - 1) / (5 - 1), # rescale to [0,1]\n", |
| 326 | + " \"default\": float(default),\n", |
| 327 | + " \"quantized\": float(quantized),\n", |
| 328 | + " }\n", |
| 329 | + "\n", |
333 | 330 | "\n",
|
334 | 331 | "result = eval_dataset.map(evaluate_stsb)"
|
335 | 332 | ]
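One detail worth flagging in `evaluate_stsb`: STS-B gold labels range from 0 to 5, so `(example["label"] - 1) / (5 - 1)` maps them to [-0.25, 1] rather than [0, 1]. Pearson correlation is invariant under positive linear rescaling, though, so the metric computed below is unaffected; a quick check with made-up values:

```python
# Pearson r is unchanged by positive affine rescaling of either variable,
# so the exact [0, 1] mapping of the 0-5 labels does not alter the score.
import numpy as np

labels = np.array([0.0, 1.5, 2.4, 3.8, 5.0])      # made-up gold scores
preds = np.array([0.10, 0.35, 0.50, 0.70, 0.95])  # made-up similarities

r_shifted = np.corrcoef((labels - 1) / (5 - 1), preds)[0, 1]
r_scaled = np.corrcoef(labels / 5, preds)[0, 1]
assert np.isclose(r_shifted, r_scaled)  # identical Pearson r
```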
|
|
353 | 350 | "default_acc = metric.compute(predictions=result[\"default\"], references=result[\"reference\"])\n",
|
354 | 351 | "quantized = metric.compute(predictions=result[\"quantized\"], references=result[\"reference\"])\n",
|
355 | 352 | "\n",
|
356 |
| - "print(\"vanilla model: pearson=\", default_acc['pearson'])\n", |
357 |
| - "print(\"quantized model: pearson=\", quantized['pearson'])\n", |
358 |
| - "print(\"The quantized model achieves \", round(quantized[\"pearson\"]/default_acc[\"pearson\"],2)*100, \"% accuracy of the fp32 model\")" |
| 353 | + "print(\"vanilla model: pearson=\", default_acc[\"pearson\"])\n", |
| 354 | + "print(\"quantized model: pearson=\", quantized[\"pearson\"])\n", |
| 355 | + "print(\n", |
| 356 | + " \"The quantized model achieves \",\n", |
| 357 | + " round(quantized[\"pearson\"] / default_acc[\"pearson\"], 2) * 100,\n", |
| 358 | + " \"% accuracy of the fp32 model\",\n", |
| 359 | + ")" |
359 | 360 | ]
|
360 | 361 | },
|
361 | 362 | {
|
|