
Commit b6220d5

Style
1 parent 0bf2325 commit b6220d5

File tree

1 file changed (+23 -22 lines changed)

notebooks/openvino/sentence_transformer_quantization.ipynb

@@ -177,9 +177,11 @@
 "\n",
 "quantizer = OVQuantizer.from_pretrained(model)\n",
 "\n",
+"\n",
 "def preprocess_function(examples, tokenizer):\n",
 "    return tokenizer(examples[\"sentence\"], padding=\"max_length\", max_length=384, truncation=True)\n",
 "\n",
+"\n",
 "calibration_dataset = quantizer.get_calibration_dataset(\n",
 "    \"glue\",\n",
 "    dataset_config_name=\"sst2\",\n",
@@ -194,13 +196,6 @@
 "tokenizer.save_pretrained(int8_ptq_model_path)"
 ]
 },
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
-},
 {
 "cell_type": "markdown",
 "metadata": {},
@@ -216,11 +211,12 @@
 "source": [
 "from transformers import Pipeline\n",
 "import torch.nn.functional as F\n",
-"import torch \n",
+"import torch\n",
+"\n",
 "\n",
 "# copied from the model card\n",
 "def mean_pooling(model_output, attention_mask):\n",
-"    token_embeddings = model_output[0] #First element of model_output contains all token embeddings\n",
+"    token_embeddings = model_output[0]  # First element of model_output contains all token embeddings\n",
 "    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()\n",
 "    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)\n",
 "\n",
@@ -230,7 +226,7 @@
 "        # we don\"t have any hyperameters to sanitize\n",
 "        preprocess_kwargs = {}\n",
 "        return preprocess_kwargs, {}, {}\n",
-"    \n",
+"\n",
 "    def preprocess(self, inputs):\n",
 "        encoded_inputs = self.tokenizer(inputs, padding=True, truncation=True, return_tensors=\"pt\")\n",
 "        return encoded_inputs\n",
@@ -283,7 +279,7 @@
 "from datasets import load_dataset\n",
 "from evaluate import load\n",
 "\n",
-"eval_dataset = load_dataset(\"glue\",\"stsb\",split=\"validation\")\n",
+"eval_dataset = load_dataset(\"glue\", \"stsb\", split=\"validation\")\n",
 "metric = load(\"glue\", \"stsb\")"
 ]
 },
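Each stsb validation example pairs two sentences with a human similarity score on a 0-to-5 scale; a quick way to inspect one row (the printed values are illustrative, not output from this notebook):

# Peek at one evaluation example; field names come from the GLUE stsb split.
print(eval_dataset[0])
# e.g. {'sentence1': '...', 'sentence2': '...', 'label': 5.0, 'idx': 0}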
@@ -315,21 +311,22 @@
 }
 ],
 "source": [
-"def compute_sentence_similarity(sentence_1, sentence_2,pipeline):\n",
+"def compute_sentence_similarity(sentence_1, sentence_2, pipeline):\n",
 "    embedding_1 = pipeline(sentence_1)\n",
 "    embedding_2 = pipeline(sentence_2)\n",
 "    # compute cosine similarity between two sentences\n",
 "    return torch.nn.functional.cosine_similarity(embedding_1, embedding_2, dim=1)\n",
 "\n",
 "\n",
 "def evaluate_stsb(example):\n",
-"    default = compute_sentence_similarity(example[\"sentence1\"], example[\"sentence2\"], vanilla_emb)\n",
-"    quantized = compute_sentence_similarity(example[\"sentence1\"], example[\"sentence2\"], q8_emb)\n",
-"    return {\n",
-"        \"reference\": (example[\"label\"] - 1) / (5 - 1), # rescale to [0,1]\n",
-"        \"default\": float(default),\n",
-"        \"quantized\": float(quantized),\n",
-"    }\n",
+"    default = compute_sentence_similarity(example[\"sentence1\"], example[\"sentence2\"], vanilla_emb)\n",
+"    quantized = compute_sentence_similarity(example[\"sentence1\"], example[\"sentence2\"], q8_emb)\n",
+"    return {\n",
+"        \"reference\": (example[\"label\"] - 1) / (5 - 1),  # rescale to [0,1]\n",
+"        \"default\": float(default),\n",
+"        \"quantized\": float(quantized),\n",
+"    }\n",
+"\n",
 "\n",
 "result = eval_dataset.map(evaluate_stsb)"
 ]
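compute_sentence_similarity above scores a sentence pair by the cosine of the angle between the two embedding vectors, so identical directions give 1.0. A tiny self-contained check of that operation (the vectors are made up for illustration):

import torch

a = torch.tensor([[1.0, 0.0]])
b = torch.tensor([[0.5, 0.5]])
# cos = dot(a, b) / (|a| * |b|) = 0.5 / (1.0 * 0.7071) ≈ 0.7071
print(torch.nn.functional.cosine_similarity(a, b, dim=1))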
@@ -353,9 +350,13 @@
 "default_acc = metric.compute(predictions=result[\"default\"], references=result[\"reference\"])\n",
 "quantized = metric.compute(predictions=result[\"quantized\"], references=result[\"reference\"])\n",
 "\n",
-"print(\"vanilla model: pearson=\", default_acc['pearson'])\n",
-"print(\"quantized model: pearson=\", quantized['pearson'])\n",
-"print(\"The quantized model achieves \", round(quantized[\"pearson\"]/default_acc[\"pearson\"],2)*100, \"% accuracy of the fp32 model\")"
+"print(\"vanilla model: pearson=\", default_acc[\"pearson\"])\n",
+"print(\"quantized model: pearson=\", quantized[\"pearson\"])\n",
+"print(\n",
+"    \"The quantized model achieves \",\n",
+"    round(quantized[\"pearson\"] / default_acc[\"pearson\"], 2) * 100,\n",
+"    \"% accuracy of the fp32 model\",\n",
+")"
 ]
 },
 {
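For reference, the GLUE stsb metric used here reports both Pearson and Spearman correlations, which is why the prints index into the returned dict. A minimal standalone sketch (the prediction and reference values are placeholders, not results from this notebook):

from evaluate import load

metric = load("glue", "stsb")
# Returns a dict with 'pearson' and 'spearmanr' correlation scores.
scores = metric.compute(predictions=[0.1, 0.8, 0.9], references=[0.0, 0.9, 1.0])
print(scores)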

0 commit comments