Commit 6de610c

Fixed small issue. Results are the same.
1 parent b6220d5 commit 6de610c

1 file changed: +38, -44 lines changed

notebooks/openvino/sentence_transformer_quantization.ipynb

@@ -44,7 +44,7 @@
 {
 "data": {
 "application/vnd.jupyter.widget-view+json": {
-"model_id": "fd8729d418f3453bb1d97a2b038ff072",
+"model_id": "53d4d1f1703a4e52812ea366c06f2d67",
 "version_major": 2,
 "version_minor": 0
 },
@@ -68,7 +68,7 @@
 {
 "data": {
 "application/vnd.jupyter.widget-view+json": {
-"model_id": "758502bfa6a142cc9078b8404a4b5d78",
+"model_id": "a3de9a9bbdd942069b85519c83267f83",
 "version_major": 2,
 "version_minor": 0
 },
@@ -92,7 +92,7 @@
 {
 "data": {
 "application/vnd.jupyter.widget-view+json": {
-"model_id": "e82786c713694f0da616dee6164aa242",
+"model_id": "ebc55f3ce3974aaa8861474699d5a15f",
 "version_major": 2,
 "version_minor": 0
 },
@@ -116,7 +116,7 @@
 {
 "data": {
 "application/vnd.jupyter.widget-view+json": {
-"model_id": "344f6318563c4bed8dbedbdee08c9b59",
+"model_id": "f206e4e8651f4f449f9dcb1fc11ef266",
 "version_major": 2,
 "version_minor": 0
 },
@@ -267,7 +267,7 @@
 "vanilla_emb = SentenceEmbeddingPipeline(model=model, tokenizer=tokenizer)\n",
 "\n",
 "q_model = OVModelForFeatureExtraction.from_pretrained(int8_ptq_model_path)\n",
-"q8_emb = SentenceEmbeddingPipeline(model=model, tokenizer=tokenizer)"
+"q8_emb = SentenceEmbeddingPipeline(model=q_model, tokenizer=tokenizer)"
 ]
 },
 {
@@ -292,13 +292,13 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
-"Parameter 'function'=<function evaluate_stsb at 0x7f62282ab380> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.\n"
+"Parameter 'function'=<function evaluate_stsb at 0x7f92780c7600> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.\n"
 ]
 },
 {
 "data": {
 "application/vnd.jupyter.widget-view+json": {
-"model_id": "6b39c9e0096a423bbcda949bede6a9cb",
+"model_id": "0f28df147f95484c955c3f20f2f954d2",
 "version_major": 2,
 "version_minor": 0
 },
@@ -341,7 +341,7 @@
 "output_type": "stream",
 "text": [
 "vanilla model: pearson= 0.869619439095004\n",
-"quantized model: pearson= 0.869619439095004\n",
+"quantized model: pearson= 0.869326218489249\n",
 "The quantized model achieves 100.0 % accuracy of the fp32 model\n"
 ]
 }
@@ -368,7 +368,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 11,
+"execution_count": 7,
 "metadata": {},
 "outputs": [
 {
@@ -391,13 +391,7 @@
 "[ INFO ] OpenVINO:\n",
 "[ INFO ] Build ................................. 2024.5.0-16971-8a02b4c17bb\n",
 "[ INFO ] \n",
-"[ INFO ] Device info:\n"
-]
-},
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
+"[ INFO ] Device info:\n",
 "[ INFO ] CPU\n",
 "[ INFO ] Build ................................. 2024.5.0-16971-8a02b4c17bb\n",
 "[ INFO ] \n",
@@ -406,7 +400,7 @@
 "[ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.LATENCY.\n",
 "[Step 4/11] Reading model files\n",
 "[ INFO ] Loading model files\n",
-"[ INFO ] Read model took 11.28 ms\n",
+"[ INFO ] Read model took 10.87 ms\n",
 "[ INFO ] Original model I/O parameters:\n",
 "[ INFO ] Model inputs:\n",
 "[ INFO ] input_ids (node: input_ids) : i64 / [...] / [?,?]\n",
@@ -417,7 +411,7 @@
 "[Step 5/11] Resizing model to match image sizes and given batch\n",
 "[ INFO ] Model batch size: 1\n",
 "[ INFO ] Reshaping model: 'input_ids': [1,384], 'attention_mask': [1,384], 'token_type_ids': [1,384]\n",
-"[ INFO ] Reshape model took 2.58 ms\n",
+"[ INFO ] Reshape model took 3.02 ms\n",
 "[Step 6/11] Configuring input of the model\n",
 "[ INFO ] Model inputs:\n",
 "[ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,384]\n",
@@ -426,7 +420,7 @@
 "[ INFO ] Model outputs:\n",
 "[ INFO ] last_hidden_state (node: __module.encoder.layer.5.output.LayerNorm/aten::layer_norm/Add) : f32 / [...] / [1,384,384]\n",
 "[Step 7/11] Loading the model to the device\n",
-"[ INFO ] Compile model took 132.33 ms\n",
+"[ INFO ] Compile model took 125.14 ms\n",
 "[Step 8/11] Querying optimal runtime parameters\n",
 "[ INFO ] Model:\n",
 "[ INFO ] NETWORK_NAME: Model0\n",
@@ -458,17 +452,23 @@
 "[ INFO ] Fill input 'token_type_ids' with random values \n",
 "[Step 10/11] Measuring performance (Start inference synchronously, limits: 200 iterations)\n",
 "[ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop).\n",
-"[ INFO ] First inference took 14.89 ms\n",
+"[ INFO ] First inference took 13.97 ms\n"
+]
+},
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
 "[Step 11/11] Dumping statistics report\n",
 "[ INFO ] Execution Devices:['CPU']\n",
 "[ INFO ] Count: 200 iterations\n",
-"[ INFO ] Duration: 2067.30 ms\n",
+"[ INFO ] Duration: 1988.82 ms\n",
 "[ INFO ] Latency:\n",
-"[ INFO ] Median: 9.88 ms\n",
-"[ INFO ] Average: 10.15 ms\n",
-"[ INFO ] Min: 9.60 ms\n",
-"[ INFO ] Max: 11.37 ms\n",
-"[ INFO ] Throughput: 96.74 FPS\n"
+"[ INFO ] Median: 9.70 ms\n",
+"[ INFO ] Average: 9.77 ms\n",
+"[ INFO ] Min: 9.54 ms\n",
+"[ INFO ] Max: 11.35 ms\n",
+"[ INFO ] Throughput: 100.56 FPS\n"
 ]
 }
 ],
@@ -479,7 +479,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 13,
+"execution_count": 8,
 "metadata": {},
 "outputs": [
 {
@@ -502,13 +502,7 @@
 "[ INFO ] OpenVINO:\n",
 "[ INFO ] Build ................................. 2024.5.0-16971-8a02b4c17bb\n",
 "[ INFO ] \n",
-"[ INFO ] Device info:\n"
-]
-},
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
+"[ INFO ] Device info:\n",
 "[ INFO ] CPU\n",
 "[ INFO ] Build ................................. 2024.5.0-16971-8a02b4c17bb\n",
 "[ INFO ] \n",
@@ -517,7 +511,7 @@
 "[ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.LATENCY.\n",
 "[Step 4/11] Reading model files\n",
 "[ INFO ] Loading model files\n",
-"[ INFO ] Read model took 21.99 ms\n",
+"[ INFO ] Read model took 15.46 ms\n",
 "[ INFO ] Original model I/O parameters:\n",
 "[ INFO ] Model inputs:\n",
 "[ INFO ] input_ids (node: input_ids) : i64 / [...] / [?,?]\n",
@@ -528,7 +522,7 @@
 "[Step 5/11] Resizing model to match image sizes and given batch\n",
 "[ INFO ] Model batch size: 1\n",
 "[ INFO ] Reshaping model: 'input_ids': [1,384], 'attention_mask': [1,384], 'token_type_ids': [1,384]\n",
-"[ INFO ] Reshape model took 3.60 ms\n",
+"[ INFO ] Reshape model took 6.89 ms\n",
 "[Step 6/11] Configuring input of the model\n",
 "[ INFO ] Model inputs:\n",
 "[ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,384]\n",
@@ -537,7 +531,7 @@
 "[ INFO ] Model outputs:\n",
 "[ INFO ] last_hidden_state (node: __module.encoder.layer.5.output.LayerNorm/aten::layer_norm/Add) : f32 / [...] / [1,384,384]\n",
 "[Step 7/11] Loading the model to the device\n",
-"[ INFO ] Compile model took 324.67 ms\n",
+"[ INFO ] Compile model took 325.40 ms\n",
 "[Step 8/11] Querying optimal runtime parameters\n",
 "[ INFO ] Model:\n",
 "[ INFO ] NETWORK_NAME: Model0\n",
@@ -569,17 +563,17 @@
 "[ INFO ] Fill input 'token_type_ids' with random values \n",
 "[Step 10/11] Measuring performance (Start inference synchronously, limits: 200 iterations)\n",
 "[ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop).\n",
-"[ INFO ] First inference took 9.54 ms\n",
+"[ INFO ] First inference took 8.49 ms\n",
 "[Step 11/11] Dumping statistics report\n",
 "[ INFO ] Execution Devices:['CPU']\n",
 "[ INFO ] Count: 200 iterations\n",
-"[ INFO ] Duration: 906.86 ms\n",
+"[ INFO ] Duration: 869.96 ms\n",
 "[ INFO ] Latency:\n",
-"[ INFO ] Median: 4.19 ms\n",
-"[ INFO ] Average: 4.42 ms\n",
-"[ INFO ] Min: 4.09 ms\n",
-"[ INFO ] Max: 5.56 ms\n",
-"[ INFO ] Throughput: 220.54 FPS\n"
+"[ INFO ] Median: 4.17 ms\n",
+"[ INFO ] Average: 4.23 ms\n",
+"[ INFO ] Min: 4.08 ms\n",
+"[ INFO ] Max: 6.04 ms\n",
+"[ INFO ] Throughput: 229.89 FPS\n"
 ]
 }
 ],
