@@ -29,8 +29,8 @@ This document aims to publish the specific recipes we achieved for the popular L
29
29
| databricks/dolly-v2-12b | ✖ | ✔ | ✖ |
30
30
| EleutherAI/gpt-neox-20b | ✖ | ✔ | ✔ |
31
31
| mistralai/Mistral-7B-v0.1 | ✖ | ✔ | ✔ |
32
- | THUDM/chatglm2-6b | WIP | ✔ | ✔ |
33
- | THUDM/chatglm3-6b | WIP | ✔ | ✔ |
32
+ | THUDM/chatglm2-6b | ✔ | ✔ | ✔ |
33
+ | THUDM/chatglm3-6b | WIP | ✔ | WIP |
34
34
35
35
**Detailed recipes can be found [HERE](https://github.com/intel/intel-extension-for-transformers/blob/main/examples/huggingface/pytorch/text-generation/quantization/llm_quantization_recipes.md).**
36
36
@@ -40,8 +40,8 @@ This document aims to publish the specific recipes we achieved for the popular L
40
40
> - The WIP recipes will be published soon.
41
41
42
42
## Large Language Models Accuracy
43
- < table >
44
- <thead >
43
+
44
+ <table><thead>
45
45
<tr >
46
46
<th rowspan="3">Model</th>
47
47
<th colspan="9">lambada_openai</th>
@@ -63,212 +63,210 @@ This document aims to publish the specific recipes we achieved for the popular L
63
63
<th>Ratio</th>
64
64
<th>ACC</th>
65
65
<th>Ratio</th>
66
- </tr >
67
- </thead >
66
+ </tr ></thead >
68
67
<tbody >
69
68
<tr >
70
69
<td>baichuan-inc/Baichuan-13B-Chat</td>
71
70
<td>67.57%</td>
72
- <td>68.23 %</td>
73
- <td>1.0098 </td>
74
- <td>67.57 %</td>
75
- <td>1.0000 </td>
76
- <td>67.84 %</td>
77
- <td>1.0040 </td>
78
- <td>NA </td>
79
- <td>NA </td>
71
+ <td>69.07 %</td>
72
+ <td>1.0222 </td>
73
+ <td>67.55 %</td>
74
+ <td>0.9997 </td>
75
+ <td>68.12 %</td>
76
+ <td>1.0081 </td>
77
+ <td>66.93% </td>
78
+ <td>0.9905 </td>
80
79
</tr >
81
80
<tr >
82
81
<td>baichuan-inc/Baichuan2-13B-Chat</td>
83
82
<td>71.51%</td>
84
- <td>70.89 %</td>
85
- <td>0.9913 </td>
86
- <td>71.53 %</td>
87
- <td>1.0003 </td>
88
- <td>71.76 %</td>
89
- <td>1.0035 </td>
90
- <td>NA </td>
91
- <td>NA </td>
83
+ <td>75.57 %</td>
84
+ <td>1.0568 </td>
85
+ <td>71.57 %</td>
86
+ <td>1.0008 </td>
87
+ <td>70.81 %</td>
88
+ <td>0.9902 </td>
89
+ <td>N/A </td>
90
+ <td>N/A </td>
92
91
</tr >
93
92
<tr >
94
93
<td>baichuan-inc/Baichuan2-7B-Chat</td>
95
94
<td>67.67%</td>
96
- <td>67.96 %</td>
97
- <td>1.0043 </td>
98
- <td>67.59 %</td>
99
- <td>0.9988 </td>
100
- <td>67.24 %</td>
101
- <td>0.9936 </td>
102
- <td>67.42% </td>
103
- <td>0.9963 </td>
95
+ <td>68.06 %</td>
96
+ <td>1.0058 </td>
97
+ <td>67.61 %</td>
98
+ <td>0.9991 </td>
99
+ <td>67.90 %</td>
100
+ <td>1.0034 </td>
101
+ <td>N/A </td>
102
+ <td>N/A </td>
104
103
</tr >
105
104
<tr >
106
105
<td>bigscience/bloom-1b7</td>
107
106
<td>46.34%</td>
108
107
<td>47.99%</td>
109
108
<td>1.0356</td>
110
- <td>46.38 %</td>
111
- <td>1.0009 </td>
112
- <td>46.19 %</td>
113
- <td>0.9968 </td>
114
- <td>NA </td>
115
- <td>NA </td>
109
+ <td>46.21 %</td>
110
+ <td>0.9972 </td>
111
+ <td>46.90 %</td>
112
+ <td>1.0121 </td>
113
+ <td>N/A </td>
114
+ <td>N/A </td>
116
115
</tr >
117
116
<tr >
118
117
<td>databricks/dolly-v2-12b</td>
119
118
<td>64.35%</td>
120
- <td>NA </td>
121
- <td>NA </td>
122
- <td>64.10 %</td>
123
- <td>0.9961 </td>
124
- <td>NA </td>
125
- <td>NA </td>
126
- <td>NA </td>
127
- <td>NA </td>
119
+ <td>N/A </td>
120
+ <td>N/A </td>
121
+ <td>63.92 %</td>
122
+ <td>0.9933 </td>
123
+ <td>N/A </td>
124
+ <td>N/A </td>
125
+ <td>N/A </td>
126
+ <td>N/A </td>
128
127
</tr >
129
128
<tr >
130
129
<td>EleutherAI/gpt-j-6b</td>
131
130
<td>68.31%</td>
132
- <td>68.33 %</td>
133
- <td>1.0003 </td>
134
- <td>68.23 %</td>
135
- <td>0.9988 </td>
136
- <td>68.79 %</td>
137
- <td>1.0070 </td>
138
- <td>68.43 %</td>
139
- <td>1.0018 </td>
131
+ <td>68.27 %</td>
132
+ <td>0.9994 </td>
133
+ <td>68.27 %</td>
134
+ <td>0.9994 </td>
135
+ <td>68.35 %</td>
136
+ <td>1.0006 </td>
137
+ <td>68.02 %</td>
138
+ <td>0.9958 </td>
140
139
</tr >
141
140
<tr >
142
141
<td>EleutherAI/gpt-neox-20b</td>
143
142
<td>72.33%</td>
144
- <td>NA </td>
145
- <td>NA </td>
146
- <td>72.25 %</td>
147
- <td>0.9989 </td>
148
- <td>71.96 %</td>
149
- <td>0.9949 </td>
150
- <td>NA </td>
151
- <td>NA </td>
143
+ <td>N/A </td>
144
+ <td>N/A </td>
145
+ <td>72.29 %</td>
146
+ <td>0.9994 </td>
147
+ <td>71.74 %</td>
148
+ <td>0.9918 </td>
149
+ <td>N/A </td>
150
+ <td>N/A </td>
152
151
</tr >
153
152
<tr >
154
153
<td>facebook/opt-1.3b</td>
155
154
<td>57.89%</td>
156
- <td>57.54 %</td>
157
- <td>0.9940 </td>
158
- <td>58.08 %</td>
159
- <td>1.0033 </td>
160
- <td>58.57 %</td>
161
- <td>1.0117 </td>
162
- <td>NA </td>
163
- <td>NA </td>
155
+ <td>57.68 %</td>
156
+ <td>0.9964 </td>
157
+ <td>58.12 %</td>
158
+ <td>1.0040 </td>
159
+ <td>58.26 %</td>
160
+ <td>1.0064 </td>
161
+ <td>N/A </td>
162
+ <td>N/A </td>
164
163
</tr >
165
164
<tr >
166
165
<td>facebook/opt-30b</td>
167
166
<td>71.49%</td>
168
- <td>71.51 %</td>
169
- <td>1.0003 </td>
170
- <td>71.51 %</td>
171
- <td>1.0003 </td>
172
- <td>71.82 %</td>
173
- <td>1.0046 </td>
174
- <td>72.11 %</td>
175
- <td>1.0087 </td>
167
+ <td>71.78 %</td>
168
+ <td>1.0041 </td>
169
+ <td>71.53 %</td>
170
+ <td>1.0006 </td>
171
+ <td>71.59 %</td>
172
+ <td>1.0014 </td>
173
+ <td>71.80 %</td>
174
+ <td>1.0043 </td>
176
175
</tr >
177
176
<tr >
178
177
<td>meta-llama/Llama-2-13b-hf</td>
179
178
<td>76.77%</td>
180
179
<td>76.25%</td>
181
180
<td>0.9932</td>
182
- <td>76.75 %</td>
183
- <td>0.9997 </td>
184
- <td>77.43 %</td>
185
- <td>1.0086 </td>
186
- <td>76.75 %</td>
187
- <td>0.9997 </td>
181
+ <td>76.89 %</td>
182
+ <td>1.0016 </td>
183
+ <td>77.66 %</td>
184
+ <td>1.0116 </td>
185
+ <td>76.60 %</td>
186
+ <td>0.9978 </td>
188
187
</tr >
189
188
<tr >
190
189
<td>meta-llama/Llama-2-70b-hf</td>
191
190
<td>79.64%</td>
192
- <td>79.55 %</td>
193
- <td>0.9989 </td>
194
- <td>79.57 %</td>
195
- <td>0.9991 </td>
191
+ <td>79.14 %</td>
192
+ <td>0.9937 </td>
193
+ <td>79.62 %</td>
194
+ <td>0.9997 </td>
196
195
<td>80.09%</td>
197
196
<td>1.0057</td>
198
- <td>79.97 %</td>
199
- <td>1.0041 </td>
197
+ <td>79.68 %</td>
198
+ <td>1.0005 </td>
200
199
</tr >
201
200
<tr >
202
201
<td>meta-llama/Llama-2-7b-hf</td>
203
202
<td>73.92%</td>
204
203
<td>73.45%</td>
205
204
<td>0.9936</td>
206
- <td>73.96 %</td>
207
- <td>1.0005 </td>
208
- <td>73.45 %</td>
209
- <td>0.9936 </td>
210
- <td>73.49% </td>
211
- <td>0.9942 </td>
205
+ <td>73.90 %</td>
206
+ <td>0.9997 </td>
207
+ <td>73.84 %</td>
208
+ <td>0.9989 </td>
209
+ <td>N/A </td>
210
+ <td>N/A </td>
212
211
</tr >
213
212
<tr >
214
213
<td>mistralai/Mistral-7B-v0.1</td>
215
214
<td>75.90%</td>
216
- <td>NA </td>
217
- <td>NA </td>
215
+ <td>N/A </td>
216
+ <td>N/A </td>
218
217
<td>75.80%</td>
219
218
<td>0.9987</td>
220
- <td>76.13 %</td>
221
- <td>1.0030 </td>
222
- <td>75.61 %</td>
223
- <td>0.9962 </td>
219
+ <td>76.25 %</td>
220
+ <td>1.0046 </td>
221
+ <td>75.74 %</td>
222
+ <td>0.9979 </td>
224
223
</tr >
225
224
<tr >
226
225
<td>THUDM/chatglm2-6b</td>
227
226
<td>53.23%</td>
228
- <td>NA </td>
229
- <td>NA </td>
230
- <td>53.19 %</td>
231
- <td>0.9992 </td>
232
- <td>52.77 %</td>
233
- <td>0.9914 </td>
234
- <td>53.35 %</td>
235
- <td>1.0023 </td>
227
+ <td>52.86% </td>
228
+ <td>0.9930 </td>
229
+ <td>53.00 %</td>
230
+ <td>0.9957 </td>
231
+ <td>52.90 %</td>
232
+ <td>0.9938 </td>
233
+ <td>52.92 %</td>
234
+ <td>0.9942 </td>
236
235
</tr >
237
236
<tr >
238
237
<td>THUDM/chatglm3-6b</td>
239
238
<td>59.09%</td>
240
- <td>NA </td>
241
- <td>NA </td>
242
- <td>59.01 %</td>
243
- <td>0.9986 </td>
244
- <td>NA </td>
245
- <td>NA </td>
246
- <td>58.61% </td>
247
- <td>0.9919 </td>
239
+ <td>N/A </td>
240
+ <td>N/A </td>
241
+ <td>59.03 %</td>
242
+ <td>0.9990 </td>
243
+ <td>N/A </td>
244
+ <td>N/A </td>
245
+ <td>N/A </td>
246
+ <td>N/A </td>
248
247
</tr >
249
248
<tr >
250
249
<td>tiiuae/falcon-40b</td>
251
250
<td>77.22%</td>
252
- <td>77.04 %</td>
253
- <td>0.9977 </td>
254
- <td>77.22 %</td>
255
- <td>1.0000 </td>
256
- <td>77.94 %</td>
257
- <td>1.0093 </td>
258
- <td>78.79 %</td>
259
- <td>1.0203 </td>
251
+ <td>76.95 %</td>
252
+ <td>0.9965 </td>
253
+ <td>77.18 %</td>
254
+ <td>0.9995 </td>
255
+ <td>77.55 %</td>
256
+ <td>1.0043 </td>
257
+ <td>77.82 %</td>
258
+ <td>1.0078 </td>
260
259
</tr >
261
260
<tr >
262
261
<td>tiiuae/falcon-7b</td>
263
262
<td>74.67%</td>
264
- <td>76.44 %</td>
265
- <td>1.0237 </td>
266
- <td>74.77 %</td>
267
- <td>1.0013 </td>
268
- <td>75.00 %</td>
269
- <td>1.0044 </td>
270
- <td>NA </td>
271
- <td>NA </td>
263
+ <td>76.63 %</td>
264
+ <td>1.0262 </td>
265
+ <td>74.73 %</td>
266
+ <td>1.0008 </td>
267
+ <td>75.06 %</td>
268
+ <td>1.0052 </td>
269
+ <td>74.00% </td>
270
+ <td>0.9910 </td>
272
271
</tr >
273
- </tbody >
274
- </table >
272
+ </tbody ></table >
0 commit comments