From 17d811d7140c69937722033a9d90147fb8543904 Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Mon, 17 Feb 2025 12:45:49 +0000 Subject: [PATCH 01/11] initial commit --- tools/llm_weight_compression/README.md | 29 ++ .../config_optimum_cli.json | 23 + tools/llm_weight_compression/requirements.txt | 1 + tools/llm_weight_compression/run.py | 405 ++++++++++++++++++ 4 files changed, 458 insertions(+) create mode 100644 tools/llm_weight_compression/README.md create mode 100644 tools/llm_weight_compression/config_optimum_cli.json create mode 100644 tools/llm_weight_compression/requirements.txt create mode 100644 tools/llm_weight_compression/run.py diff --git a/tools/llm_weight_compression/README.md b/tools/llm_weight_compression/README.md new file mode 100644 index 00000000000..c2d38caadcf --- /dev/null +++ b/tools/llm_weight_compression/README.md @@ -0,0 +1,29 @@ +# LLM Weight Compression Tool + +## Install + +```bash +python3.10 -m venv env +. env/bin/activate +pip install --upgrade pip + +pip install openvino==2025.0.0 +pip install nncf==2.15.0 +pip install "git+https://github.com/huggingface/optimum.git@v1.24.0" +pip install git+https://github.com/huggingface/optimum-intel.git@v1.22.0 + +# #whowhatbench +git clone --depth 1 --branch 2025.0.0.0 https://github.com/openvinotoolkit/openvino.genai.git + +cd openvino.genai/tools/who_what_benchmark +pip install . +``` + +```bash +# For test +python run.py \ +--model-id facebook/opt-125m \ +--config config_optimum_cli.json \ +--root-dir experiment_dir \ +--dump-packages +``` \ No newline at end of file diff --git a/tools/llm_weight_compression/config_optimum_cli.json b/tools/llm_weight_compression/config_optimum_cli.json new file mode 100644 index 00000000000..1441af5a366 --- /dev/null +++ b/tools/llm_weight_compression/config_optimum_cli.json @@ -0,0 +1,23 @@ +{ + "compression": { + "backend": "optimum_cli", + "params": [ + { + "task": ["text-generation"], + "weight_format": ["int4"], + "ratio": [0.2, 0.4], + "group_size": [64, 128], + "awq": [false, true], + "dataset": ["auto"] + } + ] + }, + "evaluation": { + "backend": "lm_eval", + "params": { + "tasks": ["wikitext"], + "device": "cpu", + "limit": 3 + } + } +} diff --git a/tools/llm_weight_compression/requirements.txt b/tools/llm_weight_compression/requirements.txt new file mode 100644 index 00000000000..464090415c4 --- /dev/null +++ b/tools/llm_weight_compression/requirements.txt @@ -0,0 +1 @@ +# TODO diff --git a/tools/llm_weight_compression/run.py b/tools/llm_weight_compression/run.py new file mode 100644 index 00000000000..6f404543e18 --- /dev/null +++ b/tools/llm_weight_compression/run.py @@ -0,0 +1,405 @@ +# Copyright (c) 2025 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import itertools +import json +import shutil +import subprocess +from dataclasses import asdict +from dataclasses import dataclass +from enum import Enum +from pathlib import Path +from typing import Any, Dict, List, Optional + +from optimum.intel import OVModelForCausalLM +from tabulate import tabulate +from transformers import AutoTokenizer + + +class CompressBackendType(Enum): + OPTIMUM_CLI = "optimum_cli" + NNCF = "nncf" + + +def export_base_model(model_id: str, base_model_dir: Path) -> None: + """ + Exports a base openvino model into the following folder structure + + {ROOT_DIR} + |-- {encoded model ID} + |-- fp32 + |-- openvino_model.xml + |-- openvino_model.bin + |-- ... + + :param model_id: A model ID of a model hosted on the [Hub](https://huggingface.co/models). + :param base_model_dir: A directory where the model should be saved. + """ + model = OVModelForCausalLM.from_pretrained( + model_id=model_id, export=True, load_in_8bit=False, load_in_4bit=False, compile=False, trust_remote_code=True + ) + + tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) + + model.save_pretrained(base_model_dir) + tokenizer.save_pretrained(base_model_dir) + + +def dump_all_packages(output_file: str) -> None: + """ + Generates a list of all installed Python packages and save it to a file. + + :param output_file: The path to the file where the package list + should be saved. + """ + with open(output_file, "w") as f: + subprocess.run(["pip", "freeze"], stdout=f) + + +def load_json(path: str): + with open(path, encoding="utf8") as f: + return json.load(f) + + +def save_json(data, path: str, indent: int = 4): + with open(path, "w", encoding="utf8") as outfile: + json.dump(data, outfile, indent=indent) + + +# ------------------------------ Params Grid ------------------------------ + + +class Params: + """ """ + + def get_key(self) -> str: + """ """ + raise NotImplementedError + + def save_to_json(self, path: str) -> None: + """ + :param path: + """ + raise NotImplementedError + + +@dataclass +class OptimumCLIParams(Params): + # -------------------------------------- # + task: Optional[str] = None + trust_remote_code: Optional[bool] = True + weight_format: Optional[str] = "fp32" + # -------------------------------------- # + ratio: Optional[float] = None + sym: bool = False + group_size: Optional[int] = None + backup_precision: Optional[str] = None + dataset: Optional[str] = None + all_layers: bool = False + # -------------------------------------- # + awq: bool = False + scale_estimation: bool = False + gptq: bool = False + lora_correction: bool = False + + def get_key(self) -> str: + # Skipped: task, trust_remote_code + key_items = [] + key_items.append(f"{self.weight_format}") + if self.sym: + key_items.append("sym") + if self.ratio is not None: + key_items.append(f"r{self.ratio}") + if self.group_size is not None: + key_items.append(f"gs{self.group_size}") + if self.backup_precision is not None: + key_items.append(f"{self.backup_precision}") + if self.dataset: + key_items.append(f"{self.dataset}") + + for field_name in ["all_layers", "awq", "scale_estimation", "gptq", "lora_correction"]: + if getattr(self, field_name): + key_items.append(field_name) + + return "_".join(key_items) + + def save_to_json(self, path: str) -> None: + data = asdict(self) + save_json(data, path) + + +@dataclass +class NNCFAPIParams(Params): + pass + + +def optimum_cli_create_params_grid(compression_params: List[Dict[str, List[Any]]]) -> List[OptimumCLIParams]: + """ """ + params_grid 
= [] + for p in compression_params: + params_grid.extend(get_all_param_combinations(p, OptimumCLIParams)) + return params_grid + + +def nncf_create_params_grid(compression_params: List[Dict[str, List[Any]]]) -> List[NNCFAPIParams]: + raise NotImplementedError + + +def visualize_experiments(model_id: str, params_grid: List[Params]): + """ + :param model_id: + :param params_grid: + """ + rows = [[model_id, params.get_key()] for params in params_grid] + print("List of configurations to test out:") + print(tabulate(tabular_data=rows, headers=["Model ID", "Experiment"], tablefmt="mixed_grid")) + + +# ------------------------------ Params Grid ------------------------------ + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model-id", type=str, required=True, help="A model ID of a model hosted on the [Hub](https://huggingface.co/models)") + parser.add_argument("--config", type=str, required=True) + parser.add_argument("--root-dir", type=str, required=True) + parser.add_argument("--show-only", action="store_true") + parser.add_argument("--dump-packages", action="store_true") + return parser.parse_args() + + +def encode_model_id(model_id): + """ + :param model_id: + """ + # return name.replace("/", "_").replace(".", "_") + if "/" in model_id: + model_id = "/".join(model_id.split("/")[1:]) + return model_id.replace("/", "_").replace(".", "_") + + +def run_command(command: str) -> None: + print(f"Run command: {command}") + subprocess.run(command, check=True, shell=True) + + +def run_optimum_cli( + model_id: Path, + output_dir: Path, + params: OptimumCLIParams, + log_filename: Optional[str] = None, +) -> None: + """ + :param model_id: + :param output_dir: + :param params: + :param log_filename: + """ + cmd_line = "optimum-cli" + cmd_line += " export openvino" + cmd_line += f" --model {model_id}" + if params.task: + cmd_line += f" --task {params.task}" + if params.trust_remote_code: + cmd_line += " --trust-remote-code" + if params.weight_format: + cmd_line += f" --weight-format {params.weight_format}" + if params.ratio: + cmd_line += f" --ratio {params.ratio}" + if params.sym: + cmd_line += " --sym" + if params.group_size: + cmd_line += f" --group-size {params.group_size}" + if params.backup_precision: + cmd_line += f" --backup-precision {params.backup_precision}" + if params.dataset: + cmd_line += f" --dataset {params.dataset}" + if params.all_layers: + cmd_line += " --all-layers" + if params.awq: + cmd_line += " --awq" + if params.scale_estimation: + cmd_line += " --scale-estimation" + if params.gptq: + cmd_line += " --gptq" + if params.lora_correction: + cmd_line += " --lora-correction" + + # output argument + cmd_line += f" {output_dir.as_posix()}" + + if log_filename: + optimum_cli_log = output_dir.joinpath("optimum_cli_log.txt") + cmd_line += f" 2>&1 | tee -a {optimum_cli_log.as_posix()}" + + return run_command(cmd_line) + + +def run_nncf( + model_id: Path, + output_dir: Path, + params: NNCFAPIParams, + log_filename: Optional[str] = None, +) -> None: + """ + :param model_id: + :param output_dir: + :param params: + :param log_filename: + """ + raise NotImplementedError + + +def get_all_param_combinations(experiment: Dict[str, List[Any]], cls) -> List[Params]: + keys = experiment.keys() + values = experiment.values() + combinations = [cls(**dict(zip(keys, combination))) for combination in itertools.product(*values)] + return combinations + + +class EvaluateBackendType(Enum): + LM_EVAL = "lm_eval" + + +def run_lm_eval_cli(model_dir: Path, evaluation_params: 
Dict[str, Any]): + """ + :param model_dir: + :param evaluation_params: + """ + cmd_line = "lm_eval" + cmd_line += f" --model openvino" + + tasks_arg = ",".join(evaluation_params["tasks"]) + cmd_line += f" --tasks {tasks_arg}" + + cmd_line += f" --model_args pretrained={model_dir.as_posix()}" + + num_fewshot = evaluation_params.get("num_fewshot") + if num_fewshot: + cmd_line += f" --num_fewshot {num_fewshot}" + + batch_size = evaluation_params.get("batch_size") + if batch_size: + cmd_line += f" --batch_size {batch_size}" + + device = evaluation_params.get("device") + if device: + cmd_line += f" --device {device}" + + cmd_line += f" --output_path {model_dir.as_posix()}" + + limit = evaluation_params.get("limit") + if limit: + cmd_line += f" --limit {limit}" + + cmd_line += f" --trust_remote_code" + + return run_command(cmd_line) + + +def evaluate(model_dir: Path, evaluation_config: Dict[str, Any]): + """ + """ + backend = EvaluateBackendType(evaluation_config["backend"]) + evaluation_params = evaluation_config["params"] + + print(f"Run evaluation ({backend.name}): {model_dir.as_posix()}") + + if backend == EvaluateBackendType.LM_EVAL: + run_lm_eval_cli(model_dir, evaluation_params) + else: + raise NotImplementedError + + +def compress(model_id: str, + root_model_dir: Path, + compression_config: Dict[str, Any], + show_only: bool = False) -> None: + """ + :param model_id: + :param root_model_dir: + :param compression_config: + """ + backend = CompressBackendType(compression_config["backend"]) + compression_params = compression_config["params"] + + if backend == CompressBackendType.OPTIMUM_CLI: + grid = optimum_cli_create_params_grid(compression_params) + elif backend == CompressBackendType.NNCF: + grid = nncf_create_params_grid(compression_params) + + visualize_experiments(model_id, grid) + + if show_only: + return + + for params in grid: + EXPERIMENT_DIR = root_model_dir / params.get_key() + if EXPERIMENT_DIR.exists(): + shutil.rmtree(EXPERIMENT_DIR) + EXPERIMENT_DIR.mkdir(exist_ok=True, parents=True) + + print(f"Applying configuration: {params.get_key()}") + + if backend == CompressBackendType.OPTIMUM_CLI: + params_filename = "optimum_cli_params.json" + run_optimum_cli(model_id, EXPERIMENT_DIR, params) + elif backend == CompressBackendType.NNCF: + params_filename = "nncf_params.json" + run_nncf(model_id, EXPERIMENT_DIR, params) + + # --------- Save params --------- + print(f"Saving compression parameters: {EXPERIMENT_DIR / params_filename}") + params.save_to_json(EXPERIMENT_DIR / params_filename) + + +def main(): + args = parse_args() + + ROOT_DIR = Path(args.root_dir) + ROOT_MODEL_DIR = ROOT_DIR / encode_model_id(args.model_id) + + # --------- Export base model --------- + BASE_MODEL_DIR = ROOT_MODEL_DIR / "fp32" + if BASE_MODEL_DIR.exists(): + shutil.rmtree(BASE_MODEL_DIR) + BASE_MODEL_DIR.mkdir(exist_ok=True, parents=True) + print(f"Saving a base model: {BASE_MODEL_DIR}") + export_base_model(args.model_id, BASE_MODEL_DIR) + + config = load_json(args.config) + + # --------- Compress --------- + compression_config = config["compression"] + compress(args.model_id, ROOT_MODEL_DIR, compression_config) + + if args.show_only: + return + + # --------- Evaluate --------- + evaluation_config = config["evaluation"] + for model_dir in ROOT_MODEL_DIR.iterdir(): + if not model_dir.is_dir(): + continue + + try: + evaluate(model_dir, evaluation_config) + except Exception as e: + print(e) + + # --------- Save extra info --------- + if args.dump_packages: + dump_all_packages(ROOT_MODEL_DIR / 
"versions.txt") + + +if __name__ == "__main__": + main() From a007a437b08cc34c04fa07c2c52116ca524a3e20 Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Tue, 18 Feb 2025 11:24:33 +0000 Subject: [PATCH 02/11] add lm_eval version --- tools/llm_weight_compression/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/llm_weight_compression/README.md b/tools/llm_weight_compression/README.md index c2d38caadcf..1294e740092 100644 --- a/tools/llm_weight_compression/README.md +++ b/tools/llm_weight_compression/README.md @@ -10,7 +10,8 @@ pip install --upgrade pip pip install openvino==2025.0.0 pip install nncf==2.15.0 pip install "git+https://github.com/huggingface/optimum.git@v1.24.0" -pip install git+https://github.com/huggingface/optimum-intel.git@v1.22.0 +pip install "git+https://github.com/huggingface/optimum-intel.git@v1.22.0" +pip install "git+https://github.com/EleutherAI/lm-evaluation-harness@v0.4.2" # #whowhatbench git clone --depth 1 --branch 2025.0.0.0 https://github.com/openvinotoolkit/openvino.genai.git From bd3a0014bb94db0814326c51798e4a78c4f95ff6 Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Tue, 18 Feb 2025 11:41:01 +0000 Subject: [PATCH 03/11] minor improvements --- tools/llm_weight_compression/run.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/llm_weight_compression/run.py b/tools/llm_weight_compression/run.py index 6f404543e18..117fb7947af 100644 --- a/tools/llm_weight_compression/run.py +++ b/tools/llm_weight_compression/run.py @@ -37,9 +37,10 @@ def export_base_model(model_id: str, base_model_dir: Path) -> None: {ROOT_DIR} |-- {encoded model ID} |-- fp32 - |-- openvino_model.xml - |-- openvino_model.bin - |-- ... + |-- model + |-- openvino_model.xml + |-- openvino_model.bin + |-- ... :param model_id: A model ID of a model hosted on the [Hub](https://huggingface.co/models). :param base_model_dir: A directory where the model should be saved. 
@@ -50,8 +51,8 @@ def export_base_model(model_id: str, base_model_dir: Path) -> None: tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) - model.save_pretrained(base_model_dir) - tokenizer.save_pretrained(base_model_dir) + model.save_pretrained(base_model_dir.joinpath("model")) + tokenizer.save_pretrained(base_model_dir.joinpath("model")) def dump_all_packages(output_file: str) -> None: @@ -235,7 +236,7 @@ def run_optimum_cli( cmd_line += " --lora-correction" # output argument - cmd_line += f" {output_dir.as_posix()}" + cmd_line += f" {output_dir.joinpath('model').as_posix()}" if log_filename: optimum_cli_log = output_dir.joinpath("optimum_cli_log.txt") @@ -281,7 +282,7 @@ def run_lm_eval_cli(model_dir: Path, evaluation_params: Dict[str, Any]): tasks_arg = ",".join(evaluation_params["tasks"]) cmd_line += f" --tasks {tasks_arg}" - cmd_line += f" --model_args pretrained={model_dir.as_posix()}" + cmd_line += f" --model_args pretrained={model_dir.joinpath('model').as_posix()}" num_fewshot = evaluation_params.get("num_fewshot") if num_fewshot: @@ -295,7 +296,7 @@ def run_lm_eval_cli(model_dir: Path, evaluation_params: Dict[str, Any]): if device: cmd_line += f" --device {device}" - cmd_line += f" --output_path {model_dir.as_posix()}" + cmd_line += f" --output_path {model_dir.joinpath('lm_eval_results.json').as_posix()}" limit = evaluation_params.get("limit") if limit: From 6ec8952995a593bf4384073646f183d6d84cb631 Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Tue, 18 Feb 2025 12:59:18 +0000 Subject: [PATCH 04/11] add who_what_benchmark --- ...m_cli.json => config_optimum_lm_eval.json} | 0 .../config_optimum_wwb.json | 23 ++++++++++++++ tools/llm_weight_compression/run.py | 30 ++++++++++++++++--- 3 files changed, 49 insertions(+), 4 deletions(-) rename tools/llm_weight_compression/{config_optimum_cli.json => config_optimum_lm_eval.json} (100%) create mode 100644 tools/llm_weight_compression/config_optimum_wwb.json diff --git a/tools/llm_weight_compression/config_optimum_cli.json b/tools/llm_weight_compression/config_optimum_lm_eval.json similarity index 100% rename from tools/llm_weight_compression/config_optimum_cli.json rename to tools/llm_weight_compression/config_optimum_lm_eval.json diff --git a/tools/llm_weight_compression/config_optimum_wwb.json b/tools/llm_weight_compression/config_optimum_wwb.json new file mode 100644 index 00000000000..e78fc602f9b --- /dev/null +++ b/tools/llm_weight_compression/config_optimum_wwb.json @@ -0,0 +1,23 @@ +{ + "compression": { + "backend": "optimum_cli", + "params": [ + { + "task": ["text-generation"], + "weight_format": ["int4"], + "ratio": [0.2], + "group_size": [64], + "awq": [false], + "dataset": ["auto"] + } + ] + }, + "evaluation": { + "backend": "who_what_benchmark", + "params": { + "model_type": "text", + "device": "CPU", + "language": "en" + } + } +} diff --git a/tools/llm_weight_compression/run.py b/tools/llm_weight_compression/run.py index 117fb7947af..c6baa37cbb1 100644 --- a/tools/llm_weight_compression/run.py +++ b/tools/llm_weight_compression/run.py @@ -269,6 +269,7 @@ def get_all_param_combinations(experiment: Dict[str, List[Any]], cls) -> List[Pa class EvaluateBackendType(Enum): LM_EVAL = "lm_eval" + WHO_WHAT_BENCHMARK = "who_what_benchmark" def run_lm_eval_cli(model_dir: Path, evaluation_params: Dict[str, Any]): @@ -307,7 +308,27 @@ def run_lm_eval_cli(model_dir: Path, evaluation_params: Dict[str, Any]): return run_command(cmd_line) -def evaluate(model_dir: Path, evaluation_config: Dict[str, Any]): +def 
run_who_what_benchmark_cli(model_dir: Path, base_model_dir: Path, evaluation_params: Dict[str, Any]): + if model_dir.resolve() == base_model_dir.resolve(): + return + + language = evaluation_params['language'] + gt_data_filename = f"gt_{language}.csv" + + cmd_line = "wwb" + cmd_line += f" --base-model {base_model_dir.joinpath('model')}" + cmd_line += f" --target-model {model_dir.joinpath('model')}" + cmd_line += f" --gt-data {base_model_dir.joinpath(gt_data_filename)}" + cmd_line += f" --model-type {evaluation_params['model_type']}" + cmd_line += f" --device {evaluation_params['device']}" + cmd_line += f" --language {language}" + # cmd_line += " --hf" + cmd_line += f" --output {model_dir.as_posix()}" + + return run_command(cmd_line) + + +def evaluate(model_dir: Path, base_model_dir: Path, evaluation_config: Dict[str, Any]): """ """ backend = EvaluateBackendType(evaluation_config["backend"]) @@ -317,8 +338,9 @@ def evaluate(model_dir: Path, evaluation_config: Dict[str, Any]): if backend == EvaluateBackendType.LM_EVAL: run_lm_eval_cli(model_dir, evaluation_params) - else: - raise NotImplementedError + + if backend == EvaluateBackendType.WHO_WHAT_BENCHMARK: + run_who_what_benchmark_cli(model_dir, base_model_dir, evaluation_params) def compress(model_id: str, @@ -393,7 +415,7 @@ def main(): continue try: - evaluate(model_dir, evaluation_config) + evaluate(model_dir, BASE_MODEL_DIR, evaluation_config) except Exception as e: print(e) From 478aef01c964542498498c0f6d2655ba2afe68df Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Tue, 18 Feb 2025 13:48:23 +0000 Subject: [PATCH 05/11] minor --- tools/llm_weight_compression/run.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/llm_weight_compression/run.py b/tools/llm_weight_compression/run.py index c6baa37cbb1..27169eb545f 100644 --- a/tools/llm_weight_compression/run.py +++ b/tools/llm_weight_compression/run.py @@ -272,7 +272,7 @@ class EvaluateBackendType(Enum): WHO_WHAT_BENCHMARK = "who_what_benchmark" -def run_lm_eval_cli(model_dir: Path, evaluation_params: Dict[str, Any]): +def run_lm_eval(model_dir: Path, evaluation_params: Dict[str, Any]): """ :param model_dir: :param evaluation_params: @@ -308,7 +308,7 @@ def run_lm_eval_cli(model_dir: Path, evaluation_params: Dict[str, Any]): return run_command(cmd_line) -def run_who_what_benchmark_cli(model_dir: Path, base_model_dir: Path, evaluation_params: Dict[str, Any]): +def run_who_what_benchmark(model_dir: Path, base_model_dir: Path, evaluation_params: Dict[str, Any]): if model_dir.resolve() == base_model_dir.resolve(): return @@ -337,10 +337,10 @@ def evaluate(model_dir: Path, base_model_dir: Path, evaluation_config: Dict[str, print(f"Run evaluation ({backend.name}): {model_dir.as_posix()}") if backend == EvaluateBackendType.LM_EVAL: - run_lm_eval_cli(model_dir, evaluation_params) + run_lm_eval(model_dir, evaluation_params) if backend == EvaluateBackendType.WHO_WHAT_BENCHMARK: - run_who_what_benchmark_cli(model_dir, base_model_dir, evaluation_params) + run_who_what_benchmark(model_dir, base_model_dir, evaluation_params) def compress(model_id: str, From 6adf764ef8277124dd2ab189e5f5889df375b93f Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Tue, 18 Feb 2025 16:34:36 +0000 Subject: [PATCH 06/11] parse results --- tools/llm_weight_compression/run.py | 77 ++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 2 deletions(-) diff --git a/tools/llm_weight_compression/run.py b/tools/llm_weight_compression/run.py index 27169eb545f..94f834846dd 100644 --- 
a/tools/llm_weight_compression/run.py +++ b/tools/llm_weight_compression/run.py @@ -25,6 +25,10 @@ from transformers import AutoTokenizer +LM_EVAL_RESULTS_FILENAME = "lm_eval_results.json" +OPTIMUM_CLI_PARAMS_FILENAME = "optimum_cli_params.json" + + class CompressBackendType(Enum): OPTIMUM_CLI = "optimum_cli" NNCF = "nncf" @@ -297,7 +301,7 @@ def run_lm_eval(model_dir: Path, evaluation_params: Dict[str, Any]): if device: cmd_line += f" --device {device}" - cmd_line += f" --output_path {model_dir.joinpath('lm_eval_results.json').as_posix()}" + cmd_line += f" --output_path {model_dir.joinpath(LM_EVAL_RESULTS_FILENAME).as_posix()}" limit = evaluation_params.get("limit") if limit: @@ -374,7 +378,7 @@ def compress(model_id: str, print(f"Applying configuration: {params.get_key()}") if backend == CompressBackendType.OPTIMUM_CLI: - params_filename = "optimum_cli_params.json" + params_filename = OPTIMUM_CLI_PARAMS_FILENAME run_optimum_cli(model_id, EXPERIMENT_DIR, params) elif backend == CompressBackendType.NNCF: params_filename = "nncf_params.json" @@ -385,6 +389,72 @@ def compress(model_id: str, params.save_to_json(EXPERIMENT_DIR / params_filename) +class ResultsParser: + + @staticmethod + def parse_lm_eval(path: Path): + + METRICS = [ + "acc", + "ppl", + "word_perplexity", + "exact_match,strict-match", + "perplexity", + "similarity", + "fdt_norm", + ] + METRICS.extend([metric + ",none" for metric in METRICS]) + + data = load_json(path) + limit = data.get("config", {}).get("limit", None) + results_section = data.get("results") + + results = [] + for task, task_results in results_section.items(): + res = {} + for metric, value in task_results.items(): + res["task"] = task + + if metric in METRICS: + metric = metric.replace(",none", "") + res[metric] = value + res["limit"] = limit + results.append(res) + + return results + + @staticmethod + def parse_optimum_params(path: Path, fields: List[str]): + data = load_json(path) + return {field_name: data[field_name] for field_name in fields} + + @staticmethod + def parse(root_model_dir: Path): + c = {} # configuration_key -> {/* data */} + + for model_dir in root_model_dir.iterdir(): + if not model_dir.is_dir(): + continue + + configuration_key = model_dir.name + + c[configuration_key] = {} + c[configuration_key]["model"] = root_model_dir.name + c[configuration_key]["configuration"] = configuration_key + + # Parse the `lm_eval_results.json` file + path = model_dir.joinpath(LM_EVAL_RESULTS_FILENAME) + if path.exists(): + c[configuration_key]["lm_eval"] = ResultsParser.parse_lm_eval(path) + + # Parse the `optimum_cli_params.json` file + path = model_dir.joinpath(OPTIMUM_CLI_PARAMS_FILENAME) + if path.exists(): + c[configuration_key]["optimum_params"] = ResultsParser.parse_optimum_params(path, ["weight_format", "ratio", "group_size"]) # TODO + + # print(json.dumps(c, indent=4)) + + def main(): args = parse_args() @@ -423,6 +493,9 @@ def main(): if args.dump_packages: dump_all_packages(ROOT_MODEL_DIR / "versions.txt") + # --------- Parse results --------- + ResultsParser.parse(ROOT_MODEL_DIR) + if __name__ == "__main__": main() From a0b0dce413ffe253b669fa95f44e6375f195a7b4 Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Tue, 18 Feb 2025 16:37:38 +0000 Subject: [PATCH 07/11] fix style --- tools/llm_weight_compression/run.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tools/llm_weight_compression/run.py b/tools/llm_weight_compression/run.py index 94f834846dd..4943d62088d 100644 --- 
a/tools/llm_weight_compression/run.py +++ b/tools/llm_weight_compression/run.py @@ -24,7 +24,6 @@ from tabulate import tabulate from transformers import AutoTokenizer - LM_EVAL_RESULTS_FILENAME = "lm_eval_results.json" OPTIMUM_CLI_PARAMS_FILENAME = "optimum_cli_params.json" @@ -174,7 +173,12 @@ def visualize_experiments(model_id: str, params_grid: List[Params]): def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument("--model-id", type=str, required=True, help="A model ID of a model hosted on the [Hub](https://huggingface.co/models)") + parser.add_argument( + "--model-id", + type=str, + required=True, + help="A model ID of a model hosted on the [Hub](https://huggingface.co/models)", + ) parser.add_argument("--config", type=str, required=True) parser.add_argument("--root-dir", type=str, required=True) parser.add_argument("--show-only", action="store_true") @@ -282,7 +286,7 @@ def run_lm_eval(model_dir: Path, evaluation_params: Dict[str, Any]): :param evaluation_params: """ cmd_line = "lm_eval" - cmd_line += f" --model openvino" + cmd_line += " --model openvino" tasks_arg = ",".join(evaluation_params["tasks"]) cmd_line += f" --tasks {tasks_arg}" @@ -307,7 +311,7 @@ def run_lm_eval(model_dir: Path, evaluation_params: Dict[str, Any]): if limit: cmd_line += f" --limit {limit}" - cmd_line += f" --trust_remote_code" + cmd_line += " --trust_remote_code" return run_command(cmd_line) @@ -316,7 +320,7 @@ def run_who_what_benchmark(model_dir: Path, base_model_dir: Path, evaluation_par if model_dir.resolve() == base_model_dir.resolve(): return - language = evaluation_params['language'] + language = evaluation_params["language"] gt_data_filename = f"gt_{language}.csv" cmd_line = "wwb" @@ -333,8 +337,7 @@ def run_who_what_benchmark(model_dir: Path, base_model_dir: Path, evaluation_par def evaluate(model_dir: Path, base_model_dir: Path, evaluation_config: Dict[str, Any]): - """ - """ + """ """ backend = EvaluateBackendType(evaluation_config["backend"]) evaluation_params = evaluation_config["params"] @@ -347,10 +350,7 @@ def evaluate(model_dir: Path, base_model_dir: Path, evaluation_config: Dict[str, run_who_what_benchmark(model_dir, base_model_dir, evaluation_params) -def compress(model_id: str, - root_model_dir: Path, - compression_config: Dict[str, Any], - show_only: bool = False) -> None: +def compress(model_id: str, root_model_dir: Path, compression_config: Dict[str, Any], show_only: bool = False) -> None: """ :param model_id: :param root_model_dir: @@ -450,7 +450,9 @@ def parse(root_model_dir: Path): # Parse the `optimum_cli_params.json` file path = model_dir.joinpath(OPTIMUM_CLI_PARAMS_FILENAME) if path.exists(): - c[configuration_key]["optimum_params"] = ResultsParser.parse_optimum_params(path, ["weight_format", "ratio", "group_size"]) # TODO + c[configuration_key]["optimum_params"] = ResultsParser.parse_optimum_params( + path, ["weight_format", "ratio", "group_size"] + ) # TODO # print(json.dumps(c, indent=4)) From caa1c91b8a4ac151c167be044cb688b196dafefc Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Wed, 19 Feb 2025 12:03:07 +0000 Subject: [PATCH 08/11] update --- tools/llm_weight_compression/README.md | 5 +- tools/llm_weight_compression/run.py | 86 ++++++++++++++++++++++++-- 2 files changed, 85 insertions(+), 6 deletions(-) diff --git a/tools/llm_weight_compression/README.md b/tools/llm_weight_compression/README.md index 1294e740092..cfeca63d2e3 100644 --- a/tools/llm_weight_compression/README.md +++ b/tools/llm_weight_compression/README.md @@ -12,6 +12,7 @@ pip install 
nncf==2.15.0 pip install "git+https://github.com/huggingface/optimum.git@v1.24.0" pip install "git+https://github.com/huggingface/optimum-intel.git@v1.22.0" pip install "git+https://github.com/EleutherAI/lm-evaluation-harness@v0.4.2" +pip install xlsxwriter # #whowhatbench git clone --depth 1 --branch 2025.0.0.0 https://github.com/openvinotoolkit/openvino.genai.git @@ -24,7 +25,7 @@ pip install . # For test python run.py \ --model-id facebook/opt-125m \ ---config config_optimum_cli.json \ +--config config_optimum_lm_eval.json \ --root-dir experiment_dir \ --dump-packages -``` \ No newline at end of file +``` diff --git a/tools/llm_weight_compression/run.py b/tools/llm_weight_compression/run.py index 4943d62088d..c0ed94bcc67 100644 --- a/tools/llm_weight_compression/run.py +++ b/tools/llm_weight_compression/run.py @@ -20,6 +20,8 @@ from pathlib import Path from typing import Any, Dict, List, Optional +import pandas as pd + from optimum.intel import OVModelForCausalLM from tabulate import tabulate from transformers import AutoTokenizer @@ -450,11 +452,84 @@ def parse(root_model_dir: Path): # Parse the `optimum_cli_params.json` file path = model_dir.joinpath(OPTIMUM_CLI_PARAMS_FILENAME) if path.exists(): + # TODO(andrey-churkin): Add more fields c[configuration_key]["optimum_params"] = ResultsParser.parse_optimum_params( path, ["weight_format", "ratio", "group_size"] - ) # TODO - - # print(json.dumps(c, indent=4)) + ) + + return c + + +def save_results(results: Dict[str, Dict[str, Any]], root_path: Path): +# { +# "int4_r0.2_gs64_auto": { +# "model": "opt-125m", +# "configuration": "int4_r0.2_gs64_auto", +# "lm_eval": [{...}, ...], +# "optimum_params": { +# "weight_format": "int4", +# "ratio": 0.2, +# "group_size": 64 +# } +# }, +# "fp32": { +# "model": "opt-125m", +# "configuration": "fp32", +# "lm_eval": [{...}, ...], +# }, +# ... +# } + rows: List[Dict[str, Any]] = [] + for val in results.values(): + row = { + "model": val["model"], + "configuration": val["configuration"], + } + # Add optimum params + row.update(val.get("optimum_params", {})) + + # Add lm_eval results + lm_eval = val.get("lm_eval", []) + for dct in lm_eval: + new_row = row.copy() + new_row.update(dct) + rows.append(new_row) + + pd.set_option("display.precision", 2) + df = pd.DataFrame(rows) + df.to_csv(root_path / "raw_results.csv") + + dump_to_excel(df, root_path / "results.xlsx") + + +def dump_to_excel(df, output_path: Path): + # to have all columns, not only pivot's values, but also index one. 
+ print(df.columns) + + writer = pd.ExcelWriter(output_path, engine="xlsxwriter") + df.to_excel(writer, sheet_name="all", index=False) + # (max_row, max_col) = df.shape + workbook = writer.book + worksheet = writer.sheets["all"] + + format1 = workbook.add_format({"num_format": "#,##0.00"}) + worksheet.set_column("A:X", 18, format1) + col_names = [{"header": col_name} for col_name in df.columns] + worksheet.add_table( + 0, + 0, + df.shape[0], + df.shape[1] - 1, + { + "columns": col_names, + # 'style' = option Format as table value and is case sensitive + # (look at the exact name into Excel) + "style": None, + }, + ) + worksheet.autofit() + workbook.close() + print("Path to parsed results: ", output_path.resolve()) def main(): @@ -496,7 +571,10 @@ def main(): dump_all_packages(ROOT_MODEL_DIR / "versions.txt") # --------- Parse results --------- - ResultsParser.parse(ROOT_MODEL_DIR) + results = ResultsParser.parse(ROOT_MODEL_DIR) + + # --------- Save results --------- + save_results(results, ROOT_MODEL_DIR) if __name__ == "__main__": From 59e188e020c8c5f6f45c06caa46459bc701e2086 Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Wed, 19 Feb 2025 15:22:56 +0000 Subject: [PATCH 09/11] update --- tools/llm_weight_compression/run.py | 36 ++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/tools/llm_weight_compression/run.py b/tools/llm_weight_compression/run.py index c0ed94bcc67..dcab07411dd 100644 --- a/tools/llm_weight_compression/run.py +++ b/tools/llm_weight_compression/run.py @@ -28,6 +28,7 @@ LM_EVAL_RESULTS_FILENAME = "lm_eval_results.json" OPTIMUM_CLI_PARAMS_FILENAME = "optimum_cli_params.json" +WWB_METRICS_FILENAME = "metrics.csv" class CompressBackendType(Enum): @@ -394,7 +395,7 @@ def compress(model_id: str, root_model_dir: Path, compression_config: Dict[str, class ResultsParser: @staticmethod - def parse_lm_eval(path: Path): + def parse_lm_eval_metrics(path: Path): METRICS = [ "acc", @@ -425,6 +426,14 @@ def parse_lm_eval(path: Path): return results + @staticmethod + def parse_who_what_benchmark_metrics(path: Path): + # TODO(andrey-churkin): Clarify possible field names + df = pd.read_csv(path) + return { + "similarity": float(df['similarity'][0]), + } + @staticmethod def parse_optimum_params(path: Path, fields: List[str]): data = load_json(path) @@ -447,7 +456,13 @@ def parse(root_model_dir: Path): # Parse the `lm_eval_results.json` file path = model_dir.joinpath(LM_EVAL_RESULTS_FILENAME) if path.exists(): - c[configuration_key]["lm_eval"] = ResultsParser.parse_lm_eval(path) + c[configuration_key]["lm_eval"] = ResultsParser.parse_lm_eval_metrics(path) + + # Parse the WWB metrics file + path = model_dir.joinpath(WWB_METRICS_FILENAME) + if path.exists(): + # TODO(andrey-churkin): Find the format specification for the `metrics.csv` file + c[configuration_key]["who_what_benchmark"] = ResultsParser.parse_who_what_benchmark_metrics(path) # Parse the `optimum_cli_params.json` file path = model_dir.joinpath(OPTIMUM_CLI_PARAMS_FILENAME) @@ -485,15 +500,24 @@ def save_results(results: Dict[str, Dict[str, Any]], root_path: Path): "model": val["model"], "configuration": val["configuration"], } + # Add optimum params row.update(val.get("optimum_params", {})) + # Add who_what_benchmark results + row.update(val.get("who_what_benchmark", {})) # Add lm_eval results lm_eval = val.get("lm_eval", []) - for dct in lm_eval: - new_row = row.copy() - new_row.update(dct) - rows.append(new_row) + if lm_eval: + new_rows = [] + for dct in lm_eval: + new_row = row.copy() 
+ new_row.update(dct) + new_rows.append(new_row) + else: + new_rows = [row] + + rows.extend(new_rows) pd.set_option("display.precision", 2) df = pd.DataFrame(rows) From b5f72d5d9dc3fdc2fa0de6e77b1194ec97ecfe2c Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Wed, 19 Feb 2025 16:43:09 +0000 Subject: [PATCH 10/11] minor update --- tools/llm_weight_compression/run.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/llm_weight_compression/run.py b/tools/llm_weight_compression/run.py index dcab07411dd..24899a93e6a 100644 --- a/tools/llm_weight_compression/run.py +++ b/tools/llm_weight_compression/run.py @@ -428,11 +428,14 @@ def parse_lm_eval_metrics(path: Path): @staticmethod def parse_who_what_benchmark_metrics(path: Path): - # TODO(andrey-churkin): Clarify possible field names df = pd.read_csv(path) - return { - "similarity": float(df['similarity'][0]), - } + + val = {} + for name in df: + if name in ["similarity", "FDT", "FDT norm", "SDT", "SDT norm"]: + val[name] = float(df[name][0]) + + return val @staticmethod def parse_optimum_params(path: Path, fields: List[str]): From 3e4cfca6942df39b929e9f825c348e79569e1a3e Mon Sep 17 00:00:00 2001 From: Andrey Churkin Date: Thu, 20 Feb 2025 14:25:03 +0000 Subject: [PATCH 11/11] minor improvements --- tools/llm_weight_compression/run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/llm_weight_compression/run.py b/tools/llm_weight_compression/run.py index 24899a93e6a..5ddc6d6e42a 100644 --- a/tools/llm_weight_compression/run.py +++ b/tools/llm_weight_compression/run.py @@ -167,7 +167,7 @@ def visualize_experiments(model_id: str, params_grid: List[Params]): :param params_grid: """ rows = [[model_id, params.get_key()] for params in params_grid] - print("List of configurations to test out:") + print(f"List of configurations to test out ({len(params_grid)}):") print(tabulate(tabular_data=rows, headers=["Model ID", "Experiment"], tablefmt="mixed_grid")) @@ -577,7 +577,7 @@ def main(): # --------- Compress --------- compression_config = config["compression"] - compress(args.model_id, ROOT_MODEL_DIR, compression_config) + compress(args.model_id, ROOT_MODEL_DIR, compression_config, show_only=args.show_only) if args.show_only: return
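
A note on the parameter grid: each entry in `compression.params` is expanded into the Cartesian product of its value lists, and every resulting combination becomes one experiment directory named by `OptimumCLIParams.get_key()`. A minimal self-contained sketch of that expansion, mirroring `get_all_param_combinations()` with the value lists copied from the `config_optimum_lm_eval.json` in this series (the config literal is inlined here rather than loaded from disk):

```python
import itertools

# Value lists copied from config_optimum_lm_eval.json in this series.
spec = {
    "task": ["text-generation"],
    "weight_format": ["int4"],
    "ratio": [0.2, 0.4],
    "group_size": [64, 128],
    "awq": [False, True],
    "dataset": ["auto"],
}

# Cartesian product over the value lists, as in get_all_param_combinations().
keys = list(spec.keys())
combos = [dict(zip(keys, values)) for values in itertools.product(*spec.values())]

# 1 task x 1 format x 2 ratios x 2 group sizes x 2 awq flags x 1 dataset
# = 8 experiments for this config.
print(len(combos))  # 8

# get_key()-style directory names, e.g. "int4_r0.2_gs64_auto" and
# "int4_r0.4_gs128_auto_awq" (the dataset token precedes the awq flag).
for c in combos:
    parts = [c["weight_format"], f"r{c['ratio']}", f"gs{c['group_size']}", c["dataset"]]
    if c["awq"]:
        parts.append("awq")
    print("_".join(parts))
```

Together with the `fp32` base export, the test command from the README therefore produces nine subdirectories under `{root-dir}/{encoded model ID}`, each holding a `model/` folder plus the dumped parameter and metric files.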
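
Similarly, the flattening done by `save_results()` can be previewed in isolation. The sketch below follows the dictionary shape documented in the `save_results` comment block (one collected record per configuration key); the metric values are made-up placeholders for illustration, not measured results:

```python
import pandas as pd

# Shape mirrors ResultsParser.parse(): configuration key -> collected data.
results = {
    "int4_r0.2_gs64_auto": {
        "model": "opt-125m",
        "configuration": "int4_r0.2_gs64_auto",
        "optimum_params": {"weight_format": "int4", "ratio": 0.2, "group_size": 64},
        "lm_eval": [{"task": "wikitext", "word_perplexity": 43.2, "limit": 3}],  # placeholder numbers
    },
    "fp32": {
        "model": "opt-125m",
        "configuration": "fp32",
        "lm_eval": [{"task": "wikitext", "word_perplexity": 41.8, "limit": 3}],  # placeholder numbers
    },
}

# Flatten as in save_results(): one output row per (configuration, lm_eval record),
# with optimum and who_what_benchmark fields merged into every row.
rows = []
for val in results.values():
    base = {"model": val["model"], "configuration": val["configuration"]}
    base.update(val.get("optimum_params", {}))
    base.update(val.get("who_what_benchmark", {}))
    for record in val.get("lm_eval", [{}]):
        row = base.copy()
        row.update(record)
        rows.append(row)

# Columns are the union of all row keys; missing values become NaN,
# matching what pd.DataFrame(rows) produces in save_results().
print(pd.DataFrame(rows))
```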