From aa3d22cec9b19eb48000719c74a51610bd42c5b7 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Tue, 25 Feb 2025 14:46:01 -0500 Subject: [PATCH 1/4] force gpu usage for blockwise8, add info to supported precisions --- nvflare/app_opt/pt/quantization/constant.py | 10 +++-- .../app_opt/pt/quantization/dequantizor.py | 18 ++++---- nvflare/app_opt/pt/quantization/quantizor.py | 43 +++++++++---------- 3 files changed, 36 insertions(+), 35 deletions(-) diff --git a/nvflare/app_opt/pt/quantization/constant.py b/nvflare/app_opt/pt/quantization/constant.py index b26970c8dd..d4d6acb1e7 100644 --- a/nvflare/app_opt/pt/quantization/constant.py +++ b/nvflare/app_opt/pt/quantization/constant.py @@ -12,15 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Supported Input Data Type +# Message quantization is mainly for reducing the message that can be +# significantly large, e.g. LLMs. Thus, the supported input data types +# we consider are common ones during LLM training, including fp32, fp16, and bf16. DATA_TYPE = [ - "FLOAT64", "FLOAT32", "FLOAT16", "BFLOAT16", - "UINT8", - "INT8", ] +# Supported Quantization Type to reduce the above input data types +# The quantization types are mainly for reducing the model size, +# Hence, we support 16-, 8-, and 4-bits quantization. QUANTIZATION_TYPE = [ "FLOAT16", "BLOCKWISE8", diff --git a/nvflare/app_opt/pt/quantization/dequantizor.py b/nvflare/app_opt/pt/quantization/dequantizor.py index bd63da429d..94fd067ff8 100644 --- a/nvflare/app_opt/pt/quantization/dequantizor.py +++ b/nvflare/app_opt/pt/quantization/dequantizor.py @@ -84,17 +84,18 @@ def dequantization( params[param_name] = values elif quantization_type in ["blockwise8", "float4", "normfloat4"]: # use bitsandbytes to dequantize the values + # need GPU for general support # extract quantization state if quantization_type == "blockwise8": if source_data_format == "numpy": # first convert numpy array to tensor if numpy - quantized = torch.as_tensor(values) - absmax = torch.as_tensor(quant_state[param_name]["absmax"]) - code = torch.as_tensor(quant_state[param_name]["code"]) + quantized = torch.as_tensor(values).cuda() + absmax = torch.as_tensor(quant_state[param_name]["absmax"]).cuda() + code = torch.as_tensor(quant_state[param_name]["code"]).cuda() elif source_data_format == "torch": - quantized = values - absmax = quant_state[param_name]["absmax"] - code = quant_state[param_name]["code"] + quantized = values.cuda() + absmax = quant_state[param_name]["absmax"].cuda() + code = quant_state[param_name]["code"].cuda() # de-quanitze dequantized = dequantize_blockwise(quantized, absmax=absmax, code=code) else: @@ -125,6 +126,7 @@ def dequantization( dequantized = dequantize_4bit(quantized, quantize_state, quant_type="fp4") else: dequantized = dequantize_4bit(quantized, quantize_state, quant_type="nf4") + if source_data_format == "numpy": params[param_name] = dequantized.cpu().numpy() elif source_data_format == "torch": @@ -135,16 +137,12 @@ def dequantization( # convert back to original data type if source_data_type == "float32": params[param_name] = params[param_name].astype(np.float32) - elif source_data_type == "float64": - params[param_name] = params[param_name].astype(np.float64) elif source_data_type == "float16": params[param_name] = params[param_name].astype(np.float16) elif source_data_format == "torch": # convert back to original data type if source_data_type == "float32": params[param_name] = params[param_name].float() - elif source_data_type 
== "float64": - params[param_name] = params[param_name].double() elif source_data_type == "float16": params[param_name] = params[param_name].half() elif source_data_type == "bfloat16": diff --git a/nvflare/app_opt/pt/quantization/quantizor.py b/nvflare/app_opt/pt/quantization/quantizor.py index 43f7f7c117..9c38eedae9 100644 --- a/nvflare/app_opt/pt/quantization/quantizor.py +++ b/nvflare/app_opt/pt/quantization/quantizor.py @@ -120,33 +120,31 @@ def quantization(self, params: dict, fl_ctx: FLContext): elif self.quantization_type in ["blockwise8", "float4", "normfloat4"]: # use bitsandbytes to quantize the values # input is a tensor, output is a tuple of (quantized tensor, quantized_state) - if self.quantization_type == "blockwise8": - if source_data_format == "numpy": - # if numpy, first convert numpy array to tensor - values_tensor = torch.as_tensor(values) - elif source_data_format == "torch": - values_tensor = values - # then quantize the tensor + # CPU has limited support for 8- and 4-bits quantization + # For general purpose, here we use GPU + if source_data_format == "numpy": + # if numpy, first convert numpy array to tensor, need to use GPU + values_tensor = torch.as_tensor(values).cuda() + elif source_data_format == "torch": + # if torch, directly use the tensor, need to use GPU + values_tensor = values.cuda() + + if self.quantization_type == "blockwise8": + # quantize the tensor quantized, quantized_state = quantize_blockwise(values_tensor) # add the quantization state and values, keep source data format if source_data_format == "numpy": - quant_state[param_name]["absmax"] = quantized_state.absmax.numpy() - quant_state[param_name]["code"] = quantized_state.code.numpy() - values = quantized.numpy() + quant_state[param_name]["absmax"] = quantized_state.absmax.cpu().numpy() + quant_state[param_name]["code"] = quantized_state.code.cpu().numpy() + values = quantized.cpu().numpy() elif source_data_format == "torch": - quant_state[param_name]["absmax"] = quantized_state.absmax - quant_state[param_name]["code"] = quantized_state.code - values = quantized + quant_state[param_name]["absmax"] = quantized_state.absmax.cpu() + quant_state[param_name]["code"] = quantized_state.code.cpu() + values = quantized.cpu() n_bytes_meta += quant_state[param_name]["absmax"].nbytes n_bytes_meta += quant_state[param_name]["code"].nbytes else: - if source_data_format == "numpy": - # if numpy, first convert numpy array to tensor, need to use GPU - values_tensor = torch.as_tensor(values).cuda() - elif source_data_format == "torch": - # if torch, directly use the tensor, need to use GPU - values_tensor = values.cuda() # then quantize the tensor if self.quantization_type == "float4": quantized, quantized_state = quantize_4bit(values_tensor, quant_type="fp4") @@ -154,7 +152,7 @@ def quantization(self, params: dict, fl_ctx: FLContext): quantized, quantized_state = quantize_4bit(values_tensor, quant_type="nf4") # add the quantization state and values, keep source data format quantized_state = quantized_state.as_dict() - + # prepared the message for state_name, state in quantized_state.items(): if isinstance(state, torch.Tensor): if source_data_format == "numpy": @@ -171,6 +169,7 @@ def quantization(self, params: dict, fl_ctx: FLContext): values = quantized.cpu().numpy() elif source_data_format == "torch": values = quantized.cpu() + params[param_name] = values n_bytes_after += params[param_name].nbytes @@ -203,8 +202,8 @@ def process_dxo(self, dxo: DXO, shareable: Shareable, fl_ctx: FLContext) -> Unio # thus the 
subsequent communications to the rest of clients will no longer need to apply quantization # This will not apply to client job, since the client job will be 1-1 and quantization applies to each client # Potentially: - # If clients talks to each other, it will also be 1-N and same rule applies - # If 1-N server-client filters can be different (Filter_1 applies to server-client_subset_1, etc.), then + # - If clients talks to each other, it will also be 1-N and same rule applies + # - If 1-N server-client filters can be different (Filter_1 applies to server-client_subset_1, etc.), then # a deep copy of the server data should be made by filter before applying a different filter # quantized_flag None if does not exist in meta From 63b952719498cc374bc5ea41944913873b2d6ca4 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Thu, 27 Feb 2025 14:28:34 -0500 Subject: [PATCH 2/4] class rename --- examples/advanced/llm_hf/sft_job.py | 8 ++++---- .../08.2_llm_sft/sft_job.py | 8 ++++---- .../08.3_llm_peft/peft_job.py | 8 ++++---- .../08.4_llm_quantization/sft_job.py | 8 ++++---- .../pt/quantization/{dequantizor.py => dequantizer.py} | 2 +- .../pt/quantization/{quantizor.py => quantizer.py} | 2 +- 6 files changed, 18 insertions(+), 18 deletions(-) rename nvflare/app_opt/pt/quantization/{dequantizor.py => dequantizer.py} (99%) rename nvflare/app_opt/pt/quantization/{quantizor.py => quantizer.py} (99%) diff --git a/examples/advanced/llm_hf/sft_job.py b/examples/advanced/llm_hf/sft_job.py index c13cbb11d1..791bbd08eb 100644 --- a/examples/advanced/llm_hf/sft_job.py +++ b/examples/advanced/llm_hf/sft_job.py @@ -19,8 +19,8 @@ from nvflare.app_common.widgets.intime_model_selector import IntimeModelSelector from nvflare.app_common.workflows.fedavg import FedAvg from nvflare.app_opt.pt.file_model_persistor import PTFileModelPersistor -from nvflare.app_opt.pt.quantization.dequantizor import ModelDequantizor -from nvflare.app_opt.pt.quantization.quantizor import ModelQuantizor +from nvflare.app_opt.pt.quantization.dequantizer import ModelDequantizer +from nvflare.app_opt.pt.quantization.quantizer import ModelQuantizer from nvflare.job_config.script_runner import ScriptRunner @@ -67,8 +67,8 @@ def main(): if args.quantize_mode: # If using quantization, add quantize filters. 
- quantizor = ModelQuantizor(quantization_type=args.quantize_mode) - dequantizor = ModelDequantizor() + quantizor = ModelQuantizer(quantization_type=args.quantize_mode) + dequantizor = ModelDequantizer() job.to(quantizor, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) job.to(dequantizor, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) diff --git a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/sft_job.py b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/sft_job.py index ad73a4da8c..b64795a4ea 100644 --- a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/sft_job.py +++ b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/sft_job.py @@ -19,8 +19,8 @@ from nvflare.app_common.widgets.intime_model_selector import IntimeModelSelector from nvflare.app_common.workflows.fedavg import FedAvg from nvflare.app_opt.pt.file_model_persistor import PTFileModelPersistor -from nvflare.app_opt.pt.quantization.dequantizor import ModelDequantizor -from nvflare.app_opt.pt.quantization.quantizor import ModelQuantizor +from nvflare.app_opt.pt.quantization.dequantizer import ModelDequantizer +from nvflare.app_opt.pt.quantization.quantizer import ModelQuantizer from nvflare.job_config.script_runner import ScriptRunner @@ -67,8 +67,8 @@ def main(): if args.quantize_mode: # If using quantization, add quantize filters. - quantizor = ModelQuantizor(quantization_type=args.quantize_mode) - dequantizor = ModelDequantizor() + quantizor = ModelQuantizer(quantization_type=args.quantize_mode) + dequantizor = ModelDequantizer() job.to(quantizor, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) job.to(dequantizor, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) diff --git a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/peft_job.py b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/peft_job.py index 5e6fd99f4e..e80a406fad 100644 --- a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/peft_job.py +++ b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/peft_job.py @@ -19,8 +19,8 @@ from nvflare.app_common.widgets.intime_model_selector import IntimeModelSelector from nvflare.app_common.workflows.fedavg import FedAvg from nvflare.app_opt.pt.file_model_persistor import PTFileModelPersistor -from nvflare.app_opt.pt.quantization.dequantizor import ModelDequantizor -from nvflare.app_opt.pt.quantization.quantizor import ModelQuantizor +from nvflare.app_opt.pt.quantization.dequantizer import ModelDequantizer +from nvflare.app_opt.pt.quantization.quantizer import ModelQuantizer from nvflare.job_config.script_runner import ScriptRunner @@ -67,8 +67,8 @@ def main(): if args.quantize_mode: # If using quantization, add quantize filters. 
- quantizor = ModelQuantizor(quantization_type=args.quantize_mode) - dequantizor = ModelDequantizor() + quantizor = ModelQuantizer(quantization_type=args.quantize_mode) + dequantizor = ModelDequantizer() job.to(quantizor, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) job.to(dequantizor, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) diff --git a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/sft_job.py b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/sft_job.py index c13cbb11d1..791bbd08eb 100644 --- a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/sft_job.py +++ b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/sft_job.py @@ -19,8 +19,8 @@ from nvflare.app_common.widgets.intime_model_selector import IntimeModelSelector from nvflare.app_common.workflows.fedavg import FedAvg from nvflare.app_opt.pt.file_model_persistor import PTFileModelPersistor -from nvflare.app_opt.pt.quantization.dequantizor import ModelDequantizor -from nvflare.app_opt.pt.quantization.quantizor import ModelQuantizor +from nvflare.app_opt.pt.quantization.dequantizer import ModelDequantizer +from nvflare.app_opt.pt.quantization.quantizer import ModelQuantizer from nvflare.job_config.script_runner import ScriptRunner @@ -67,8 +67,8 @@ def main(): if args.quantize_mode: # If using quantization, add quantize filters. - quantizor = ModelQuantizor(quantization_type=args.quantize_mode) - dequantizor = ModelDequantizor() + quantizor = ModelQuantizer(quantization_type=args.quantize_mode) + dequantizor = ModelDequantizer() job.to(quantizor, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) job.to(dequantizor, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) diff --git a/nvflare/app_opt/pt/quantization/dequantizor.py b/nvflare/app_opt/pt/quantization/dequantizer.py similarity index 99% rename from nvflare/app_opt/pt/quantization/dequantizor.py rename to nvflare/app_opt/pt/quantization/dequantizer.py index 94fd067ff8..9d56723fb6 100644 --- a/nvflare/app_opt/pt/quantization/dequantizor.py +++ b/nvflare/app_opt/pt/quantization/dequantizer.py @@ -26,7 +26,7 @@ from nvflare.app_opt.pt.quantization.constant import QUANTIZATION_TYPE -class ModelDequantizor(DXOFilter): +class ModelDequantizer(DXOFilter): def __init__(self): """Filter to dequantize Shareable object to recover from quantization diff --git a/nvflare/app_opt/pt/quantization/quantizor.py b/nvflare/app_opt/pt/quantization/quantizer.py similarity index 99% rename from nvflare/app_opt/pt/quantization/quantizor.py rename to nvflare/app_opt/pt/quantization/quantizer.py index 9c38eedae9..91ca53d72c 100644 --- a/nvflare/app_opt/pt/quantization/quantizor.py +++ b/nvflare/app_opt/pt/quantization/quantizer.py @@ -26,7 +26,7 @@ from nvflare.app_opt.pt.quantization.constant import DATA_TYPE, QUANTIZATION_TYPE -class ModelQuantizor(DXOFilter): +class ModelQuantizer(DXOFilter): def __init__( self, quantization_type="float16", From 0d4c196bb5b5c3e598d27776fc0bd25b74fe9c19 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Thu, 27 Feb 2025 14:31:22 -0500 Subject: [PATCH 3/4] add GPU note in constant --- nvflare/app_opt/pt/quantization/constant.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/nvflare/app_opt/pt/quantization/constant.py b/nvflare/app_opt/pt/quantization/constant.py index d4d6acb1e7..731b51f837 100644 --- a/nvflare/app_opt/pt/quantization/constant.py +++ b/nvflare/app_opt/pt/quantization/constant.py @@ -25,6 +25,7 @@ # Supported Quantization Type to reduce the above input data types # The quantization types are mainly for reducing the model size, # Hence, we support 16-, 8-, and 4-bits quantization. +# Note that 8- and 4-bits quantization needs GPU support. QUANTIZATION_TYPE = [ "FLOAT16", "BLOCKWISE8", From c3396a58e88531fa65d4d1aa9cec86678769b888 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Thu, 27 Feb 2025 16:11:10 -0500 Subject: [PATCH 4/4] name update --- examples/advanced/llm_hf/sft_job.py | 12 ++++++------ .../08.2_llm_sft/sft_job.py | 12 ++++++------ .../08.3_llm_peft/peft_job.py | 12 ++++++------ .../08.4_llm_quantization/sft_job.py | 12 ++++++------ 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/examples/advanced/llm_hf/sft_job.py b/examples/advanced/llm_hf/sft_job.py index 791bbd08eb..e767b6f617 100644 --- a/examples/advanced/llm_hf/sft_job.py +++ b/examples/advanced/llm_hf/sft_job.py @@ -67,10 +67,10 @@ def main(): if args.quantize_mode: # If using quantization, add quantize filters. - quantizor = ModelQuantizer(quantization_type=args.quantize_mode) - dequantizor = ModelDequantizer() - job.to(quantizor, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) - job.to(dequantizor, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) + quantizer = ModelQuantizer(quantization_type=args.quantize_mode) + dequantizer = ModelDequantizer() + job.to(quantizer, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) + job.to(dequantizer, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) # Define the model persistor and send to server # First send the model to the server @@ -106,8 +106,8 @@ def main(): job.to(runner, site_name, tasks=["train"]) if args.quantize_mode: - job.to(quantizor, site_name, tasks=["train"], filter_type=FilterType.TASK_RESULT) - job.to(dequantizor, site_name, tasks=["train"], filter_type=FilterType.TASK_DATA) + job.to(quantizer, site_name, tasks=["train"], filter_type=FilterType.TASK_RESULT) + job.to(dequantizer, site_name, tasks=["train"], filter_type=FilterType.TASK_DATA) # Export the job print("job_dir=", job_dir) diff --git a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/sft_job.py b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/sft_job.py index b64795a4ea..bbc0f6b91b 100644 --- a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/sft_job.py +++ b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/sft_job.py @@ -67,10 +67,10 @@ def main(): if args.quantize_mode: # If using quantization, add quantize filters. 
- quantizor = ModelQuantizer(quantization_type=args.quantize_mode) - dequantizor = ModelDequantizer() - job.to(quantizor, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) - job.to(dequantizor, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) + quantizer = ModelQuantizer(quantization_type=args.quantize_mode) + dequantizer = ModelDequantizer() + job.to(quantizer, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) + job.to(dequantizer, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) # Define the model persistor and send to server # First send the model to the server @@ -106,8 +106,8 @@ def main(): job.to(runner, site_name, tasks=["train"]) if args.quantize_mode: - job.to(quantizor, site_name, tasks=["train"], filter_type=FilterType.TASK_RESULT) - job.to(dequantizor, site_name, tasks=["train"], filter_type=FilterType.TASK_DATA) + job.to(quantizer, site_name, tasks=["train"], filter_type=FilterType.TASK_RESULT) + job.to(dequantizer, site_name, tasks=["train"], filter_type=FilterType.TASK_DATA) # Export the job print("job_dir=", job_dir) diff --git a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/peft_job.py b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/peft_job.py index e80a406fad..6bd90d12b2 100644 --- a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/peft_job.py +++ b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/peft_job.py @@ -67,10 +67,10 @@ def main(): if args.quantize_mode: # If using quantization, add quantize filters. 
- quantizor = ModelQuantizer(quantization_type=args.quantize_mode) - dequantizor = ModelDequantizer() - job.to(quantizor, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) - job.to(dequantizor, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) + quantizer = ModelQuantizer(quantization_type=args.quantize_mode) + dequantizer = ModelDequantizer() + job.to(quantizer, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) + job.to(dequantizer, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) # Define the model persistor and send to server # First send the model to the server @@ -106,8 +106,8 @@ def main(): job.to(runner, site_name, tasks=["train"]) if args.quantize_mode: - job.to(quantizor, site_name, tasks=["train"], filter_type=FilterType.TASK_RESULT) - job.to(dequantizor, site_name, tasks=["train"], filter_type=FilterType.TASK_DATA) + job.to(quantizer, site_name, tasks=["train"], filter_type=FilterType.TASK_RESULT) + job.to(dequantizer, site_name, tasks=["train"], filter_type=FilterType.TASK_DATA) # Export the job print("job_dir=", job_dir) diff --git a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/sft_job.py b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/sft_job.py index 791bbd08eb..e767b6f617 100644 --- a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/sft_job.py +++ b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/sft_job.py @@ -67,10 +67,10 @@ def main(): if args.quantize_mode: # If using quantization, add quantize filters. - quantizor = ModelQuantizer(quantization_type=args.quantize_mode) - dequantizor = ModelDequantizer() - job.to(quantizor, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) - job.to(dequantizor, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) + quantizer = ModelQuantizer(quantization_type=args.quantize_mode) + dequantizer = ModelDequantizer() + job.to(quantizer, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) + job.to(dequantizer, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) # Define the model persistor and send to server # First send the model to the server @@ -106,8 +106,8 @@ def main(): job.to(runner, site_name, tasks=["train"]) if args.quantize_mode: - job.to(quantizor, site_name, tasks=["train"], filter_type=FilterType.TASK_RESULT) - job.to(dequantizor, site_name, tasks=["train"], filter_type=FilterType.TASK_DATA) + job.to(quantizer, site_name, tasks=["train"], filter_type=FilterType.TASK_RESULT) + job.to(dequantizer, site_name, tasks=["train"], filter_type=FilterType.TASK_DATA) # Export the job print("job_dir=", job_dir)
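
Note (illustrative, not part of the patches): the sketch below shows the GPU round trip that PATCH 1/4 enforces for BLOCKWISE8 quantization — quantize on CUDA with bitsandbytes, ship the quantized values together with the absmax/code state on CPU, then dequantize on CUDA and cast back to the source dtype. It assumes a CUDA device and bitsandbytes are installed; the variable names are hypothetical and only mirror the ModelQuantizer/ModelDequantizer filter code above.

    import torch
    from bitsandbytes.functional import dequantize_blockwise, quantize_blockwise

    # Source weights in one of the supported input dtypes (fp32 here), moved to GPU
    # because CPU support for 8- and 4-bit quantization in bitsandbytes is limited.
    values = torch.randn(1024, dtype=torch.float32).cuda()

    # Blockwise 8-bit quantization: returns the quantized tensor plus a state
    # carrying the per-block absmax values and the quantization codebook.
    quantized, state = quantize_blockwise(values)

    # As in ModelQuantizer, move everything to CPU before shipping so the payload
    # can be serialized (or converted to numpy for numpy-format sources).
    payload = quantized.cpu()
    absmax = state.absmax.cpu()
    code = state.code.cpu()

    # As in ModelDequantizer, move back to GPU, dequantize with the shipped state,
    # then restore the original dtype (float32 in this sketch).
    dequantized = dequantize_blockwise(payload.cuda(), absmax=absmax.cuda(), code=code.cuda())
    restored = dequantized.float().cpu()

The same pattern applies to the FLOAT4/NORMFLOAT4 types via quantize_4bit/dequantize_4bit, which is why the constant.py comment added in PATCH 3/4 notes that 8- and 4-bit quantization need GPU support.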