From aa3d22cec9b19eb48000719c74a51610bd42c5b7 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Tue, 25 Feb 2025 14:46:01 -0500 Subject: [PATCH 1/4] force gpu usage for blockwise8, add info to supported precisions --- nvflare/app_opt/pt/quantization/constant.py | 10 +++-- .../app_opt/pt/quantization/dequantizor.py | 18 ++++---- nvflare/app_opt/pt/quantization/quantizor.py | 43 +++++++++---------- 3 files changed, 36 insertions(+), 35 deletions(-) diff --git a/nvflare/app_opt/pt/quantization/constant.py b/nvflare/app_opt/pt/quantization/constant.py index b26970c8dd..d4d6acb1e7 100644 --- a/nvflare/app_opt/pt/quantization/constant.py +++ b/nvflare/app_opt/pt/quantization/constant.py @@ -12,15 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Supported Input Data Type +# Message quantization is mainly for reducing the message that can be +# significantly large, e.g. LLMs. Thus, the supported input data types +# we consider are common ones during LLM training, including fp32, fp16, and bf16. DATA_TYPE = [ - "FLOAT64", "FLOAT32", "FLOAT16", "BFLOAT16", - "UINT8", - "INT8", ] +# Supported Quantization Type to reduce the above input data types +# The quantization types are mainly for reducing the model size, +# Hence, we support 16-, 8-, and 4-bits quantization. QUANTIZATION_TYPE = [ "FLOAT16", "BLOCKWISE8", diff --git a/nvflare/app_opt/pt/quantization/dequantizor.py b/nvflare/app_opt/pt/quantization/dequantizor.py index bd63da429d..94fd067ff8 100644 --- a/nvflare/app_opt/pt/quantization/dequantizor.py +++ b/nvflare/app_opt/pt/quantization/dequantizor.py @@ -84,17 +84,18 @@ def dequantization( params[param_name] = values elif quantization_type in ["blockwise8", "float4", "normfloat4"]: # use bitsandbytes to dequantize the values + # need GPU for general support # extract quantization state if quantization_type == "blockwise8": if source_data_format == "numpy": # first convert numpy array to tensor if numpy - quantized = torch.as_tensor(values) - absmax = torch.as_tensor(quant_state[param_name]["absmax"]) - code = torch.as_tensor(quant_state[param_name]["code"]) + quantized = torch.as_tensor(values).cuda() + absmax = torch.as_tensor(quant_state[param_name]["absmax"]).cuda() + code = torch.as_tensor(quant_state[param_name]["code"]).cuda() elif source_data_format == "torch": - quantized = values - absmax = quant_state[param_name]["absmax"] - code = quant_state[param_name]["code"] + quantized = values.cuda() + absmax = quant_state[param_name]["absmax"].cuda() + code = quant_state[param_name]["code"].cuda() # de-quanitze dequantized = dequantize_blockwise(quantized, absmax=absmax, code=code) else: @@ -125,6 +126,7 @@ def dequantization( dequantized = dequantize_4bit(quantized, quantize_state, quant_type="fp4") else: dequantized = dequantize_4bit(quantized, quantize_state, quant_type="nf4") + if source_data_format == "numpy": params[param_name] = dequantized.cpu().numpy() elif source_data_format == "torch": @@ -135,16 +137,12 @@ def dequantization( # convert back to original data type if source_data_type == "float32": params[param_name] = params[param_name].astype(np.float32) - elif source_data_type == "float64": - params[param_name] = params[param_name].astype(np.float64) elif source_data_type == "float16": params[param_name] = params[param_name].astype(np.float16) elif source_data_format == "torch": # convert back to original data type if source_data_type == "float32": params[param_name] = params[param_name].float() - elif source_data_type 
== "float64": - params[param_name] = params[param_name].double() elif source_data_type == "float16": params[param_name] = params[param_name].half() elif source_data_type == "bfloat16": diff --git a/nvflare/app_opt/pt/quantization/quantizor.py b/nvflare/app_opt/pt/quantization/quantizor.py index 43f7f7c117..9c38eedae9 100644 --- a/nvflare/app_opt/pt/quantization/quantizor.py +++ b/nvflare/app_opt/pt/quantization/quantizor.py @@ -120,33 +120,31 @@ def quantization(self, params: dict, fl_ctx: FLContext): elif self.quantization_type in ["blockwise8", "float4", "normfloat4"]: # use bitsandbytes to quantize the values # input is a tensor, output is a tuple of (quantized tensor, quantized_state) - if self.quantization_type == "blockwise8": - if source_data_format == "numpy": - # if numpy, first convert numpy array to tensor - values_tensor = torch.as_tensor(values) - elif source_data_format == "torch": - values_tensor = values - # then quantize the tensor + # CPU has limited support for 8- and 4-bits quantization + # For general purpose, here we use GPU + if source_data_format == "numpy": + # if numpy, first convert numpy array to tensor, need to use GPU + values_tensor = torch.as_tensor(values).cuda() + elif source_data_format == "torch": + # if torch, directly use the tensor, need to use GPU + values_tensor = values.cuda() + + if self.quantization_type == "blockwise8": + # quantize the tensor quantized, quantized_state = quantize_blockwise(values_tensor) # add the quantization state and values, keep source data format if source_data_format == "numpy": - quant_state[param_name]["absmax"] = quantized_state.absmax.numpy() - quant_state[param_name]["code"] = quantized_state.code.numpy() - values = quantized.numpy() + quant_state[param_name]["absmax"] = quantized_state.absmax.cpu().numpy() + quant_state[param_name]["code"] = quantized_state.code.cpu().numpy() + values = quantized.cpu().numpy() elif source_data_format == "torch": - quant_state[param_name]["absmax"] = quantized_state.absmax - quant_state[param_name]["code"] = quantized_state.code - values = quantized + quant_state[param_name]["absmax"] = quantized_state.absmax.cpu() + quant_state[param_name]["code"] = quantized_state.code.cpu() + values = quantized.cpu() n_bytes_meta += quant_state[param_name]["absmax"].nbytes n_bytes_meta += quant_state[param_name]["code"].nbytes else: - if source_data_format == "numpy": - # if numpy, first convert numpy array to tensor, need to use GPU - values_tensor = torch.as_tensor(values).cuda() - elif source_data_format == "torch": - # if torch, directly use the tensor, need to use GPU - values_tensor = values.cuda() # then quantize the tensor if self.quantization_type == "float4": quantized, quantized_state = quantize_4bit(values_tensor, quant_type="fp4") @@ -154,7 +152,7 @@ def quantization(self, params: dict, fl_ctx: FLContext): quantized, quantized_state = quantize_4bit(values_tensor, quant_type="nf4") # add the quantization state and values, keep source data format quantized_state = quantized_state.as_dict() - + # prepared the message for state_name, state in quantized_state.items(): if isinstance(state, torch.Tensor): if source_data_format == "numpy": @@ -171,6 +169,7 @@ def quantization(self, params: dict, fl_ctx: FLContext): values = quantized.cpu().numpy() elif source_data_format == "torch": values = quantized.cpu() + params[param_name] = values n_bytes_after += params[param_name].nbytes @@ -203,8 +202,8 @@ def process_dxo(self, dxo: DXO, shareable: Shareable, fl_ctx: FLContext) -> Unio # thus the 
subsequent communications to the rest of clients will no longer need to apply quantization # This will not apply to client job, since the client job will be 1-1 and quantization applies to each client # Potentially: - # If clients talks to each other, it will also be 1-N and same rule applies - # If 1-N server-client filters can be different (Filter_1 applies to server-client_subset_1, etc.), then + # - If clients talks to each other, it will also be 1-N and same rule applies + # - If 1-N server-client filters can be different (Filter_1 applies to server-client_subset_1, etc.), then # a deep copy of the server data should be made by filter before applying a different filter # quantized_flag None if does not exist in meta From 63b952719498cc374bc5ea41944913873b2d6ca4 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Thu, 27 Feb 2025 14:28:34 -0500 Subject: [PATCH 2/4] class rename --- examples/advanced/llm_hf/sft_job.py | 8 ++++---- .../08.2_llm_sft/sft_job.py | 8 ++++---- .../08.3_llm_peft/peft_job.py | 8 ++++---- .../08.4_llm_quantization/sft_job.py | 8 ++++---- .../pt/quantization/{dequantizor.py => dequantizer.py} | 2 +- .../pt/quantization/{quantizor.py => quantizer.py} | 2 +- 6 files changed, 18 insertions(+), 18 deletions(-) rename nvflare/app_opt/pt/quantization/{dequantizor.py => dequantizer.py} (99%) rename nvflare/app_opt/pt/quantization/{quantizor.py => quantizer.py} (99%) diff --git a/examples/advanced/llm_hf/sft_job.py b/examples/advanced/llm_hf/sft_job.py index c13cbb11d1..791bbd08eb 100644 --- a/examples/advanced/llm_hf/sft_job.py +++ b/examples/advanced/llm_hf/sft_job.py @@ -19,8 +19,8 @@ from nvflare.app_common.widgets.intime_model_selector import IntimeModelSelector from nvflare.app_common.workflows.fedavg import FedAvg from nvflare.app_opt.pt.file_model_persistor import PTFileModelPersistor -from nvflare.app_opt.pt.quantization.dequantizor import ModelDequantizor -from nvflare.app_opt.pt.quantization.quantizor import ModelQuantizor +from nvflare.app_opt.pt.quantization.dequantizer import ModelDequantizer +from nvflare.app_opt.pt.quantization.quantizer import ModelQuantizer from nvflare.job_config.script_runner import ScriptRunner @@ -67,8 +67,8 @@ def main(): if args.quantize_mode: # If using quantization, add quantize filters. 
- quantizor = ModelQuantizor(quantization_type=args.quantize_mode) - dequantizor = ModelDequantizor() + quantizor = ModelQuantizer(quantization_type=args.quantize_mode) + dequantizor = ModelDequantizer() job.to(quantizor, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) job.to(dequantizor, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) diff --git a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/sft_job.py b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/sft_job.py index ad73a4da8c..b64795a4ea 100644 --- a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/sft_job.py +++ b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/sft_job.py @@ -19,8 +19,8 @@ from nvflare.app_common.widgets.intime_model_selector import IntimeModelSelector from nvflare.app_common.workflows.fedavg import FedAvg from nvflare.app_opt.pt.file_model_persistor import PTFileModelPersistor -from nvflare.app_opt.pt.quantization.dequantizor import ModelDequantizor -from nvflare.app_opt.pt.quantization.quantizor import ModelQuantizor +from nvflare.app_opt.pt.quantization.dequantizer import ModelDequantizer +from nvflare.app_opt.pt.quantization.quantizer import ModelQuantizer from nvflare.job_config.script_runner import ScriptRunner @@ -67,8 +67,8 @@ def main(): if args.quantize_mode: # If using quantization, add quantize filters. - quantizor = ModelQuantizor(quantization_type=args.quantize_mode) - dequantizor = ModelDequantizor() + quantizor = ModelQuantizer(quantization_type=args.quantize_mode) + dequantizor = ModelDequantizer() job.to(quantizor, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) job.to(dequantizor, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) diff --git a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/peft_job.py b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/peft_job.py index 5e6fd99f4e..e80a406fad 100644 --- a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/peft_job.py +++ b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/peft_job.py @@ -19,8 +19,8 @@ from nvflare.app_common.widgets.intime_model_selector import IntimeModelSelector from nvflare.app_common.workflows.fedavg import FedAvg from nvflare.app_opt.pt.file_model_persistor import PTFileModelPersistor -from nvflare.app_opt.pt.quantization.dequantizor import ModelDequantizor -from nvflare.app_opt.pt.quantization.quantizor import ModelQuantizor +from nvflare.app_opt.pt.quantization.dequantizer import ModelDequantizer +from nvflare.app_opt.pt.quantization.quantizer import ModelQuantizer from nvflare.job_config.script_runner import ScriptRunner @@ -67,8 +67,8 @@ def main(): if args.quantize_mode: # If using quantization, add quantize filters. 
- quantizor = ModelQuantizor(quantization_type=args.quantize_mode) - dequantizor = ModelDequantizor() + quantizor = ModelQuantizer(quantization_type=args.quantize_mode) + dequantizor = ModelDequantizer() job.to(quantizor, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) job.to(dequantizor, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) diff --git a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/sft_job.py b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/sft_job.py index c13cbb11d1..791bbd08eb 100644 --- a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/sft_job.py +++ b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/sft_job.py @@ -19,8 +19,8 @@ from nvflare.app_common.widgets.intime_model_selector import IntimeModelSelector from nvflare.app_common.workflows.fedavg import FedAvg from nvflare.app_opt.pt.file_model_persistor import PTFileModelPersistor -from nvflare.app_opt.pt.quantization.dequantizor import ModelDequantizor -from nvflare.app_opt.pt.quantization.quantizor import ModelQuantizor +from nvflare.app_opt.pt.quantization.dequantizer import ModelDequantizer +from nvflare.app_opt.pt.quantization.quantizer import ModelQuantizer from nvflare.job_config.script_runner import ScriptRunner @@ -67,8 +67,8 @@ def main(): if args.quantize_mode: # If using quantization, add quantize filters. - quantizor = ModelQuantizor(quantization_type=args.quantize_mode) - dequantizor = ModelDequantizor() + quantizor = ModelQuantizer(quantization_type=args.quantize_mode) + dequantizor = ModelDequantizer() job.to(quantizor, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) job.to(dequantizor, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) diff --git a/nvflare/app_opt/pt/quantization/dequantizor.py b/nvflare/app_opt/pt/quantization/dequantizer.py similarity index 99% rename from nvflare/app_opt/pt/quantization/dequantizor.py rename to nvflare/app_opt/pt/quantization/dequantizer.py index 94fd067ff8..9d56723fb6 100644 --- a/nvflare/app_opt/pt/quantization/dequantizor.py +++ b/nvflare/app_opt/pt/quantization/dequantizer.py @@ -26,7 +26,7 @@ from nvflare.app_opt.pt.quantization.constant import QUANTIZATION_TYPE -class ModelDequantizor(DXOFilter): +class ModelDequantizer(DXOFilter): def __init__(self): """Filter to dequantize Shareable object to recover from quantization diff --git a/nvflare/app_opt/pt/quantization/quantizor.py b/nvflare/app_opt/pt/quantization/quantizer.py similarity index 99% rename from nvflare/app_opt/pt/quantization/quantizor.py rename to nvflare/app_opt/pt/quantization/quantizer.py index 9c38eedae9..91ca53d72c 100644 --- a/nvflare/app_opt/pt/quantization/quantizor.py +++ b/nvflare/app_opt/pt/quantization/quantizer.py @@ -26,7 +26,7 @@ from nvflare.app_opt.pt.quantization.constant import DATA_TYPE, QUANTIZATION_TYPE -class ModelQuantizor(DXOFilter): +class ModelQuantizer(DXOFilter): def __init__( self, quantization_type="float16", From 0d4c196bb5b5c3e598d27776fc0bd25b74fe9c19 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Thu, 27 Feb 2025 14:31:22 -0500 Subject: [PATCH 3/4] add GPU note in constant --- nvflare/app_opt/pt/quantization/constant.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/nvflare/app_opt/pt/quantization/constant.py b/nvflare/app_opt/pt/quantization/constant.py index d4d6acb1e7..731b51f837 100644 --- a/nvflare/app_opt/pt/quantization/constant.py +++ b/nvflare/app_opt/pt/quantization/constant.py @@ -25,6 +25,7 @@ # Supported Quantization Type to reduce the above input data types # The quantization types are mainly for reducing the model size, # Hence, we support 16-, 8-, and 4-bits quantization. +# Note that 8- and 4-bits quantization needs GPU support. QUANTIZATION_TYPE = [ "FLOAT16", "BLOCKWISE8", From c3396a58e88531fa65d4d1aa9cec86678769b888 Mon Sep 17 00:00:00 2001 From: Ziyue Xu Date: Thu, 27 Feb 2025 16:11:10 -0500 Subject: [PATCH 4/4] name update --- examples/advanced/llm_hf/sft_job.py | 12 ++++++------ .../08.2_llm_sft/sft_job.py | 12 ++++++------ .../08.3_llm_peft/peft_job.py | 12 ++++++------ .../08.4_llm_quantization/sft_job.py | 12 ++++++------ 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/examples/advanced/llm_hf/sft_job.py b/examples/advanced/llm_hf/sft_job.py index 791bbd08eb..e767b6f617 100644 --- a/examples/advanced/llm_hf/sft_job.py +++ b/examples/advanced/llm_hf/sft_job.py @@ -67,10 +67,10 @@ def main(): if args.quantize_mode: # If using quantization, add quantize filters. - quantizor = ModelQuantizer(quantization_type=args.quantize_mode) - dequantizor = ModelDequantizer() - job.to(quantizor, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) - job.to(dequantizor, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) + quantizer = ModelQuantizer(quantization_type=args.quantize_mode) + dequantizer = ModelDequantizer() + job.to(quantizer, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) + job.to(dequantizer, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) # Define the model persistor and send to server # First send the model to the server @@ -106,8 +106,8 @@ def main(): job.to(runner, site_name, tasks=["train"]) if args.quantize_mode: - job.to(quantizor, site_name, tasks=["train"], filter_type=FilterType.TASK_RESULT) - job.to(dequantizor, site_name, tasks=["train"], filter_type=FilterType.TASK_DATA) + job.to(quantizer, site_name, tasks=["train"], filter_type=FilterType.TASK_RESULT) + job.to(dequantizer, site_name, tasks=["train"], filter_type=FilterType.TASK_DATA) # Export the job print("job_dir=", job_dir) diff --git a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/sft_job.py b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/sft_job.py index b64795a4ea..bbc0f6b91b 100644 --- a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/sft_job.py +++ b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.2_llm_sft/sft_job.py @@ -67,10 +67,10 @@ def main(): if args.quantize_mode: # If using quantization, add quantize filters. 
- quantizor = ModelQuantizer(quantization_type=args.quantize_mode) - dequantizor = ModelDequantizer() - job.to(quantizor, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) - job.to(dequantizor, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) + quantizer = ModelQuantizer(quantization_type=args.quantize_mode) + dequantizer = ModelDequantizer() + job.to(quantizer, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) + job.to(dequantizer, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) # Define the model persistor and send to server # First send the model to the server @@ -106,8 +106,8 @@ def main(): job.to(runner, site_name, tasks=["train"]) if args.quantize_mode: - job.to(quantizor, site_name, tasks=["train"], filter_type=FilterType.TASK_RESULT) - job.to(dequantizor, site_name, tasks=["train"], filter_type=FilterType.TASK_DATA) + job.to(quantizer, site_name, tasks=["train"], filter_type=FilterType.TASK_RESULT) + job.to(dequantizer, site_name, tasks=["train"], filter_type=FilterType.TASK_DATA) # Export the job print("job_dir=", job_dir) diff --git a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/peft_job.py b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/peft_job.py index e80a406fad..6bd90d12b2 100644 --- a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/peft_job.py +++ b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.3_llm_peft/peft_job.py @@ -67,10 +67,10 @@ def main(): if args.quantize_mode: # If using quantization, add quantize filters. 
- quantizor = ModelQuantizer(quantization_type=args.quantize_mode) - dequantizor = ModelDequantizer() - job.to(quantizor, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) - job.to(dequantizor, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) + quantizer = ModelQuantizer(quantization_type=args.quantize_mode) + dequantizer = ModelDequantizer() + job.to(quantizer, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) + job.to(dequantizer, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) # Define the model persistor and send to server # First send the model to the server @@ -106,8 +106,8 @@ def main(): job.to(runner, site_name, tasks=["train"]) if args.quantize_mode: - job.to(quantizor, site_name, tasks=["train"], filter_type=FilterType.TASK_RESULT) - job.to(dequantizor, site_name, tasks=["train"], filter_type=FilterType.TASK_DATA) + job.to(quantizer, site_name, tasks=["train"], filter_type=FilterType.TASK_RESULT) + job.to(dequantizer, site_name, tasks=["train"], filter_type=FilterType.TASK_DATA) # Export the job print("job_dir=", job_dir) diff --git a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/sft_job.py b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/sft_job.py index 791bbd08eb..e767b6f617 100644 --- a/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/sft_job.py +++ b/examples/tutorials/self-paced-training/part-4_advanced_federated_learning/chapter-8_federated_LLM_training/08.4_llm_quantization/sft_job.py @@ -67,10 +67,10 @@ def main(): if args.quantize_mode: # If using quantization, add quantize filters. - quantizor = ModelQuantizer(quantization_type=args.quantize_mode) - dequantizor = ModelDequantizer() - job.to(quantizor, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) - job.to(dequantizor, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) + quantizer = ModelQuantizer(quantization_type=args.quantize_mode) + dequantizer = ModelDequantizer() + job.to(quantizer, "server", tasks=["train"], filter_type=FilterType.TASK_DATA) + job.to(dequantizer, "server", tasks=["train"], filter_type=FilterType.TASK_RESULT) # Define the model persistor and send to server # First send the model to the server @@ -106,8 +106,8 @@ def main(): job.to(runner, site_name, tasks=["train"]) if args.quantize_mode: - job.to(quantizor, site_name, tasks=["train"], filter_type=FilterType.TASK_RESULT) - job.to(dequantizor, site_name, tasks=["train"], filter_type=FilterType.TASK_DATA) + job.to(quantizer, site_name, tasks=["train"], filter_type=FilterType.TASK_RESULT) + job.to(dequantizer, site_name, tasks=["train"], filter_type=FilterType.TASK_DATA) # Export the job print("job_dir=", job_dir)
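
Note (illustrative, not part of the patches): the sketch below shows the GPU round trip that PATCH 1/4 enforces for BLOCKWISE8 quantization — quantize on CUDA with bitsandbytes, ship the quantized values together with the absmax/code state on CPU, then dequantize on CUDA and cast back to the source dtype. It assumes a CUDA device and bitsandbytes are installed; the variable names are hypothetical and only mirror the ModelQuantizer/ModelDequantizer filter code above.

    import torch
    from bitsandbytes.functional import dequantize_blockwise, quantize_blockwise

    # Source weights in one of the supported input dtypes (fp32 here), moved to GPU
    # because CPU support for 8- and 4-bit quantization in bitsandbytes is limited.
    values = torch.randn(1024, dtype=torch.float32).cuda()

    # Blockwise 8-bit quantization: returns the quantized tensor plus a state
    # carrying the per-block absmax values and the quantization codebook.
    quantized, state = quantize_blockwise(values)

    # As in ModelQuantizer, move everything to CPU before shipping so the payload
    # can be serialized (or converted to numpy for numpy-format sources).
    payload = quantized.cpu()
    absmax = state.absmax.cpu()
    code = state.code.cpu()

    # As in ModelDequantizer, move back to GPU, dequantize with the shipped state,
    # then restore the original dtype (float32 in this sketch).
    dequantized = dequantize_blockwise(payload.cuda(), absmax=absmax.cuda(), code=code.cuda())
    restored = dequantized.float().cpu()

The same pattern applies to the FLOAT4/NORMFLOAT4 types via quantize_4bit/dequantize_4bit, which is why the constant.py comment added in PATCH 3/4 notes that 8- and 4-bit quantization need GPU support.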