diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/code/lightning_fl_job.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/code/lightning_fl_job.py
new file mode 100644
index 0000000000..b51724a962
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/code/lightning_fl_job.py
@@ -0,0 +1,44 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from src.lit_net import LitNet
+
+from nvflare.app_common.workflows.fedavg import FedAvg
+from nvflare.app_opt.pt.job_config.base_fed_job import BaseFedJob
+from nvflare.job_config.script_runner import ScriptRunner
+
+if __name__ == "__main__":
+ n_clients = 5
+ num_rounds = 2
+
+ job = BaseFedJob(
+ name="cifar10_lightning_fedavg",
+ initial_model=LitNet(),
+ )
+
+ controller = FedAvg(
+ num_clients=n_clients,
+ num_rounds=num_rounds,
+ )
+ job.to(controller, "server")
+
+ # Add clients
+ for i in range(n_clients):
+ runner = ScriptRunner(
+ script="src/cifar10_lightning_fl.py", script_args="" # f"--batch_size 32 --data_path /tmp/data/site-{i}"
+ )
+ job.to(runner, f"site-{i + 1}")
+
+ job.export_job("/tmp/nvflare/jobs/job_config")
+ job.simulator_run("/tmp/nvflare/jobs/workdir", gpu="0", log_config="./log_config.json")
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/code/log_config.json b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/code/log_config.json
new file mode 100644
index 0000000000..e5732b4950
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/code/log_config.json
@@ -0,0 +1,87 @@
+{
+ "version": 1,
+ "disable_existing_loggers": false,
+ "formatters": {
+ "baseFormatter": {
+ "()": "nvflare.fuel.utils.log_utils.BaseFormatter",
+ "fmt": "%(asctime)s - %(name)s - %(levelname)s - %(fl_ctx)s - %(message)s"
+ },
+ "colorFormatter": {
+ "()": "nvflare.fuel.utils.log_utils.ColorFormatter",
+ "fmt": "%(asctime)s - %(levelname)s - %(message)s",
+ "datefmt": "%Y-%m-%d %H:%M:%S"
+ },
+ "jsonFormatter": {
+ "()": "nvflare.fuel.utils.log_utils.JsonFormatter",
+ "fmt": "%(asctime)s - %(identity)s - %(name)s - %(fullName)s - %(levelname)s - %(fl_ctx)s - %(message)s"
+ }
+ },
+ "filters": {
+ "FLFilter": {
+ "()": "nvflare.fuel.utils.log_utils.LoggerNameFilter",
+ "logger_names": ["custom", "nvflare.app_common", "nvflare.app_opt"]
+ }
+ },
+ "handlers": {
+ "consoleHandler": {
+ "class": "logging.StreamHandler",
+ "level": "INFO",
+ "formatter": "colorFormatter",
+ "filters": ["FLFilter"],
+ "stream": "ext://sys.stdout"
+ },
+ "logFileHandler": {
+ "class": "logging.handlers.RotatingFileHandler",
+ "level": "DEBUG",
+ "formatter": "baseFormatter",
+ "filename": "log.txt",
+ "mode": "a",
+ "maxBytes": 20971520,
+ "backupCount": 10
+ },
+ "errorFileHandler": {
+ "class": "logging.handlers.RotatingFileHandler",
+ "level": "ERROR",
+ "formatter": "baseFormatter",
+ "filename": "log_error.txt",
+ "mode": "a",
+ "maxBytes": 20971520,
+ "backupCount": 10
+ },
+ "jsonFileHandler": {
+ "class": "logging.handlers.RotatingFileHandler",
+ "level": "DEBUG",
+ "formatter": "jsonFormatter",
+ "filename": "log.json",
+ "mode": "a",
+ "maxBytes": 20971520,
+ "backupCount": 10
+ },
+ "FLFileHandler": {
+ "class": "logging.handlers.RotatingFileHandler",
+ "level": "DEBUG",
+ "formatter": "baseFormatter",
+ "filters": ["FLFilter"],
+ "filename": "log_fl.txt",
+ "mode": "a",
+ "maxBytes": 20971520,
+ "backupCount": 10,
+ "delay": true
+ }
+ },
+ "loggers": {
+ "root": {
+ "level": "INFO",
+ "handlers": ["consoleHandler", "logFileHandler", "errorFileHandler", "jsonFileHandler", "FLFileHandler"]
+ }
+ }
+}
+
+
+
+
+
+
+
+
+
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/code/requirements.txt b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/code/requirements.txt
new file mode 100644
index 0000000000..5db2fd4d24
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/code/requirements.txt
@@ -0,0 +1,5 @@
+nvflare~=2.5.0rc
+torch
+torchvision
+pytorch_lightning
+tensorboard
\ No newline at end of file
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/code/src/cifar10_lightning_fl.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/code/src/cifar10_lightning_fl.py
new file mode 100644
index 0000000000..18861f604f
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/code/src/cifar10_lightning_fl.py
@@ -0,0 +1,105 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import torchvision
+import torchvision.transforms as transforms
+from lit_net import LitNet
+from pytorch_lightning import LightningDataModule, Trainer, seed_everything
+from torch.utils.data import DataLoader, random_split
+
+# (1) import nvflare lightning client API
+import nvflare.client.lightning as flare
+
+seed_everything(7)
+
+
+DATASET_PATH = "/tmp/nvflare/data"
+BATCH_SIZE = 4
+
+transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+
+
+class CIFAR10DataModule(LightningDataModule):
+ def __init__(self, data_dir: str = DATASET_PATH, batch_size: int = BATCH_SIZE):
+ super().__init__()
+ self.data_dir = data_dir
+ self.batch_size = batch_size
+
+ def prepare_data(self):
+ torchvision.datasets.CIFAR10(root=self.data_dir, train=True, download=True, transform=transform)
+ torchvision.datasets.CIFAR10(root=self.data_dir, train=False, download=True, transform=transform)
+
+ def setup(self, stage: str):
+ # Assign train/val datasets for use in dataloaders
+ if stage == "fit" or stage == "validate":
+ cifar_full = torchvision.datasets.CIFAR10(
+ root=self.data_dir, train=True, download=False, transform=transform
+ )
+ self.cifar_train, self.cifar_val = random_split(cifar_full, [0.8, 0.2])
+
+ # Assign test dataset for use in dataloader(s)
+ if stage == "test" or stage == "predict":
+ self.cifar_test = torchvision.datasets.CIFAR10(
+ root=self.data_dir, train=False, download=False, transform=transform
+ )
+
+ def train_dataloader(self):
+ return DataLoader(self.cifar_train, batch_size=self.batch_size)
+
+ def val_dataloader(self):
+ return DataLoader(self.cifar_val, batch_size=self.batch_size)
+
+ def test_dataloader(self):
+ return DataLoader(self.cifar_test, batch_size=self.batch_size)
+
+ def predict_dataloader(self):
+ return DataLoader(self.cifar_test, batch_size=self.batch_size)
+
+
+def main():
+ model = LitNet()
+ cifar10_dm = CIFAR10DataModule()
+ trainer = Trainer(max_epochs=1, devices=1, accelerator="gpu" if torch.cuda.is_available() else "cpu")
+
+ # (2) patch the lightning trainer
+ flare.patch(trainer)
+
+ while flare.is_running():
+ # (3) receives FLModel from NVFlare
+ # Note that we don't need to pass this input_model to trainer
+ # because after flare.patch the trainer.fit/validate will get the
+ # global model internally
+ input_model = flare.receive()
+ print(f"\n[Current Round={input_model.current_round}, Site = {flare.get_site_name()}]\n")
+
+ # (4) evaluate the current global model to allow server-side model selection
+ print("--- validate global model ---")
+ trainer.validate(model, datamodule=cifar10_dm)
+
+ # perform local training starting with the received global model
+ print("--- train new model ---")
+ trainer.fit(model, datamodule=cifar10_dm)
+
+ # test local model
+ print("--- test new model ---")
+ trainer.test(ckpt_path="best", datamodule=cifar10_dm)
+
+ # get predictions
+ print("--- prediction with new best model ---")
+ trainer.predict(ckpt_path="best", datamodule=cifar10_dm)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/code/src/lit_net.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/code/src/lit_net.py
new file mode 100644
index 0000000000..d70b85ca5a
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/code/src/lit_net.py
@@ -0,0 +1,93 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+from pytorch_lightning import LightningModule
+from torchmetrics import Accuracy
+
+NUM_CLASSES = 10
+criterion = nn.CrossEntropyLoss()
+
+
+class Net(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.conv1 = nn.Conv2d(3, 6, 5)
+ self.pool = nn.MaxPool2d(2, 2)
+ self.conv2 = nn.Conv2d(6, 16, 5)
+ self.fc1 = nn.Linear(16 * 5 * 5, 120)
+ self.fc2 = nn.Linear(120, 84)
+ self.fc3 = nn.Linear(84, 10)
+
+ def forward(self, x):
+ x = self.pool(F.relu(self.conv1(x)))
+ x = self.pool(F.relu(self.conv2(x)))
+ x = torch.flatten(x, 1) # flatten all dimensions except batch
+ x = F.relu(self.fc1(x))
+ x = F.relu(self.fc2(x))
+ x = self.fc3(x)
+ return x
+
+
+class LitNet(LightningModule):
+ def __init__(self):
+ super().__init__()
+ self.save_hyperparameters()
+ self.model = Net()
+ self.train_acc = Accuracy(task="multiclass", num_classes=NUM_CLASSES)
+ self.valid_acc = Accuracy(task="multiclass", num_classes=NUM_CLASSES)
+ # (optional) pass additional information via self.__fl_meta__
+ self.__fl_meta__ = {}
+
+ def forward(self, x):
+ out = self.model(x)
+ return out
+
+ def training_step(self, batch, batch_idx):
+ x, labels = batch
+ outputs = self(x)
+ loss = criterion(outputs, labels)
+ self.train_acc(outputs, labels)
+ self.log("train_loss", loss)
+ self.log("train_acc", self.train_acc, on_step=True, on_epoch=False)
+ return loss
+
+ def evaluate(self, batch, stage=None):
+ x, labels = batch
+ outputs = self(x)
+ loss = criterion(outputs, labels)
+ self.valid_acc(outputs, labels)
+
+ if stage:
+ self.log(f"{stage}_loss", loss)
+ self.log(f"{stage}_acc", self.valid_acc, on_step=True, on_epoch=True)
+ return outputs
+
+ def validation_step(self, batch, batch_idx):
+ self.evaluate(batch, "val")
+
+ def test_step(self, batch, batch_idx):
+ self.evaluate(batch, "test")
+
+ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> Any:
+ return self.evaluate(batch)
+
+ def configure_optimizers(self):
+ optimizer = optim.SGD(self.parameters(), lr=0.001, momentum=0.9)
+ return {"optimizer": optimizer}
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/convert_torch_lightning_to_fl.ipynb b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/convert_torch_lightning_to_fl.ipynb
new file mode 100644
index 0000000000..a1b06dbcd3
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.0_introduction/convert_torch_lightning_to_federated_learning/convert_torch_lightning_to_fl.ipynb
@@ -0,0 +1,446 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "7a5c3d67-a6ea-4f59-84d2-effc3ef016e1",
+ "metadata": {},
+ "source": [
+ "# Converting PyTorch Lightning to FL\n",
+ "\n",
+ "In chapter 1, we learned how to convert PyTorch code to a federated learning job with NVFlare. In this section and the next, we will learn how to convert PyTorch Lightning code to a federated learning job with NVFlare.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fcf2b4a8-ed42-421d-8898-c0c93f9d8a09",
+ "metadata": {},
+ "source": [
+ "## Basic Concepts\n",
+ "At the heart of NVFlare lies the concept of collaboration through\n",
+ "\"tasks.\" An FL controller assigns tasks (e.g., training on local data) to one or more FL clients, processes returned\n",
+ "results (e.g., model weight updates), and may assign additional\n",
+ "tasks based on these results and other factors (e.g., a pre-configured\n",
+ "number of training rounds). The clients run executors which can listen for tasks and perform the necessary computations locally, such as model training. This task-based interaction repeats\n",
+ "until the experiment’s objectives are met. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b68cb248-dc6a-48d1-880d-33c4324d9723",
+ "metadata": {},
+ "source": [
+ "## Federated Averaging with NVFlare\n",
+ "Given the flexible controller and executor concepts, it is easy to implement different computing & communication patterns with NVFlare, such as [FedAvg](https://proceedings.mlr.press/v54/mcmahan17a?ref=https://githubhelp.com) and [cyclic weight transfer](https://academic.oup.com/jamia/article/25/8/945/4956468). \n",
+ "\n",
+ "The controller's `run()` routine is responsible for assigning tasks and processing task results from the Executors. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b2f84fb1-9dd3-4c72-a727-c4614260f02f",
+ "metadata": {},
+ "source": [
+ "### Server Code\n",
+ "First, we provide a simple implementation of the [FedAvg](https://proceedings.mlr.press/v54/mcmahan17a?ref=https://githubhelp.com) algorithm with NVFlare. \n",
+ "The `run()` routine implements the main algorithmic logic. \n",
+ "Subroutines like `sample_clients()` and `send_model_and_wait()` utilize the communicator object, native to each Controller, to get the list of available clients,\n",
+ "distribute the current global model to the clients, and collect their results.\n",
+ "\n",
+ "The FedAvg controller implements these main steps:\n",
+ "1. FL server initializes an initial model using `self.load_model()`.\n",
+ "2. For each round (global iteration):\n",
+ "    - FL server samples available clients using `self.sample_clients()`.\n",
+ "    - FL server sends the global model to clients and waits for their updates using `self.send_model_and_wait()`.\n",
+ "    - FL server aggregates all the `results` using `self.aggregate()` and produces a new global model using `self.update_model()`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d62a13d5-1130-44e6-8818-70e30de401e6",
+ "metadata": {},
+ "source": [
+ "```python\n",
+ "class FedAvg(BaseFedAvg):\n",
+ " def run(self) -> None:\n",
+ " self.info(\"Start FedAvg.\")\n",
+ "\n",
+ " model = self.load_model()\n",
+ " model.start_round = self.start_round\n",
+ " model.total_rounds = self.num_rounds\n",
+ "\n",
+ " for self.current_round in range(self.start_round, self.start_round + self.num_rounds):\n",
+ " self.info(f\"Round {self.current_round} started.\")\n",
+ " model.current_round = self.current_round\n",
+ "\n",
+ " clients = self.sample_clients(self.num_clients)\n",
+ "\n",
+ " results = self.send_model_and_wait(targets=clients, data=model)\n",
+ "\n",
+ " aggregate_results = self.aggregate(results)\n",
+ "\n",
+ " model = self.update_model(model, aggregate_results)\n",
+ "\n",
+ " self.save_model(model)\n",
+ "\n",
+ " self.info(\"Finished FedAvg.\")\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d24b6476-089a-4e9d-825b-07107bd5d84a",
+ "metadata": {},
+ "source": [
+ "### Client Code \n",
+ "Given a CIFAR10 [PyTorch Lightning](https://lightning.ai/) code example with the network wrapped into a LightningModule [LitNet](code/src/lit_net.py) class, we wish to adapt this centralized training code to something that can run in a federated setting."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1c551053-5460-4d83-8578-796074170342",
+ "metadata": {},
+ "source": [
+ "On the client side, the training workflow is as follows:\n",
+ "1. Receive the model from the FL server.\n",
+ "2. Perform local training on the received global model\n",
+ "and/or evaluate the received global model for model\n",
+ "selection.\n",
+ "3. Send the new model back to the FL server."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c02bfc2a-783c-494f-9427-c38f40a2e870",
+ "metadata": {},
+ "source": [
+ "Using NVFlare's Client Lightning API, we can easily adapt machine learning code that was written for centralized training and apply it in a federated scenario.\n",
+ "For general use cases, we can use the Client Lightning API patch function:\n",
+ "- `flare.patch(trainer)`: Patch the lightning trainer. After flare.patch, functions such as `trainer.fit()` and `trainer.validate()` will get the global model internally and automatically send the result model to the FL server."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9115ee07-d848-4a7c-99ad-64e20ab7093c",
+ "metadata": {},
+ "source": [
+ "With this method, developers can use the Client API\n",
+ "to adapt their centralized training code to an FL scenario with\n",
+ "the simple code changes shown below.\n",
+ "```python\n",
+ " # (1) import nvflare lightning client API\n",
+ " import nvflare.client.lightning as flare\n",
+ "\n",
+ " # (2) patch the lightning trainer\n",
+ " flare.patch(trainer)\n",
+ "\n",
+ " while flare.is_running():\n",
+ " \n",
+ " # Note that we can optionally receive the FLModel from NVFLARE.\n",
+ " # We don't need to pass this input_model to trainer because after flare.patch \n",
+ " # the trainer.fit/validate will get the global model internally\n",
+ " input_model = flare.receive()\n",
+ "\n",
+ " trainer.validate(...)\n",
+ "\n",
+ " trainer.fit(...)\n",
+ "\n",
+ " trainer.test(...)\n",
+ "\n",
+ " trainer.predict(...)\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "67432f44-4144-4347-8d74-e7f57e065a14",
+ "metadata": {},
+ "source": [
+ "The full client training script is saved in a separate file, e.g. [code/src/cifar10_lightning_fl.py](code/src/cifar10_lightning_fl.py) doing CNN training on the [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) dataset."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5da34414-bac4-4352-8077-ab7ade998eec",
+ "metadata": {},
+ "source": [
+ "## Run an NVFlare Job\n",
+ "Now that we have defined the FedAvg controller to run our federated compute workflow on the FL server, and our client training script to receive the global models, run local training, and send the results back to the FL server, we can put everything together using NVFlare's Job API."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "17b69013",
+ "metadata": {},
+ "source": [
+ "## Install requirements\n",
+ "Make sure to install the required packages:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d14e8f49",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%pip install -r code/requirements.txt"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0cedaf75-3a4a-4843-8017-7716b53149a2",
+ "metadata": {},
+ "source": [
+ "#### 1. Define the initial model\n",
+ "First, we define the global model used to initialize the model on the FL server. See [code/src/lit_net.py](code/src/lit_net.py)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "93889e62-b725-427c-8839-2771ca81d24c",
+ "metadata": {},
+ "source": [
+ "```python\n",
+ "from typing import Any\n",
+ "\n",
+ "import torch\n",
+ "import torch.nn as nn\n",
+ "import torch.nn.functional as F\n",
+ "import torch.optim as optim\n",
+ "from pytorch_lightning import LightningModule\n",
+ "from torchmetrics import Accuracy\n",
+ "\n",
+ "NUM_CLASSES = 10\n",
+ "criterion = nn.CrossEntropyLoss()\n",
+ "\n",
+ "class Net(nn.Module):\n",
+ " def __init__(self):\n",
+ " super().__init__()\n",
+ " self.conv1 = nn.Conv2d(3, 6, 5)\n",
+ " self.pool = nn.MaxPool2d(2, 2)\n",
+ " self.conv2 = nn.Conv2d(6, 16, 5)\n",
+ " self.fc1 = nn.Linear(16 * 5 * 5, 120)\n",
+ " self.fc2 = nn.Linear(120, 84)\n",
+ " self.fc3 = nn.Linear(84, 10)\n",
+ "\n",
+ " def forward(self, x):\n",
+ " x = self.pool(F.relu(self.conv1(x)))\n",
+ " x = self.pool(F.relu(self.conv2(x)))\n",
+ " x = torch.flatten(x, 1) # flatten all dimensions except batch\n",
+ " x = F.relu(self.fc1(x))\n",
+ " x = F.relu(self.fc2(x))\n",
+ " x = self.fc3(x)\n",
+ " return x\n",
+ "\n",
+ "\n",
+ "class LitNet(LightningModule):\n",
+ " def __init__(self):\n",
+ " super().__init__()\n",
+ " self.save_hyperparameters()\n",
+ " self.model = Net()\n",
+ " self.train_acc = Accuracy(task=\"multiclass\", num_classes=NUM_CLASSES)\n",
+ " self.valid_acc = Accuracy(task=\"multiclass\", num_classes=NUM_CLASSES)\n",
+ " # (optional) pass additional information via self.__fl_meta__\n",
+ " self.__fl_meta__ = {}\n",
+ "\n",
+ " def forward(self, x):\n",
+ " out = self.model(x)\n",
+ " return out\n",
+ "\n",
+ " def training_step(self, batch, batch_idx):\n",
+ " x, labels = batch\n",
+ " outputs = self(x)\n",
+ " loss = criterion(outputs, labels)\n",
+ " self.train_acc(outputs, labels)\n",
+ " self.log(\"train_loss\", loss)\n",
+ " self.log(\"train_acc\", self.train_acc, on_step=True, on_epoch=False)\n",
+ " return loss\n",
+ "\n",
+ " def evaluate(self, batch, stage=None):\n",
+ " x, labels = batch\n",
+ " outputs = self(x)\n",
+ " loss = criterion(outputs, labels)\n",
+ " self.valid_acc(outputs, labels)\n",
+ "\n",
+ " if stage:\n",
+ " self.log(f\"{stage}_loss\", loss)\n",
+ " self.log(f\"{stage}_acc\", self.valid_acc, on_step=True, on_epoch=True)\n",
+ " return outputs\n",
+ "\n",
+ " def validation_step(self, batch, batch_idx):\n",
+ " self.evaluate(batch, \"val\")\n",
+ "\n",
+ " def test_step(self, batch, batch_idx):\n",
+ " self.evaluate(batch, \"test\")\n",
+ "\n",
+ " def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> Any:\n",
+ " return self.evaluate(batch)\n",
+ "\n",
+ " def configure_optimizers(self):\n",
+ " optimizer = optim.SGD(self.parameters(), lr=0.001, momentum=0.9)\n",
+ " return {\"optimizer\": optimizer}\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1b70da5d-ba8b-4e65-b47f-44bb9bddae4d",
+ "metadata": {},
+ "source": [
+ "#### 2. Define a FedJob\n",
+ "The `FedJob` is used to define how controllers and executors are placed within a federated job using the `to(object, target)` routine.\n",
+ "\n",
+ "Here we use a PyTorch `BaseFedJob`, where we can define the job name and the initial global model.\n",
+ "The `BaseFedJob` automatically configures components for model persistence, model selection, and TensorBoard streaming for convenience."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aaa2b6f4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from code.src.lit_net import LitNet\n",
+ "\n",
+ "from nvflare.app_common.workflows.fedavg import FedAvg\n",
+ "from nvflare.app_opt.pt.job_config.base_fed_job import BaseFedJob\n",
+ "from nvflare.job_config.script_runner import ScriptRunner\n",
+ "\n",
+ "job = BaseFedJob(\n",
+ " name=\"cifar10_lightning_fedavg\",\n",
+ " initial_model=LitNet(),\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9361d9f8-54f3-4363-b3ba-706a7ae3a8e9",
+ "metadata": {},
+ "source": [
+ "#### 3. Define the Controller Workflow\n",
+ "Define the controller workflow and send it to the server."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6962e6cc-995e-4356-8156-3ceba2c7a249",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "n_clients = 2\n",
+ "\n",
+ "controller = FedAvg(\n",
+ " num_clients=n_clients,\n",
+ " num_rounds=2,\n",
+ ")\n",
+ "job.to(controller, \"server\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "77f5bc7f-4fb4-46e9-8f02-5e7245d95070",
+ "metadata": {},
+ "source": [
+ "That completes the components that need to be defined on the server."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "548966c2-90bf-47ad-91d2-5c6c22c3c4f0",
+ "metadata": {},
+ "source": [
+ "#### 4. Add clients\n",
+ "Next, we can use the `ScriptRunner` and send it to each of the clients to run our training script.\n",
+ "\n",
+ "Note that our script could have additional input arguments, such as batch size or data path, but we don't use them here for simplicity."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ad5d36fe-9ae5-43c3-80bc-2cdc66bf7a7e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for i in range(n_clients):\n",
+ " runner = ScriptRunner(\n",
+ " script=\"src/cifar10_lightning_fl.py\", script_args=\"\" # f\"--batch_size 32 --data_path /tmp/data/site-{i}\"\n",
+ " )\n",
+ " job.to(runner, f\"site-{i+1}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "113fd6af-85be-4f75-8a8e-4666771252b3",
+ "metadata": {},
+ "source": [
+ "That's it!\n",
+ "\n",
+ "#### 5. Optionally export the job\n",
+ "Now, we could export the job and submit it to a real NVFlare deployment using the [Admin client](https://nvflare.readthedocs.io/en/main/real_world_fl/operation.html) or [FLARE API](https://nvflare.readthedocs.io/en/main/real_world_fl/flare_api.html). "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "99a270bf-c906-425b-b999-2306cb76eb62",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "job.export_job(\"/tmp/nvflare/jobs/job_config\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9ac3f0a8-06bb-4bea-89d3-4a5fc5b76c63",
+ "metadata": {},
+ "source": [
+ "#### 6. Run FL Simulation\n",
+ "Finally, we can run our FedJob in simulation using NVFlare's [simulator](https://nvflare.readthedocs.io/en/main/user_guide/nvflare_cli/fl_simulator.html) under the hood. We can also specify which GPU should be used to run this client, which is helpful for simulated environments. The results will be saved in the specified `workdir`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "13068ab7-35cf-49e7-91ed-10993049ef0d",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "job.simulator_run(\"/tmp/nvflare/jobs/workdir\", gpu=\"0\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fb2e1266",
+ "metadata": {},
+ "source": [
+ "You can see the full code for this job in [lightning_fl_job](code/lightning_fl_job.py)."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.1_federated_statistics/federated_statistics_with_image_data/code/demo/image_stats.json b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.1_federated_statistics/federated_statistics_with_image_data/code/demo/image_stats.json
new file mode 100644
index 0000000000..f6c8329b29
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.1_federated_statistics/federated_statistics_with_image_data/code/demo/image_stats.json
@@ -0,0 +1 @@
+{"intensity": {"count": {"site-1": {"train": 3616}, "site-2": {"train": 6012}, "site-3": {"train": 10192}, "Global": {"train": 19820}}, "histogram": {"site-1": {"train": [[0.0, 12.8, 17164261], [12.8, 25.6, 4058459], [25.6, 38.4, 4715127], [38.4, 51.2, 6359203], [51.2, 64.0, 8292112], [64.0, 76.8, 11693236], [76.8, 89.6, 14463928], [89.6, 102.4, 17033104], [102.4, 115.2, 20322665], [115.2, 128.0, 20882846], [128.0, 140.8, 24501167], [140.8, 153.6, 25907494], [153.6, 166.4, 26948743], [166.4, 179.2, 27176112], [179.2, 192.0, 23621479], [192.0, 204.8, 23483969], [204.8, 217.6, 18538400], [217.6, 230.4, 11707204], [230.4, 243.2, 8328655], [243.2, 256.0, 8075852]]}, "site-2": {"train": [[0.0, 12.8, 41491218], [12.8, 25.6, 13287568], [25.6, 38.4, 10917768], [38.4, 51.2, 13152497], [51.2, 64.0, 15425411], [64.0, 76.8, 21465675], [76.8, 89.6, 27805663], [89.6, 102.4, 34068349], [102.4, 115.2, 38666532], [115.2, 128.0, 37726151], [128.0, 140.8, 41601982], [140.8, 153.6, 41352742], [153.6, 166.4, 40646484], [166.4, 179.2, 39591365], [179.2, 192.0, 33739307], [192.0, 204.8, 31784585], [204.8, 217.6, 25808597], [217.6, 230.4, 18665966], [230.4, 243.2, 9201332], [243.2, 256.0, 1079620]]}, "site-3": {"train": [[0.0, 12.8, 64626063], [12.8, 25.6, 21871385], [25.6, 38.4, 21356879], [38.4, 51.2, 29523659], [51.2, 64.0, 33707232], [64.0, 76.8, 41558944], [76.8, 89.6, 45925803], [89.6, 102.4, 49323955], [102.4, 115.2, 52865293], [115.2, 128.0, 52893169], [128.0, 140.8, 61730773], [140.8, 153.6, 64396923], [153.6, 166.4, 63155642], [166.4, 179.2, 62849440], [179.2, 192.0, 56891238], [192.0, 204.8, 58672649], [204.8, 217.6, 53311331], [217.6, 230.4, 45538276], [230.4, 243.2, 27318127], [243.2, 256.0, 3658211]]}, "Global": {"train": [[0.0, 12.8, 123281542], [12.8, 25.6, 39217412], [25.6, 38.4, 36989774], [38.4, 51.2, 49035359], [51.2, 64.0, 57424755], [64.0, 76.8, 74717855], [76.8, 89.6, 88195394], [89.6, 102.4, 100425408], [102.4, 115.2, 111854490], [115.2, 128.0, 111502166], [128.0, 
140.8, 127833922], [140.8, 153.6, 131657159], [153.6, 166.4, 130750869], [166.4, 179.2, 129616917], [179.2, 192.0, 114252024], [192.0, 204.8, 113941203], [204.8, 217.6, 97658328], [217.6, 230.4, 75911446], [230.4, 243.2, 44848114], [243.2, 256.0, 12813683]]}}}}
\ No newline at end of file
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.1_federated_statistics/federated_statistics_with_image_data/code/demo/visualization.ipynb b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.1_federated_statistics/federated_statistics_with_image_data/code/demo/visualization.ipynb
new file mode 100644
index 0000000000..965bda266e
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.1_federated_statistics/federated_statistics_with_image_data/code/demo/visualization.ipynb
@@ -0,0 +1,258 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "3f851980",
+ "metadata": {},
+ "source": [
+ "# NVFLARE Federated Statistics Visualization"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0b71dd55",
+ "metadata": {},
+ "source": [
+ "## Image Statistics Visualization\n",
+ "In this example, we demonstrate how to visualize the results from the statistics of image data. The visualization requires the json, pandas, and matplotlib modules as well as the NVFlare visualization utilities. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "85f23acf",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "\n",
+ "import json\n",
+ "import pandas as pd\n",
+ "from nvflare.app_opt.statistics.visualization.statistics_visualization import Visualization"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "151e23a8",
+ "metadata": {},
+ "source": [
+ "First, copy the resulting JSON file to the demo directory. In this example, the resulting file is called image_stats.json. Then load the JSON file.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "44f6bed2",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "with open('image_stats.json', 'r') as f:\n",
+ " data = json.load(f)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c4b83ddb",
+ "metadata": {},
+ "source": [
+ "Initialize the Visualization utilities\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "ab771712",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "vis = Visualization()\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "49f976aa",
+ "metadata": {},
+ "source": [
+ "### Overall Statistics\n",
+ "vis.show_stats() shows the statistics for each feature, at each site, for each dataset.\n",
+ "\n",
+ "vis.show_stats(data = data)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "20ea4dff",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "vis.show_stats(data = data)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "521cbf6f",
+ "metadata": {},
+ "source": [
+ "### Select feature statistics using white_list_features\n",
+ "Users can optionally show only the specified features via the white_list_features argument. In these image files, we only have one feature."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9ab23bcc",
+ "metadata": {},
+ "source": [
+ "### Histogram Visualization\n",
+ "We can use vis.show_histograms() to visualize the histogram. Before we do that, we need to set some IPython display settings to make sure the graph is displayed in the full cell. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4bada64b",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from IPython.display import display, HTML\n",
+ "display(HTML(\"\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e415b49e",
+ "metadata": {},
+ "source": [
+ "The following command displays histograms for numeric features. The result shows the main plot."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "53542cf9",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "vis.show_histograms(data = data, plot_type=\"main\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d8d537cc",
+ "metadata": {},
+ "source": [
+ "## Display Options\n",
+ "Similar to other statistics, we can use white_list_features to select only a few features to display histograms. We can also use display_format=\"percent\" to allow all datasets and sites to be displayed in the same scale. Users can set:\n",
+ "\n",
+ "* display_format: \"percent\" or \"sample_count\"\n",
+ "* white_list_features: feature names\n",
+ "* plot_type : \"both\" or \"main\" or \"subplot\"\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "353db4d9",
+ "metadata": {},
+ "source": [
+ "#### show default display format with subplot\n",
+ "In the following, we display only feature \"Intensity\" in default display_format, with \"subplot\" plot_type"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8f619729",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vis.show_histograms(data = data, plot_type=\"subplot\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fbf7fc73",
+ "metadata": {},
+ "source": [
+ "\n",
+ "#### show percent display format with default plot_type (main)\n",
+ "In the following, we display only feature \"Intensity\" in \"percent\" display_format, with default plot_type"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8655ad63",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "vis.show_histograms(data = data, display_format=\"percent\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "48501d37",
+ "metadata": {},
+ "source": [
+ "back to [federated_statistics_with_image_data](../../federated_statistics_with_image_data.ipynb)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bd6f59f2",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "918341b9",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d86bd3e8",
+ "metadata": {},
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "nvflare_example",
+ "language": "python",
+ "name": "nvflare_example"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.2_client_api/client_api.ipynb b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.2_client_api/client_api.ipynb
new file mode 100644
index 0000000000..72a14b5b04
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.2_client_api/client_api.ipynb
@@ -0,0 +1,188 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "58149c32",
+ "metadata": {},
+ "source": [
+ "# Transform Existing Code to FL Easily with the FLARE Client API"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "06203527",
+ "metadata": {},
+ "source": [
+ "The FLARE Client API offers a straightforward path to transform your existing machine learning or deep learning code into federated learning applications. With just a few lines of code changes, you can adapt your training logic without restructuring your codebase or moving code into different class methods. This flexibility applies to both traditional machine learning and deep learning frameworks. For PyTorch Lightning users, the process is even more streamlined with dedicated Lightning API support.\n",
+ "\n",
+ "You can see detailed examples with actual integration across different platforms, including PyTorch and TensorFlow, [here](https://github.com/NVIDIA/NVFlare/tree/main/examples/hello-world/ml-to-fl).\n",
+ "\n",
+ "In Chapter 1, you have already seen the Client API in action with PyTorch. In this section, we will focus on the core concepts of the Client API and explain some of the ways it can be configured to help you use the Client API more effectively.\n",
+ "\n",
+ "Then we will see how to use the Client API with PyTorch Lightning, and traditional machine learning algorithms such as Logistic Regression, KMeans and survival analysis."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "be7efa36",
+ "metadata": {},
+ "source": [
+ "## Core Concept"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "76102eac",
+ "metadata": {},
+ "source": [
+ "The general workflow of popular federated learning (FL) algorithms follows these steps:\n",
+ "\n",
+ "1. **FL server initializes an initial model**\n",
+ "2. **For each round (global iteration):**\n",
+ " * FL server broadcasts the global model to clients\n",
+ " * Each FL client starts with this global model and performs local training on its own data\n",
+ " * Each FL client then sends its newly trained model back to the FL server\n",
+ " * FL server aggregates all the local models and produces a new global model\n",
+ "\n",
+ "On the client side, the training workflow is as follows:\n",
+ "\n",
+ "1. Receive the model from the FL server\n",
+ "2. Perform local training on the received global model and/or evaluate the received global model for model selection\n",
+ "3. Send the new model back to the FL server"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "50e2b7dd",
+ "metadata": {},
+ "source": [
+ "To convert a centralized training code to federated learning, we need to\n",
+ "adapt the code to do the following steps:\n",
+ "\n",
+ "1. Obtain the required information from the received `fl_model`\n",
+ "2. Run local training\n",
+ "3. Put the results in a new `fl_model` to be sent back\n",
+ "\n",
+ "For a general use case, there are three essential methods for the Client API:\n",
+ "\n",
+ "* ``init()``: Initializes NVFlare Client API environment.\n",
+ "* ``receive()``: Receives model from NVFlare side.\n",
+ "* ``send()``: Sends the model to NVFlare side.\n",
+ "\n",
+ "You can use the Client API to change centralized training code to\n",
+ "federated learning, for example:"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9f0044ea",
+ "metadata": {},
+ "source": [
+ "\n",
+ "```\n",
+ "import nvflare.client as flare\n",
+ "\n",
+ "flare.init() # 1. Initializes NVFlare Client API environment.\n",
+ "input_model = flare.receive() # 2. Receives model from NVFlare side.\n",
+ "params = input_model.params # 3. Obtain the required information from received FLModel\n",
+ "\n",
+ "# original local training code begins\n",
+ "new_params = trainer.fit(params)\n",
+ "# original local training code ends\n",
+ "\n",
+ "output_model = flare.FLModel(params=new_params) # 4. Put the results in a new FLModel\n",
+ "flare.send(output_model) # 5. Sends the model to NVFlare side.\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "494e4079",
+ "metadata": {},
+ "source": [
+ "With 5 lines of code changes, we convert the centralized training code to work in a\n",
+ "federated learning setting.\n",
+ "\n",
+ "After this, we can use the job templates and the Job CLI\n",
+ "to generate a job and export it to run on a deployed NVFlare system or directly run the job using FL Simulator.\n",
+ "\n",
+ "To see a table of the key Client APIs, see the [Client API documentation in the programming guide](https://nvflare.readthedocs.io/en/main/programming_guide/execution_api_type/client_api.html#id2).\n",
+ "\n",
+ "Please consult the [Client API Module](https://nvflare.readthedocs.io/en/main/apidocs/nvflare.client.api.html) for more in-depth information about all of the Client API functions.\n",
+ "\n",
+ "If you are using PyTorch Lightning in your training code, you can check the [Lightning API Module](https://nvflare.readthedocs.io/en/main/apidocs/nvflare.app_opt.lightning.api.html). Also, be sure to look through the [Convert Torch Lightning to FL notebook](../02.2_client_api/convert_torch_lightning_to_federated_learning/convert_torch_lightning_to_fl.ipynb) and related code."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4a09d80e",
+ "metadata": {},
+ "source": [
+ "## Client API with Different Implementations\n",
+ "\n",
+ "Within the Client API, we offer multiple implementations tailored to diverse requirements:\n",
+ "\n",
+ "* In-process Client API: efficient for single GPU training\n",
+ "* Sub-process Client API: flexible for multi-GPU or distributed PyTorch training\n",
+ "\n",
+ "\n",
+ "\n",
+ "### In-process Client API\n",
+ "\n",
+ "In this setup, the client training script operates within the same process as the NVFlare Client job. This configuration, utilizing the ```InProcessClientAPIExecutor```, offers shared memory usage and is efficient with simple configuration. \n",
+ "This is the default for `ScriptRunner` since by default `launch_external_process=False`. Use this configuration for development or single GPU training.\n",
+ "\n",
+ "### Sub-process Client API\n",
+ "\n",
+ "Here, the client training script runs in a separate subprocess. Utilizing the ```ClientAPILauncherExecutor```, this option offers flexibility in communication mechanisms:\n",
+ " * Communication via CellPipe (default)\n",
+ " * Communication via FilePipe (no capability to stream metrics for experiment tracking) \n",
+ "\n",
+ "This configuration is ideal for scenarios requiring multi-GPU or distributed PyTorch training.\n",
+ "\n",
+ "Choose the option best suited to your specific requirements and workflow preferences.\n",
+ "\n",
+ "These implementations can be easily configured using the JobAPI's `ScriptRunner`.\n",
+ "By default, the ```InProcessClientAPIExecutor``` is used, however setting `launch_external_process=True` uses the ```ClientAPILauncherExecutor```\n",
+ "with pre-configured CellPipes for communication and metrics streaming.\n",
+ "\n",
+ "Let's look at an example of how to use the Client API with PyTorch Lightning and machine learning algorithms.\n",
+ "\n",
+ "* [Convert PyTorch Lightning to federated learning](../02.2_client_api/convert_torch_lightning_to_federated_learning/convert_torch_lightning_to_fl.ipynb)\n",
+ "\n",
+ "* [Convert logistic regression to federated learning](../02.2_client_api/convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/convert_logistic_regression_to_fl.ipynb)\n",
+ "\n",
+ "* [convert Kmeans to federated learning](../02.2_client_api/convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/convert_kmeans_to_fl.ipynb)\n",
+ "\n",
+ "* [convert survival analysis to federated learning](../02.2_client_api/convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/convert_survival_analysis_to_fl.ipynb)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fffcd761",
+ "metadata": {},
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "nvflare_example",
+ "language": "python",
+ "name": "nvflare_example"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/data/prepare_heart_disease_data.sh b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/data/prepare_heart_disease_data.sh
new file mode 100755
index 0000000000..c297f15e71
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/data/prepare_heart_disease_data.sh
@@ -0,0 +1,29 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+DATA_DIR=/tmp/flare/dataset/heart_disease_data
+
+# Install dependencies
+#pip install wget
+FLAMBY_INSTALL_DIR=$(python3 -c "import sysconfig; print(sysconfig.get_path('purelib'))")
+# git clone https://github.com/owkin/FLamby.git && cd FLamby && pip install -e .
+
+# Download data using FLamby
+mkdir -p ${DATA_DIR}
+python3 ${FLAMBY_INSTALL_DIR}/flamby/datasets/fed_heart_disease/dataset_creation_scripts/download.py --output-folder ${DATA_DIR}
+
+# Convert data to numpy files
+python3 ${SCRIPT_DIR}/utils/convert_data_to_np.py ${DATA_DIR}
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/data/utils/convert_data_to_np.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/data/utils/convert_data_to_np.py
new file mode 100755
index 0000000000..a35ba16084
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/data/utils/convert_data_to_np.py
@@ -0,0 +1,58 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import argparse
+import os
+
+import numpy as np
+from flamby.datasets.fed_heart_disease import FedHeartDisease
+from torch.utils.data import DataLoader as dl
+
+if __name__ == "__main__":
+
+ parser = argparse.ArgumentParser("save UCI Heart Disease as numpy arrays.")  # NOTE(review): passed positionally, so argparse uses this text as `prog`, not as the description
+ parser.add_argument("save_dir", type=str, help="directory to save converted numpy arrays as .npy files.")
+ args = parser.parse_args()
+
+ if not os.path.exists(args.save_dir):  # create the output directory when it is missing
+ os.makedirs(args.save_dir, exist_ok=True)
+
+ for site in range(4):  # center indices 0..3
+
+ for flag in ("train", "test"):
+
+ # Load this center's split as a PyTorch dataset (train=True only for the "train" flag)
+ data = FedHeartDisease(center=site, train=(flag == "train"))
+
+ # Collect features (x) and outcomes (y) of the current split, one sample per batch
+ data_x = []
+ data_y = []
+ for x, y in dl(data, batch_size=1, shuffle=False, num_workers=0):
+ data_x.append(x.cpu().numpy().reshape(-1))
+ data_y.append(y.cpu().numpy().reshape(-1))
+
+ data_x = np.array(data_x).reshape(-1, 13)  # one row per sample, 13 feature columns
+ data_y = np.array(data_y).reshape(-1, 1)  # one outcome value per row
+
+ print("site {} - {} - variables shape: {}".format(site, flag, data_x.shape))
+ print("site {} - {} - outcomes shape: {}".format(site, flag, data_y.shape))
+
+ save_x_path = "{}/site-{}.{}.x.npy".format(args.save_dir, site + 1, flag)  # output files are numbered from site-1 (1-based)
+ print("saving data: {}".format(save_x_path))
+ np.save(save_x_path, data_x)
+
+ save_y_path = "{}/site-{}.{}.y.npy".format(args.save_dir, site + 1, flag)
+ print("saving data: {}".format(save_y_path))
+ np.save(save_y_path, data_y)
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/figs/tb-metrics.png b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/figs/tb-metrics.png
new file mode 100644
index 0000000000..148bacea0f
Binary files /dev/null and b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/figs/tb-metrics.png differ
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/log_config.json b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/log_config.json
new file mode 100644
index 0000000000..e5732b4950
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/log_config.json
@@ -0,0 +1,87 @@
+{
+ "version": 1,
+ "disable_existing_loggers": false,
+ "formatters": {
+ "baseFormatter": {
+ "()": "nvflare.fuel.utils.log_utils.BaseFormatter",
+ "fmt": "%(asctime)s - %(name)s - %(levelname)s - %(fl_ctx)s - %(message)s"
+ },
+ "colorFormatter": {
+ "()": "nvflare.fuel.utils.log_utils.ColorFormatter",
+ "fmt": "%(asctime)s - %(levelname)s - %(message)s",
+ "datefmt": "%Y-%m-%d %H:%M:%S"
+ },
+ "jsonFormatter": {
+ "()": "nvflare.fuel.utils.log_utils.JsonFormatter",
+ "fmt": "%(asctime)s - %(identity)s - %(name)s - %(fullName)s - %(levelname)s - %(fl_ctx)s - %(message)s"
+ }
+ },
+ "filters": {
+ "FLFilter": {
+ "()": "nvflare.fuel.utils.log_utils.LoggerNameFilter",
+ "logger_names": ["custom", "nvflare.app_common", "nvflare.app_opt"]
+ }
+ },
+ "handlers": {
+ "consoleHandler": {
+ "class": "logging.StreamHandler",
+ "level": "INFO",
+ "formatter": "colorFormatter",
+ "filters": ["FLFilter"],
+ "stream": "ext://sys.stdout"
+ },
+ "logFileHandler": {
+ "class": "logging.handlers.RotatingFileHandler",
+ "level": "DEBUG",
+ "formatter": "baseFormatter",
+ "filename": "log.txt",
+ "mode": "a",
+ "maxBytes": 20971520,
+ "backupCount": 10
+ },
+ "errorFileHandler": {
+ "class": "logging.handlers.RotatingFileHandler",
+ "level": "ERROR",
+ "formatter": "baseFormatter",
+ "filename": "log_error.txt",
+ "mode": "a",
+ "maxBytes": 20971520,
+ "backupCount": 10
+ },
+ "jsonFileHandler": {
+ "class": "logging.handlers.RotatingFileHandler",
+ "level": "DEBUG",
+ "formatter": "jsonFormatter",
+ "filename": "log.json",
+ "mode": "a",
+ "maxBytes": 20971520,
+ "backupCount": 10
+ },
+ "FLFileHandler": {
+ "class": "logging.handlers.RotatingFileHandler",
+ "level": "DEBUG",
+ "formatter": "baseFormatter",
+ "filters": ["FLFilter"],
+ "filename": "log_fl.txt",
+ "mode": "a",
+ "maxBytes": 20971520,
+ "backupCount": 10,
+ "delay": true
+ }
+ },
+ "loggers": {
+ "root": {
+ "level": "INFO",
+ "handlers": ["consoleHandler", "logFileHandler", "errorFileHandler", "jsonFileHandler", "FLFileHandler"]
+ }
+ }
+}
+
+
+
+
+
+
+
+
+
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/lr_fl_job.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/lr_fl_job.py
new file mode 100644
index 0000000000..4cc8c55e48
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/lr_fl_job.py
@@ -0,0 +1,50 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from src.newton_raphson_persistor import NewtonRaphsonModelPersistor
+from src.newton_raphson_workflow import FedAvgNewtonRaphson
+
+from nvflare.app_opt.pt.job_config.base_fed_job import BaseFedJob
+from nvflare.client.config import ExchangeFormat
+from nvflare.job_config.script_runner import ScriptRunner
+
+if __name__ == "__main__":
+ n_clients = 4  # one client per site; clients are registered below as site-1 .. site-4
+ num_rounds = 5
+
+ job = BaseFedJob(
+ name="logistic_regression_fedavg",
+ model_persistor=NewtonRaphsonModelPersistor(n_features=13),
+ )
+
+ controller = FedAvgNewtonRaphson(
+ num_clients=n_clients,
+ num_rounds=num_rounds,
+ damping_factor=0.8,
+ persistor_id="newton_raphson_persistor",  # NOTE(review): presumably must equal the id under which BaseFedJob registers the persistor - confirm
+ )
+ job.to(controller, "server")
+
+ # Add one ScriptRunner per client; targets are named site-1 .. site-<n_clients>
+ for i in range(n_clients):
+ runner = ScriptRunner(
+ script="src/newton_raphson_train.py",
+ script_args="--data_root /tmp/flare/dataset/heart_disease_data",
+ launch_external_process=True,  # run the training script in a separate subprocess instead of in-process
+ params_exchange_format=ExchangeFormat.RAW,
+ )
+ job.to(runner, f"site-{i + 1}")
+
+ job.export_job("/tmp/nvflare/jobs/job_config")  # export the job definition to this directory
+ job.simulator_run("/tmp/nvflare/jobs/workdir", gpu="0", log_config="./log_config.json")  # run in the FL simulator; NOTE(review): log_config is a relative path, presumably resolved against the current working directory - confirm
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/app/config/config_fed_client.json b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/app/config/config_fed_client.json
new file mode 100755
index 0000000000..75413266b0
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/app/config/config_fed_client.json
@@ -0,0 +1,75 @@
+{
+ "format_version": 2,
+ "app_script": "newton_raphson_train.py",
+ "app_config": "--data_root /tmp/flare/dataset/heart_disease_data",
+ "executors": [
+ {
+ "tasks": [
+ "train"
+ ],
+ "executor": {
+ "path": "nvflare.app_common.executors.client_api_launcher_executor.ClientAPILauncherExecutor",
+ "args": {
+ "launcher_id": "launcher",
+ "pipe_id": "pipe",
+ "heartbeat_timeout": 60,
+ "params_exchange_format": "raw",
+ "params_transfer_type": "FULL",
+ "train_with_evaluation": false
+ }
+ }
+ }
+ ],
+ "task_data_filters": [],
+ "task_result_filters": [],
+ "components": [
+ {
+ "id": "launcher",
+ "path": "nvflare.app_common.launchers.subprocess_launcher.SubprocessLauncher",
+ "args": {
+ "script": "python3 custom/{app_script} {app_config}",
+ "launch_once": true
+ }
+ },
+ {
+ "id": "pipe",
+ "path": "nvflare.fuel.utils.pipe.cell_pipe.CellPipe",
+ "args": {
+ "mode": "PASSIVE",
+ "site_name": "{SITE_NAME}",
+ "token": "{JOB_ID}",
+ "root_url": "{ROOT_URL}",
+ "secure_mode": "{SECURE_MODE}",
+ "workspace_dir": "{WORKSPACE}"
+ }
+ },
+ {
+ "id": "metrics_pipe",
+ "path": "nvflare.fuel.utils.pipe.cell_pipe.CellPipe",
+ "args": {
+ "mode": "PASSIVE",
+ "site_name": "{SITE_NAME}",
+ "token": "{JOB_ID}",
+ "root_url": "{ROOT_URL}",
+ "secure_mode": "{SECURE_MODE}",
+ "workspace_dir": "{WORKSPACE}"
+ }
+ },
+ {
+ "id": "metric_relay",
+ "path": "nvflare.app_common.widgets.metric_relay.MetricRelay",
+ "args": {
+ "pipe_id": "metrics_pipe",
+ "event_type": "fed.analytix_log_stats",
+ "read_interval": 0.1
+ }
+ },
+ {
+ "id": "client_api_config_preparer",
+ "path": "nvflare.app_common.widgets.external_configurator.ExternalConfigurator",
+ "args": {
+ "component_ids": ["metric_relay"]
+ }
+ }
+ ]
+}
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/app/config/config_fed_server.json b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/app/config/config_fed_server.json
new file mode 100755
index 0000000000..43f67b299e
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/app/config/config_fed_server.json
@@ -0,0 +1,34 @@
+{
+ "format_version": 2,
+ "server": {
+ "heart_beat_timeout": 600
+ },
+ "task_data_filters": [],
+ "task_result_filters": [],
+ "components": [
+ {
+ "id": "newton_raphson_persistor",
+ "path": "newton_raphson_persistor.NewtonRaphsonModelPersistor",
+ "args": {
+ "n_features": 13
+ }
+ },
+ {
+ "id": "tb_analytics_receiver",
+ "path": "nvflare.app_opt.tracking.tb.tb_receiver.TBAnalyticsReceiver",
+ "args.events": ["fed.analytix_log_stats"]
+ }
+ ],
+ "workflows": [
+ {
+ "id": "fedavg_newton_raphson",
+ "path": "newton_raphson_workflow.FedAvgNewtonRaphson",
+ "args": {
+ "num_clients": 4,
+ "num_rounds": 5,
+ "damping_factor": 0.8,
+ "persistor_id": "newton_raphson_persistor"
+ }
+ }
+ ]
+}
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/app/custom/newton_raphson_persistor.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/app/custom/newton_raphson_persistor.py
new file mode 100644
index 0000000000..5b324dd50c
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/app/custom/newton_raphson_persistor.py
@@ -0,0 +1,64 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import numpy as np
+
+from nvflare.app_common.np.np_model_persistor import NPModelPersistor
+
+
class NewtonRaphsonModelPersistor(NPModelPersistor):
    """
    Persistor for the federated Newton Raphson logistic regression model.

    A persistor controls how the model / parameters are initialized,
    loaded and saved for each round of a federated learning process.

    Here the "model" is a single numpy array holding the logistic
    regression parameters. It is allocated with shape
    (n_features + 1, 1), so its size follows the number of features
    in the dataset.

    """

    def __init__(self, model_dir="models", model_name="weights.npy", n_features=13):
        """
        Set up the persistor and its default (all-zero) parameters.

        Args:
            model_dir: sub-folder name to save and load the global model
                between rounds.
            model_name: name to save and load the global model.
            n_features: number of features for the logistic regression.
                For the UCI ML heart Disease dataset, this is 13.

        """

        super().__init__()

        self.model_dir, self.model_name = model_dir, model_name
        self.n_features = n_features

        # Default model, used when no local model is available yet
        # (i.e. when training starts): a zero-filled column array
        # with n_features + 1 rows.
        weights_shape = (self.n_features + 1, 1)
        self.default_data = np.zeros(weights_shape, dtype=np.float32)
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/app/custom/newton_raphson_train.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/app/custom/newton_raphson_train.py
new file mode 100644
index 0000000000..419b9ed70b
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/app/custom/newton_raphson_train.py
@@ -0,0 +1,184 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import argparse
+import os
+
+import numpy as np
+from sklearn.metrics import accuracy_score, precision_score
+
+import nvflare.client as flare
+from nvflare.apis.fl_constant import FLMetaKey
+from nvflare.app_common.abstract.fl_model import FLModel, ParamsType
+from nvflare.app_common.np.constants import NPConstants
+from nvflare.client.tracking import SummaryWriter
+
+
def parse_arguments():
    """
    Build the command line parser for client side training and
    return the parsed arguments.

    Returns:
        The argparse namespace; `data_root` is None when the
        `--data_root` option is not given.
    """
    arg_parser = argparse.ArgumentParser(description="Federated Second-Order Newton Raphson")
    arg_parser.add_argument("--data_root", type=str, help="Path to load client side data.")

    cli_args = arg_parser.parse_args()
    return cli_args
+
+
def load_data(data_root, site_name):
    """
    Load the training and validation arrays of one client site.

    Args:
        data_root: root directory storing client site data.
        site_name: client site name, used as the file name prefix.
    Returns:
        A dict with keys "train_X", "train_y", "valid_X" and "valid_y".
        The validation entries are read from the site's `test` files.
    """
    print("loading data for client {} from: {}".format(site_name, data_root))

    # Result key -> file name template; insertion order is also the
    # order in which the files are read.
    file_templates = {
        "train_X": "{}.train.x.npy",
        "train_y": "{}.train.y.npy",
        "valid_X": "{}.test.x.npy",
        "valid_y": "{}.test.y.npy",
    }

    return {
        key: np.load(os.path.join(data_root, template.format(site_name)))
        for key, template in file_templates.items()
    }
+
+
def sigmoid(inp):
    """Return the element-wise logistic function 1 / (1 + exp(-inp))."""
    return 1.0 / (1.0 + np.exp(-inp))


def train_newton_raphson(data, theta):
    """
    Compute gradient and hessian on local data
    based on parameters received from server.

    Args:
        data: dict holding "train_X" (samples x features) and
            "train_y" (one label per sample).
        theta: current parameters as a column array with
            n_features + 1 rows; row 0 multiplies the intercept column.
    Returns:
        A dict with "gradient" (same shape as `theta`) and "hessian"
        (square, n_features + 1 rows).
    """
    train_X = data["train_X"]

    # Add intercept: prepend a column of 1s to train_X.
    train_X = np.concatenate((np.ones((train_X.shape[0], 1)), train_X), axis=1)

    # Compute probabilities from current weights.
    proba = sigmoid(np.dot(train_X, theta))

    # Align the labels with the probabilities. Labels stored as a flat
    # (n,) vector would otherwise broadcast against the (n, 1)
    # probabilities into an (n, n) matrix and corrupt the gradient.
    train_y = np.reshape(data["train_y"], proba.shape)

    # The gradient is X^T . (y - proba)
    gradient = np.dot(train_X.T, (train_y - proba))

    # The hessian is X^T . D . X, where D is the diagonal matrix with
    # values proba * (1 - proba). Scaling the columns of X^T by that
    # diagonal gives X^T . D without materializing the dense n x n D.
    diag_weights = (proba * (1 - proba))[:, 0]
    hessian = (train_X.T * diag_weights).dot(train_X)

    return {"gradient": gradient, "hessian": hessian}
+
+
def validate(data, theta):
    """
    Run local validation with the given parameters.

    Args:
        data: dict holding "valid_X" and "valid_y".
        theta: parameters to evaluate.
    Returns:
        A dict with the "accuracy" and "precision" scores of the
        rounded predicted probabilities against "valid_y".
    """
    features, labels = data["valid_X"], data["valid_y"]

    # Add intercept: a leading column of 1s in front of the features.
    intercept_column = np.ones((features.shape[0], 1))
    design = np.concatenate((intercept_column, features), axis=1)

    # Probabilities from the current weights, rounded to class labels.
    predicted = sigmoid(np.dot(design, theta)).round()

    scores = {}
    scores["accuracy"] = accuracy_score(labels, predicted)
    scores["precision"] = precision_score(labels, predicted)
    return scores
+
+
def main():
    """
    Client side entry point: a typical ML training loop driven by the
    Flare Client API.

    For every global model received, the client validates it on its
    local data, logs the metrics, computes the local gradient and
    hessian, and sends them back to the server.

    """
    args = parse_arguments()

    flare.init()

    site_name = flare.get_site_name()
    print("training on client site: {}".format(site_name))

    # Load client site data.
    data = load_data(args.data_root, site_name)

    # Get metric summary writer
    writer = SummaryWriter()

    # Tag template / score key pairs written after each validation.
    metric_tags = (("{}/accuracy", "accuracy"), ("{}/precision", "precision"))

    while flare.is_running():

        # Receive global model (FLModel) from server.
        received = flare.receive()

        curr_round = received.current_round
        print("current_round={}".format(curr_round))

        print("[ROUND {}] - client site: {}, received global model: {}".format(curr_round, site_name, received))

        # The weights, aka parameter theta for logistic regression.
        theta = received.params[NPConstants.NUMPY_KEY]
        print("[ROUND {}] - global model weights: {}".format(curr_round, theta))

        # Local validation before training
        print("[ROUND {}] - start validation of global model on client: {}".format(curr_round, site_name))
        scores = validate(data, theta)
        print("[ROUND {}] - validation metric scores on client: {} = {}".format(curr_round, site_name, scores))

        # Write validation metric summary
        for tag_template, score_key in metric_tags:
            writer.add_scalar(tag_template.format(site_name), scores[score_key], curr_round)

        # Local training
        print("[ROUND {}] - start local training on client site: {}".format(curr_round, site_name))
        local_result = train_newton_raphson(data, theta=theta)

        # Send result to server for aggregation. The number of local
        # training samples is attached as meta data.
        result_model = FLModel(params=local_result, params_type=ParamsType.FULL)
        result_model.meta[FLMetaKey.NUM_STEPS_CURRENT_ROUND] = data["train_X"].shape[0]

        print(
            "[ROUND {}] - local newton raphson training from client: {} complete, sending results to server: {}".format(
                curr_round, site_name, result_model
            )
        )

        flare.send(result_model)


if __name__ == "__main__":
    main()
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/app/custom/newton_raphson_workflow.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/app/custom/newton_raphson_workflow.py
new file mode 100644
index 0000000000..a4094cb7f6
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/app/custom/newton_raphson_workflow.py
@@ -0,0 +1,167 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from typing import List
+
+import numpy as np
+
+from nvflare.apis.fl_constant import FLMetaKey
+from nvflare.app_common.abstract.fl_model import FLModel
+from nvflare.app_common.aggregators.weighted_aggregation_helper import WeightedAggregationHelper
+from nvflare.app_common.app_constant import AppConstants
+from nvflare.app_common.np.constants import NPConstants
+from nvflare.app_common.workflows.base_fedavg import BaseFedAvg
+
+
class FedAvgNewtonRaphson(BaseFedAvg):
    """
    Server side workflow for federated second order Newton Raphson
    optimization.

    Each round the current global weights are sent to the sampled
    clients, the gradients and hessians they return are aggregated,
    and the global weights are moved by a damped, regularized
    Newton Raphson step.
    """

    def __init__(self, damping_factor, epsilon=1.0, *args, **kwargs):
        """
        Init function for FedAvgNewtonRaphson.

        Args:
            damping_factor: damping factor for Newton Raphson updates.
            epsilon: a regularization factor to avoid empty hessian for
                matrix inversion
            *args: positional arguments forwarded to `BaseFedAvg`.
            **kwargs: keyword arguments forwarded to `BaseFedAvg`.
        """
        super().__init__(*args, **kwargs)
        self.damping_factor = damping_factor
        self.epsilon = epsilon
        self.aggregator = WeightedAggregationHelper()

    def run(self) -> None:
        """
        The run function executes the logic of federated
        second order Newton Raphson optimization.

        """
        self.info("starting Federated Averaging Netwon Raphson ...")

        # First load the model and set up some training params.
        # A `persistor` (NewtonRaphsonModelPersistor) will load
        # the model in `ModelLearnable` format, then it will be
        # converted to `FLModel` by `ModelController`.
        #
        model = self.load_model()

        model.start_round = self.start_round
        model.total_rounds = self.num_rounds

        self.info("Server side model loader: {}".format(model))

        for self.current_round in range(self.start_round, self.start_round + self.num_rounds):
            self.info(f"Round {self.current_round} started.")

            # Get the list of clients.
            clients = self.sample_clients(self.num_clients)

            model.current_round = self.current_round

            # Send training task and current global model to clients.
            #
            # A `task` instance will be created, and sent
            # to clients, the model is first converted to a shareable
            # and is attached to the task.
            #
            # After the task is finished, the result (shareable) received
            # from the task is converted to FLModel, and is returned to the
            # server. The `results` below is a list with result (FLModel)
            # from all clients.
            #
            # The full logic of `task` is implemented in:
            # https://github.com/NVIDIA/NVFlare/blob/d6827bca96d332adb3402ceceb4b67e876146067/nvflare/app_common/workflows/model_controller.py#L178
            #
            self.info("sending server side global model to clients")
            results = self.send_model_and_wait(targets=clients, data=model)

            # Aggregate results received from clients.
            aggregate_results = self.aggregate(results, aggregate_fn=self.newton_raphson_aggregator_fn)

            # Update global model based on the following formula:
            # weights = weights + updates, where
            # updates = damping_factor * (Hessian + epsilon * I)^{-1} . Gradient
            self.update_model(model, aggregate_results)

            # Save global model.
            self.save_model(model)

        self.info("Finished FedAvg.")

    def newton_raphson_aggregator_fn(self, results: List[FLModel]):
        """
        Custom aggregator function for second order Newton Raphson
        optimization.

        This uses the default WeightedAggregationHelper, which
        implements a weighted average of all values received from
        a `result` dictionary.

        Args:
            results: a list of `FLModel`s. Each `FLModel` is received
                from a client. The field `params` is a dictionary that
                contains values to be aggregated: the gradient and hessian.
        Returns:
            An `FLModel` whose `params["newton_raphson_updates"]` holds
            the step to add to the global weights.
        """
        self.info("receieved results from clients: {}".format(results))

        # On client side the `NUM_STEPS_CURRENT_ROUND` key
        # is used to track the number of samples for each client.
        for curr_result in results:
            self.aggregator.add(
                data=curr_result.params,
                weight=curr_result.meta.get(FLMetaKey.NUM_STEPS_CURRENT_ROUND, 1.0),
                contributor_name=curr_result.meta.get("client_name", AppConstants.CLIENT_UNKNOWN),
                contribution_round=curr_result.current_round,
            )

        aggregated_dict = self.aggregator.get_result()
        self.info("aggregated result: {}".format(aggregated_dict))

        # Compute global model update:
        #   update = damping_factor * (Hessian + epsilon * I)^{-1} . Gradient
        # The epsilon * I regularization avoids an empty hessian.
        # No minus sign is applied: the clients report the gradient as
        # X^T . (y - proba), so the step is added to the weights as is.
        # `solve` is used instead of forming the inverse explicitly.
        #
        reg = self.epsilon * np.eye(aggregated_dict["hessian"].shape[0])
        newton_raphson_updates = self.damping_factor * np.linalg.solve(
            aggregated_dict["hessian"] + reg, aggregated_dict["gradient"]
        )
        self.info("newton raphson updates: {}".format(newton_raphson_updates))

        # Convert the aggregated result to `FLModel`, this `FLModel`
        # will then be used by the `update_model` method below
        # to update the global model weights.
        #
        aggr_result = FLModel(
            params={"newton_raphson_updates": newton_raphson_updates},
            params_type=results[0].params_type,
            meta={
                "nr_aggregated": len(results),
                AppConstants.CURRENT_ROUND: results[0].current_round,
                AppConstants.NUM_ROUNDS: self.num_rounds,
            },
        )
        return aggr_result

    def update_model(self, model, model_update, replace_meta=True) -> FLModel:
        """
        Update logistic regression parameters based on
        aggregated gradient and hessian.

        Args:
            model: the global model; its weights are updated in place.
            model_update: aggregated result carrying the
                "newton_raphson_updates" step, plus meta and metrics.
            replace_meta: when True the model meta is replaced by the
                update's meta, otherwise the two are merged.
        Returns:
            The same `model` object, after the update.
        """
        if replace_meta:
            model.meta = model_update.meta
        else:
            model.meta.update(model_update.meta)

        model.metrics = model_update.metrics
        model.params[NPConstants.NUMPY_KEY] += model_update.params["newton_raphson_updates"]

        # Return the model so the declared `-> FLModel` contract holds;
        # callers relying on the in-place update are unaffected.
        return model
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/meta.json b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/meta.json
new file mode 100644
index 0000000000..c157e9f65a
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/newton_raphson/meta.json
@@ -0,0 +1,10 @@
+{
+ "name": "newton_raphson",
+ "resource_spec": {},
+ "min_clients" : 4,
+ "deploy_map": {
+ "app": [
+ "@ALL"
+ ]
+ }
+}
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/requirements.txt b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/requirements.txt
new file mode 100644
index 0000000000..513c8f8be0
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/requirements.txt
@@ -0,0 +1,2 @@
+flamby @ git+https://github.com/owkin/FLamby.git@main
+wget==3.2
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/src/newton_raphson_persistor.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/src/newton_raphson_persistor.py
new file mode 100644
index 0000000000..5b324dd50c
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/src/newton_raphson_persistor.py
@@ -0,0 +1,64 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import numpy as np
+
+from nvflare.app_common.np.np_model_persistor import NPModelPersistor
+
+
class NewtonRaphsonModelPersistor(NPModelPersistor):
    """
    Persistor for the federated Newton Raphson logistic regression model.

    A persistor controls how the model / parameters are initialized,
    loaded and saved for each round of a federated learning process.

    Here the "model" is a single numpy array holding the logistic
    regression parameters. It is allocated with shape
    (n_features + 1, 1), so its size follows the number of features
    in the dataset.

    """

    def __init__(self, model_dir="models", model_name="weights.npy", n_features=13):
        """
        Set up the persistor and its default (all-zero) parameters.

        Args:
            model_dir: sub-folder name to save and load the global model
                between rounds.
            model_name: name to save and load the global model.
            n_features: number of features for the logistic regression.
                For the UCI ML heart Disease dataset, this is 13.

        """

        super().__init__()

        self.model_dir, self.model_name = model_dir, model_name
        self.n_features = n_features

        # Default model, used when no local model is available yet
        # (i.e. when training starts): a zero-filled column array
        # with n_features + 1 rows.
        weights_shape = (self.n_features + 1, 1)
        self.default_data = np.zeros(weights_shape, dtype=np.float32)
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/src/newton_raphson_train.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/src/newton_raphson_train.py
new file mode 100644
index 0000000000..419b9ed70b
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/src/newton_raphson_train.py
@@ -0,0 +1,184 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import argparse
+import os
+
+import numpy as np
+from sklearn.metrics import accuracy_score, precision_score
+
+import nvflare.client as flare
+from nvflare.apis.fl_constant import FLMetaKey
+from nvflare.app_common.abstract.fl_model import FLModel, ParamsType
+from nvflare.app_common.np.constants import NPConstants
+from nvflare.client.tracking import SummaryWriter
+
+
def parse_arguments():
    """
    Build the command line parser for client side training and
    return the parsed arguments.

    Returns:
        The argparse namespace; `data_root` is None when the
        `--data_root` option is not given.
    """
    arg_parser = argparse.ArgumentParser(description="Federated Second-Order Newton Raphson")
    arg_parser.add_argument("--data_root", type=str, help="Path to load client side data.")

    cli_args = arg_parser.parse_args()
    return cli_args
+
+
def load_data(data_root, site_name):
    """
    Load the training and validation arrays of one client site.

    Args:
        data_root: root directory storing client site data.
        site_name: client site name, used as the file name prefix.
    Returns:
        A dict with keys "train_X", "train_y", "valid_X" and "valid_y".
        The validation entries are read from the site's `test` files.
    """
    print("loading data for client {} from: {}".format(site_name, data_root))

    # Result key -> file name template; insertion order is also the
    # order in which the files are read.
    file_templates = {
        "train_X": "{}.train.x.npy",
        "train_y": "{}.train.y.npy",
        "valid_X": "{}.test.x.npy",
        "valid_y": "{}.test.y.npy",
    }

    return {
        key: np.load(os.path.join(data_root, template.format(site_name)))
        for key, template in file_templates.items()
    }
+
+
def sigmoid(inp):
    """Return the element-wise logistic function 1 / (1 + exp(-inp))."""
    return 1.0 / (1.0 + np.exp(-inp))


def train_newton_raphson(data, theta):
    """
    Compute gradient and hessian on local data
    based on parameters received from server.

    Args:
        data: dict holding "train_X" (samples x features) and
            "train_y" (one label per sample).
        theta: current parameters as a column array with
            n_features + 1 rows; row 0 multiplies the intercept column.
    Returns:
        A dict with "gradient" (same shape as `theta`) and "hessian"
        (square, n_features + 1 rows).
    """
    train_X = data["train_X"]

    # Add intercept: prepend a column of 1s to train_X.
    train_X = np.concatenate((np.ones((train_X.shape[0], 1)), train_X), axis=1)

    # Compute probabilities from current weights.
    proba = sigmoid(np.dot(train_X, theta))

    # Align the labels with the probabilities. Labels stored as a flat
    # (n,) vector would otherwise broadcast against the (n, 1)
    # probabilities into an (n, n) matrix and corrupt the gradient.
    train_y = np.reshape(data["train_y"], proba.shape)

    # The gradient is X^T . (y - proba)
    gradient = np.dot(train_X.T, (train_y - proba))

    # The hessian is X^T . D . X, where D is the diagonal matrix with
    # values proba * (1 - proba). Scaling the columns of X^T by that
    # diagonal gives X^T . D without materializing the dense n x n D.
    diag_weights = (proba * (1 - proba))[:, 0]
    hessian = (train_X.T * diag_weights).dot(train_X)

    return {"gradient": gradient, "hessian": hessian}
+
+
def validate(data, theta):
    """
    Run local validation with the given parameters.

    Args:
        data: dict holding "valid_X" and "valid_y".
        theta: parameters to evaluate.
    Returns:
        A dict with the "accuracy" and "precision" scores of the
        rounded predicted probabilities against "valid_y".
    """
    features, labels = data["valid_X"], data["valid_y"]

    # Add intercept: a leading column of 1s in front of the features.
    intercept_column = np.ones((features.shape[0], 1))
    design = np.concatenate((intercept_column, features), axis=1)

    # Probabilities from the current weights, rounded to class labels.
    predicted = sigmoid(np.dot(design, theta)).round()

    scores = {}
    scores["accuracy"] = accuracy_score(labels, predicted)
    scores["precision"] = precision_score(labels, predicted)
    return scores
+
+
def main():
    """
    Client side entry point: a typical ML training loop driven by the
    Flare Client API.

    For every global model received, the client validates it on its
    local data, logs the metrics, computes the local gradient and
    hessian, and sends them back to the server.

    """
    args = parse_arguments()

    flare.init()

    site_name = flare.get_site_name()
    print("training on client site: {}".format(site_name))

    # Load client site data.
    data = load_data(args.data_root, site_name)

    # Get metric summary writer
    writer = SummaryWriter()

    # Tag template / score key pairs written after each validation.
    metric_tags = (("{}/accuracy", "accuracy"), ("{}/precision", "precision"))

    while flare.is_running():

        # Receive global model (FLModel) from server.
        received = flare.receive()

        curr_round = received.current_round
        print("current_round={}".format(curr_round))

        print("[ROUND {}] - client site: {}, received global model: {}".format(curr_round, site_name, received))

        # The weights, aka parameter theta for logistic regression.
        theta = received.params[NPConstants.NUMPY_KEY]
        print("[ROUND {}] - global model weights: {}".format(curr_round, theta))

        # Local validation before training
        print("[ROUND {}] - start validation of global model on client: {}".format(curr_round, site_name))
        scores = validate(data, theta)
        print("[ROUND {}] - validation metric scores on client: {} = {}".format(curr_round, site_name, scores))

        # Write validation metric summary
        for tag_template, score_key in metric_tags:
            writer.add_scalar(tag_template.format(site_name), scores[score_key], curr_round)

        # Local training
        print("[ROUND {}] - start local training on client site: {}".format(curr_round, site_name))
        local_result = train_newton_raphson(data, theta=theta)

        # Send result to server for aggregation. The number of local
        # training samples is attached as meta data.
        result_model = FLModel(params=local_result, params_type=ParamsType.FULL)
        result_model.meta[FLMetaKey.NUM_STEPS_CURRENT_ROUND] = data["train_X"].shape[0]

        print(
            "[ROUND {}] - local newton raphson training from client: {} complete, sending results to server: {}".format(
                curr_round, site_name, result_model
            )
        )

        flare.send(result_model)


if __name__ == "__main__":
    main()
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/src/newton_raphson_workflow.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/src/newton_raphson_workflow.py
new file mode 100644
index 0000000000..a4094cb7f6
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/src/newton_raphson_workflow.py
@@ -0,0 +1,167 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from typing import List
+
+import numpy as np
+
+from nvflare.apis.fl_constant import FLMetaKey
+from nvflare.app_common.abstract.fl_model import FLModel
+from nvflare.app_common.aggregators.weighted_aggregation_helper import WeightedAggregationHelper
+from nvflare.app_common.app_constant import AppConstants
+from nvflare.app_common.np.constants import NPConstants
+from nvflare.app_common.workflows.base_fedavg import BaseFedAvg
+
+
+class FedAvgNewtonRaphson(BaseFedAvg):
+ def __init__(self, damping_factor, epsilon=1.0, *args, **kwargs):
+     """
+     Init function for FedAvgNewtonRaphson.
+
+     Args:
+         damping_factor: factor that scales each Newton Raphson update.
+         epsilon: regularization added to the hessian diagonal before solving.
+         *args, **kwargs: forwarded unchanged to `BaseFedAvg.__init__`.
+     """
+     super().__init__(*args, **kwargs)
+     self.damping_factor = damping_factor
+     self.epsilon = epsilon
+     self.aggregator = WeightedAggregationHelper()
+
+ def run(self) -> None:
+ """
+ Run federated second order Newton Raphson optimization: in each
+ round, send the global model to the sampled clients, aggregate
+ their results, update the global model and save it.
+ """
+ self.info("starting Federated Averaging Netwon Raphson ...")
+
+ # First load the model and set up some training params.
+ # A `persistor` (NewtonRaphsonModelPersistor) will load
+ # the model in `ModelLearnable` format, which is then
+ # converted to `FLModel` by `ModelController`.
+ #
+ model = self.load_model()
+
+ model.start_round = self.start_round
+ model.total_rounds = self.num_rounds
+
+ self.info("Server side model loader: {}".format(model))
+
+ for self.current_round in range(self.start_round, self.start_round + self.num_rounds):
+ self.info(f"Round {self.current_round} started.")
+
+ # Get the list of clients.
+ clients = self.sample_clients(self.num_clients)
+
+ model.current_round = self.current_round
+
+ # Send training task and current global model to clients.
+ #
+ # A `task` instance will be created and sent to the
+ # clients; the model is first converted to a shareable
+ # and is attached to the task.
+ #
+ # After the task is finished, the result (shareable) received
+ # from the task is converted to FLModel, and is returned to the
+ # server. The `results` below is a list with one result (FLModel)
+ # from each client.
+ #
+ # The full logic of `task` is implemented in:
+ # https://github.com/NVIDIA/NVFlare/blob/d6827bca96d332adb3402ceceb4b67e876146067/nvflare/app_common/workflows/model_controller.py#L178
+ #
+ self.info("sending server side global model to clients")
+ results = self.send_model_and_wait(targets=clients, data=model)
+
+ # Aggregate results received from clients.
+ aggregate_results = self.aggregate(results, aggregate_fn=self.newton_raphson_aggregator_fn)
+
+ # Update the global model in place based on the following formula:
+ # weights = weights + updates, where `updates` is the damped
+ # Newton Raphson step computed in `newton_raphson_aggregator_fn`.
+ self.update_model(model, aggregate_results)
+
+ # Save global model.
+ self.save_model(model)
+
+ self.info("Finished FedAvg.")
+
+ def newton_raphson_aggregator_fn(self, results: List[FLModel]):
+ """
+ Custom aggregator function for second order Newton Raphson optimization.
+
+ This uses the default thread-safe WeightedAggregationHelper,
+ which implements a weighted average of all values received from
+ a `result` dictionary.
+
+ Args:
+ results: a list of `FLModel`s, one received from each client. `params`
+ holds the values to be aggregated: the gradient and hessian.
+ Returns:
+ an `FLModel` whose `params["newton_raphson_updates"]` holds the update.
+ """
+ self.info("receieved results from clients: {}".format(results))
+
+ # On client side the `NUM_STEPS_CURRENT_ROUND` key
+ # is used to track the number of samples for each client.
+ for curr_result in results:
+ self.aggregator.add(
+ data=curr_result.params,
+ weight=curr_result.meta.get(FLMetaKey.NUM_STEPS_CURRENT_ROUND, 1.0),
+ contributor_name=curr_result.meta.get("client_name", AppConstants.CLIENT_UNKNOWN),
+ contribution_round=curr_result.current_round,
+ )
+
+ aggregated_dict = self.aggregator.get_result()
+ self.info("aggregated result: {}".format(aggregated_dict))
+
+ # Compute global model update:
+ # update = damping_factor * (hessian + reg)^{-1} . gradient
+ # A regularization is added to avoid a singular hessian.
+ #
+ reg = self.epsilon * np.eye(aggregated_dict["hessian"].shape[0])
+ newton_raphson_updates = self.damping_factor * np.linalg.solve(
+ aggregated_dict["hessian"] + reg, aggregated_dict["gradient"]
+ )
+ self.info("newton raphson updates: {}".format(newton_raphson_updates))
+
+ # Convert the aggregated result to `FLModel`; this `FLModel`
+ # will then be used by the `update_model` method of this class
+ # to update the global model weights.
+ #
+ aggr_result = FLModel(
+ params={"newton_raphson_updates": newton_raphson_updates},
+ params_type=results[0].params_type,
+ meta={
+ "nr_aggregated": len(results),
+ AppConstants.CURRENT_ROUND: results[0].current_round,
+ AppConstants.NUM_ROUNDS: self.num_rounds,
+ },
+ )
+ return aggr_result
+
+ def update_model(self, model, model_update, replace_meta=True) -> FLModel:
+     """
+     Apply the aggregated Newton Raphson update to `model` in place and
+     return it. `model.meta` is replaced by `model_update.meta`, or merged
+     with it when `replace_meta` is False.
+     """
+     if replace_meta:
+         model.meta = model_update.meta
+     else:
+         model.meta.update(model_update.meta)
+     model.metrics = model_update.metrics
+     model.params[NPConstants.NUMPY_KEY] += model_update.params["newton_raphson_updates"]
+     return model
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/train_centralized.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/train_centralized.py
new file mode 100755
index 0000000000..c64ee6cb6e
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/code/train_centralized.py
@@ -0,0 +1,118 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import argparse
+import os
+
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score, precision_score
+
+DATA_ROOT = "/tmp/flare/dataset/heart_disease_data/"
+
+MAX_ITERS = 4
+EPSILON = 1.0
+
+
+def sigmoid(inp):
+ return 1.0 / (1.0 + np.exp(-inp))
+
+
+def lr_solver(X, y):
+    """
+    Fit logistic regression with MAX_ITERS regularized Newton Raphson steps.
+    A column of ones is prepended to X for the intercept; y is expected as
+    a column vector (n_samples, 1). Returns theta, shape (n_features + 1, 1).
+    """
+    n_features = X.shape[1]
+    theta = np.zeros((n_features + 1, 1))
+    X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
+    reg = EPSILON * np.eye(n_features + 1)  # loop-invariant regularization
+
+    for _ in range(MAX_ITERS):  # `_` avoids shadowing the builtin `iter`
+        proba = sigmoid(np.dot(X, theta))
+        gradient = np.dot(X.T, (y - proba))
+        # Row-wise weighting equals X.T @ diag(w) @ X without the N x N matrix.
+        weights = proba * (1 - proba)
+        hessian = X.T.dot(weights * X)
+
+        updates = np.linalg.solve(hessian + reg, gradient)
+        theta += updates
+
+    return theta
+
+
+if __name__ == "__main__":
+    # Runs only as a script: importing this module must not parse args or load data.
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--solver",
+        type=str,
+        default="custom",
+        help=("which solver to use: custom (default) or sklearn " "LogisticRegression. The results are the same. "),
+    )
+    args = parser.parse_args()
+
+    print("using solver:", args.solver)
+
+    print("loading training data.")
+    train_X = np.concatenate(
+        (
+            np.load(os.path.join(DATA_ROOT, "site-1.train.x.npy")),
+            np.load(os.path.join(DATA_ROOT, "site-2.train.x.npy")),
+            np.load(os.path.join(DATA_ROOT, "site-3.train.x.npy")),
+            np.load(os.path.join(DATA_ROOT, "site-4.train.x.npy")),
+        )
+    )
+    train_y = np.concatenate(
+        (
+            np.load(os.path.join(DATA_ROOT, "site-1.train.y.npy")),
+            np.load(os.path.join(DATA_ROOT, "site-2.train.y.npy")),
+            np.load(os.path.join(DATA_ROOT, "site-3.train.y.npy")),
+            np.load(os.path.join(DATA_ROOT, "site-4.train.y.npy")),
+        )
+    )
+
+    if args.solver == "sklearn":
+        train_y = train_y.reshape(-1)
+
+    print("training data X loaded. shape:", train_X.shape)
+    print("training data y loaded. shape:", train_y.shape)
+
+    if args.solver == "sklearn":
+        clf = LogisticRegression(random_state=0, solver="newton-cholesky", verbose=1).fit(train_X, train_y)
+
+    else:
+        theta = lr_solver(train_X, train_y)
+
+    for site in range(4):
+        # Evaluate the trained model on each site's own test split.
+        print("\nsite - {}".format(site + 1))
+        test_X = np.load(os.path.join(DATA_ROOT, "site-{}.test.x.npy".format(site + 1)))
+        test_y = np.load(os.path.join(DATA_ROOT, "site-{}.test.y.npy".format(site + 1)))
+        test_y = test_y.reshape(-1)
+
+        print("validation set n_samples: ", test_X.shape[0])
+
+        if args.solver == "sklearn":
+            proba = clf.predict_proba(test_X)
+            proba = proba[:, 1]
+
+        else:
+            test_X = np.concatenate((np.ones((test_X.shape[0], 1)), test_X), axis=1)
+            proba = sigmoid(np.dot(test_X, theta))
+
+        print("accuracy:", accuracy_score(test_y, proba.round()))
+        print("precision:", precision_score(test_y, proba.round()))
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/convert_logistic_regression_to_fl.ipynb b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/convert_logistic_regression_to_fl.ipynb
new file mode 100644
index 0000000000..5bf4d3f4f1
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/convert_logistic_regression_to_fl.ipynb
@@ -0,0 +1,341 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "e8c19632",
+ "metadata": {},
+ "source": [
+ "# Converting Logistic Regression to FL"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7f9d96ed",
+ "metadata": {},
+ "source": [
+ "## Federated Logistic Regression with Second-Order Newton-Raphson optimization\n",
+ "This example shows how to implement a federated binary classification via logistic regression with second-order Newton-Raphson optimization.\n",
+ "\n",
+ "The [UCI Heart Disease dataset](https://archive.ics.uci.edu/dataset/45/heart+disease) is\n",
+ "used in this example. Scripts are provided to download and process the\n",
+ "dataset as described\n",
+ "[here](https://github.com/owkin/FLamby/tree/main/flamby/datasets/fed_heart_disease).\n",
+ "\n",
+ "This dataset contains samples from 4 sites, split into training and\n",
+ "testing sets as described below:\n",
+ "|site | sample split |\n",
+ "|-------------|---------------------------------------|\n",
+ "|Cleveland | train: 199 samples, test: 104 samples |\n",
+ "|Hungary | train: 172 samples, test: 89 samples |\n",
+ "|Switzerland | train: 30 samples, test: 16 samples |\n",
+ "|Long Beach V | train: 85 samples, test: 45 samples |\n",
+ "\n",
+ "The number of features in each sample is 13."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e54f0dcc",
+ "metadata": {},
+ "source": [
+ "## Introduction\n",
+ "\n",
+ "The [Newton-Raphson\n",
+ "optimization](https://en.wikipedia.org/wiki/Newton%27s_method) problem\n",
+ "can be described as follows.\n",
+ "\n",
+ "In a binary classification task with logistic regression, the\n",
+ "probability of a data sample $x$ classified as positive is formulated\n",
+ "as:\n",
+ "$$p(x) = \\sigma(\\beta \\cdot x + \\beta_{0})$$\n",
+ "where $\\sigma(.)$ denotes the sigmoid function. We can incorporate\n",
+ "$\\beta_{0}$ and $\\beta$ into a single parameter vector $\\theta =\n",
+ "( \\beta_{0}, \\beta)$. Let $d$ be the number\n",
+ "of features for each data sample $x$ and let $N$ be the number of data\n",
+ "samples. We then have the matrix version of the above probability\n",
+ "equation:\n",
+ "$$p(X) = \\sigma( X \\theta )$$\n",
+ "Here $X$ is the matrix of all samples, with shape $N \\times (d+1)$,\n",
+ "having its first column filled with value 1 to account for the\n",
+ "intercept $\\theta_{0}$.\n",
+ "\n",
+ "The goal is to compute parameter vector $\\theta$ that maximizes the\n",
+ "below likelihood function:\n",
+ "$$L_{\\theta} = \\prod_{i=1}^{N} p(x_i)^{y_i} (1 - p(x_i))^{1-y_i}$$\n",
+ "\n",
+ "The Newton-Raphson method optimizes the likelihood function via\n",
+ "quadratic approximation. Omitting the maths, the theoretical update\n",
+ "formula for parameter vector $\\theta$ is:\n",
+ "$$\\theta^{n+1} = \\theta^{n} - H_{\\theta^{n}}^{-1} \\nabla L_{\\theta^{n}}$$\n",
+ "where\n",
+ "$$\\nabla L_{\\theta^{n}} = X^{T}(y - p(X))$$\n",
+ "is the gradient of the likelihood function, with $y$ being the vector\n",
+ "of ground truth for sample data matrix $X$, and\n",
+ "$$H_{\\theta^{n}} = -X^{T} D X$$\n",
+ "is the Hessian of the likelihood function, with $D$ a diagonal matrix\n",
+ "where diagonal value at $(i,i)$ is $D(i,i) = p(x_i) (1 - p(x_i))$.\n",
+ "\n",
+ "In federated Newton-Raphson optimization, each client will compute its\n",
+ "own gradient $\\nabla L_{\\theta^{n}}$ and Hessian $H_{\\theta^{n}}$\n",
+ "based on local training samples. A server will aggregate the gradients\n",
+ "and Hessians computed from all clients, and perform the update of\n",
+ "parameter $\\theta$ based on the theoretical update formula described\n",
+ "above."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "32003ba9",
+ "metadata": {},
+ "source": [
+ "## Implementation\n",
+ "\n",
+ "Using `nvflare`, the federated logistic regression with Newton-Raphson\n",
+ "optimization is implemented as follows.\n",
+ "\n",
+ "On the server side, all workflow logic is implemented in\n",
+ "class `FedAvgNewtonRaphson`, which can be found\n",
+ "[here](code/src/newton_raphson_workflow.py). The\n",
+ "`FedAvgNewtonRaphson` class inherits from the\n",
+ "[`BaseFedAvg`](https://github.com/NVIDIA/NVFlare/blob/main/nvflare/app_common/workflows/base_fedavg.py)\n",
+ "class, which itself inherits from the **ModelController**\n",
+ "([`ModelController`](https://github.com/NVIDIA/NVFlare/blob/main/nvflare/app_common/workflows/model_controller.py))\n",
+ "class. This is the preferable approach to implement a custom\n",
+ "workflow, since `ModelController` decouples communication logic from\n",
+ "actual workflow (training & validation) logic. The mandatory\n",
+ "method to override in `ModelController` is the\n",
+ "[`run()`](https://github.com/NVIDIA/NVFlare/blob/main/nvflare/app_common/workflows/model_controller.py#L37)\n",
+ "method, where the orchestration of server-side workflow actually\n",
+ "happens. The implementation of `run()` method in\n",
+ "[`FedAvgNewtonRaphson`](code/src/newton_raphson_workflow.py)\n",
+ "is similar to the classic\n",
+ "[`FedAvg`](https://github.com/NVIDIA/NVFlare/blob/main/nvflare/app_common/workflows/fedavg.py#L44):\n",
+ "- Initialize the global model, this is achieved through method `load_model()`\n",
+ " from base class\n",
+ " [`ModelController`](https://github.com/NVIDIA/NVFlare/blob/main/nvflare/app_common/workflows/model_controller.py#L292),\n",
+ " which relies on the\n",
+ " [`ModelPersistor`](https://nvflare.readthedocs.io/en/main/glossary.html#persistor). A\n",
+ " custom\n",
+ " [`NewtonRaphsonModelPersistor`](code/newton_raphson/app/custom/newton_raphson_persistor.py)\n",
+ " is implemented in this example, which is based on the\n",
+ " [`NPModelPersistor`](https://github.com/NVIDIA/NVFlare/blob/main/nvflare/app_common/np/np_model_persistor.py)\n",
+ " for numpy data, since the _model_ in the case of logistic regression\n",
+ " is just the parameter vector $\\theta$ that can be represented by a\n",
+ " numpy array. Only the `__init__` method needs to be re-implemented\n",
+ " to provide a proper initialization for the global parameter vector\n",
+ " $\\theta$.\n",
+ "- During each training round, the global model will be sent to the\n",
+ " list of participating clients to perform a training task. This is\n",
+ " done using the\n",
+ " [`send_model_and_wait()`](https://github.com/NVIDIA/NVFlare/blob/main/nvflare/app_common/workflows/model_controller.py#L41)\n",
+ " method. Once\n",
+ " the clients finish their local training, results will be collected\n",
+ " and sent back to server as\n",
+ " [`FLModel`](https://nvflare.readthedocs.io/en/main/programming_guide/fl_model.html#flmodel)s.\n",
+ "- Results sent by clients contain their locally computed gradient and\n",
+ " Hessian. A [custom aggregation\n",
+ " function](code/src/newton_raphson_workflow.py)\n",
+ " is implemented to get the averaged gradient and Hessian, and compute\n",
+ " the Newton-Raphson update for the global parameter vector $\\theta$,\n",
+ " based on the theoretical formula shown above. The averaging of\n",
+ " gradient and Hessian is based on the\n",
+ " [`WeightedAggregationHelper`](https://github.com/NVIDIA/NVFlare/blob/main/nvflare/app_common/aggregators/weighted_aggregation_helper.py#L20),\n",
+ " which weighs the contribution from each client based on the number\n",
+ " of local training samples. The aggregated Newton-Raphson update is\n",
+ " returned as an `FLModel`.\n",
+ "- After getting the aggregated Newton-Raphson update, an\n",
+ " [`update_model()`](code/src/newton_raphson_workflow.py#L155)\n",
+ " method is implemented to actually apply the Newton-Raphson update to\n",
+ " the global model.\n",
+ "- The last step is to save the updated global model, again through\n",
+ " the `NewtonRaphsonModelPersistor` using `save_model()`.\n",
+ "\n",
+ "\n",
+ "On the client side, the local training logic is implemented\n",
+ "[here](code/newton_raphson/app/custom/newton_raphson_train.py). The\n",
+ "implementation is based on the [`Client\n",
+ "API`](https://nvflare.readthedocs.io/en/main/programming_guide/execution_api_type.html#client-api). This\n",
+ "allows users to add minimal `nvflare`-specific code to turn a typical\n",
+ "centralized training script into a federated client side local training\n",
+ "script.\n",
+ "- During local training, each client receives a copy of the global\n",
+ " model, sent by the server, using `flare.receive()` from the Client API.\n",
+ " The received global model is an instance of `FLModel`.\n",
+ "- A local validation is first performed, where validation metrics\n",
+ " (accuracy and precision) are streamed to server using the\n",
+ " [`SummaryWriter`](https://nvflare.readthedocs.io/en/main/apidocs/nvflare.client.tracking.html#nvflare.client.tracking.SummaryWriter). The\n",
+ " streamed metrics can be loaded and visualized using tensorboard.\n",
+ "- Then each client computes its gradient and Hessian based on local\n",
+ " training data, using their respective theoretical formula described\n",
+ " above. This is implemented in the\n",
+ " [`train_newton_raphson()`](code/newton_raphson/app/custom/newton_raphson_train.py#L82)\n",
+ " method. Each client then sends the computed results (always in\n",
+ " `FLModel` format) to server for aggregation, using the Client API call\n",
+ " `flare.send()`.\n",
+ "\n",
+ "Each client site corresponds to a site listed in the data table above.\n",
+ "\n",
+ "A [centralized training script](code/train_centralized.py) is also\n",
+ "provided, which allows for comparing the federated Newton-Raphson\n",
+ "optimization versus the centralized version. In the centralized\n",
+ "version, training data samples from all 4 sites were concatenated into\n",
+ "a single matrix, used to optimize the model parameters. The\n",
+ "optimized model was then tested separately on testing data samples of\n",
+ "the 4 sites, using accuracy and precision as metrics.\n",
+ "\n",
+ "Comparing the federated [client-side training\n",
+ "code](code/newton_raphson/app/custom/newton_raphson_train.py) with the\n",
+ "centralized [training code](code/train_centralized.py), we can see that\n",
+ "the training logic remains similar: load data, perform training\n",
+ "(Newton-Raphson updates), and validate the trained model. The only added\n",
+ "differences in the federated code are related to interaction with the\n",
+ "FL system, such as receiving and sending `FLModel`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c3fc55e0",
+ "metadata": {},
+ "source": [
+ "## Install requirements\n",
+ "First, install the required packages:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "04911ca3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%pip install -r code/requirements.txt"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "33ea8504",
+ "metadata": {},
+ "source": [
+ "## Download and prepare data\n",
+ "\n",
+ "Execute the following script\n",
+ "```\n",
+ "bash ./code/data/prepare_heart_disease_data.sh\n",
+ "```\n",
+ "This will download the heart disease dataset under\n",
+ "`/tmp/flare/dataset/heart_disease_data/`\n",
+ "\n",
+ "Please note that you may need to accept the data terms in order to complete the download."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d548b466",
+ "metadata": {},
+ "source": [
+ "## Centralized Logistic Regression\n",
+ "\n",
+ "Launch the following script:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8c68fe1a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "! python3 code/train_centralized.py --solver custom"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fa666b79",
+ "metadata": {},
+ "source": [
+ "Two implementations of logistic regression are provided in the\n",
+ "centralized training script, which can be specified by the `--solver`\n",
+ "argument:\n",
+ "- One is using `sklearn.LogisticRegression` with the `newton-cholesky`\n",
+ " solver\n",
+ "- The other one is manually implemented using the theoretical update\n",
+ " formulas described above.\n",
+ "\n",
+ "Both implementations were tested to converge in 4 iterations and to\n",
+ "give the same result.\n",
+ "\n",
+ "Example output:\n",
+ "```\n",
+ "using solver: custom\n",
+ "loading training data.\n",
+ "training data X loaded. shape: (486, 13)\n",
+ "training data y loaded. shape: (486, 1)\n",
+ "\n",
+ "site - 1\n",
+ "validation set n_samples: 104\n",
+ "accuracy: 0.75\n",
+ "precision: 0.7115384615384616\n",
+ "\n",
+ "site - 2\n",
+ "validation set n_samples: 89\n",
+ "accuracy: 0.7528089887640449\n",
+ "precision: 0.6122448979591837\n",
+ "\n",
+ "site - 3\n",
+ "validation set n_samples: 16\n",
+ "accuracy: 0.75\n",
+ "precision: 1.0\n",
+ "\n",
+ "site - 4\n",
+ "validation set n_samples: 45\n",
+ "accuracy: 0.6\n",
+ "precision: 0.9047619047619048\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0b72ef2b",
+ "metadata": {},
+ "source": [
+ "## Federated Logistic Regression\n",
+ "\n",
+ "Execute the following command to launch federated logistic\n",
+ "regression. This will run in `nvflare`'s simulator mode.\n",
+ "```\n",
+ "nvflare simulator -w ./workspace -n 4 -t 4 job/newton_raphson/\n",
+ "```\n",
+ "\n",
+ "Accuracy and precision for each site can be viewed in Tensorboard:\n",
+ "```\n",
+ "tensorboard --logdir=./workspace/server/simulate_job/tb_events\n",
+ "```\n",
+ "As can be seen from the figure below, per-site evaluation metrics in\n",
+ "federated logistic regression are on-par with the centralized version.\n",
+ "\n",
+ "
\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/figs/minibatch.png b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/figs/minibatch.png
new file mode 100644
index 0000000000..6c18b663a3
Binary files /dev/null and b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/figs/minibatch.png differ
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/kmeans_job.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/kmeans_job.py
new file mode 100644
index 0000000000..3437a4ed2d
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/kmeans_job.py
@@ -0,0 +1,152 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+
+from src.kmeans_assembler import KMeansAssembler
+from utils.split_data import split_data
+
+from nvflare import FedJob
+from nvflare.app_common.aggregators.collect_and_assemble_aggregator import CollectAndAssembleAggregator
+from nvflare.app_common.shareablegenerators.full_model_shareable_generator import FullModelShareableGenerator
+from nvflare.app_common.workflows.scatter_and_gather import ScatterAndGather
+from nvflare.app_opt.sklearn.joblib_model_param_persistor import JoblibModelParamPersistor
+from nvflare.job_config.script_runner import ScriptRunner
+
+
+def define_parser():
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--workspace_dir",
+ type=str,
+ default="/tmp/nvflare/workspace/works/kmeans",
+ help="work directory, default to '/tmp/nvflare/workspace/works/kmeans'",
+ )
+ parser.add_argument(
+ "--job_dir",
+ type=str,
+ default="/tmp/nvflare/workspace/jobs/kmeans",
+ help="directory for job export, default to '/tmp/nvflare/workspace/jobs/kmeans'",
+ )
+ parser.add_argument(
+ "--data_path",
+ type=str,
+ default="/tmp/nvflare/dataset/sklearn_iris.csv",
+ help="data path, default to '/tmp/nvflare/dataset/sklearn_iris.csv'",
+ )
+ parser.add_argument(
+ "--num_clients",
+ type=int,
+ default=3,
+ help="number of clients to simulate, default to 3",
+ )
+ parser.add_argument(
+ "--num_rounds",
+ type=int,
+ default=5,
+ help="number of rounds, default to 5",
+ )
+ parser.add_argument(
+ "--split_mode",
+ type=str,
+ default="uniform",
+ choices=["uniform", "linear", "square", "exponential"],
+ help="how to split data among clients",
+ )
+ parser.add_argument(
+ "--valid_frac",
+ type=float,
+ default=1,
+ help="fraction of data to use for validation, default to perform validation on all data",
+ )
+ return parser.parse_args()
+
+
+def main():
+ args = define_parser()
+ # Unpack the command-line options; job_name encodes split mode and client count
+ data_path = args.data_path
+ num_clients = args.num_clients
+ num_rounds = args.num_rounds
+ split_mode = args.split_mode
+ valid_frac = args.valid_frac
+ job_name = f"sklearn_kmeans_{split_mode}_{num_clients}_clients"
+ train_script = "src/kmeans_fl.py"
+
+ # Set the output workspace (one sub-directory per job name) and job directories
+ workspace_dir = os.path.join(args.workspace_dir, job_name)
+ job_dir = args.job_dir
+
+ # Create the FedJob
+ job = FedJob(name=job_name, min_clients=num_clients)
+
+ # Define the controller workflow and send to server
+ controller = ScatterAndGather(
+ min_clients=num_clients,
+ num_rounds=num_rounds,
+ aggregator_id="aggregator",
+ persistor_id="persistor",
+ shareable_generator_id="shareable_generator",
+ train_task_name="train",
+ )
+ job.to_server(controller, id="scatter_and_gather")
+
+ # Register the other server components under the ids the controller refers to
+ assembler = KMeansAssembler()
+ job.to_server(assembler, id="kmeans_assembler")
+ aggregator = CollectAndAssembleAggregator(assembler_id="kmeans_assembler")
+ job.to_server(aggregator, id="aggregator")
+ shareable_generator = FullModelShareableGenerator()
+ job.to_server(shareable_generator, id="shareable_generator")
+ persistor = JoblibModelParamPersistor(
+ initial_params={"n_clusters": 3},
+ )
+ job.to_server(persistor, id="persistor")
+
+ # Generate the data split: per-site train ranges plus one "valid" range
+ # NOTE(review): split_mode is not passed to split_data; it only affects job_name - confirm
+ site_indices = split_data(
+ data_path,
+ num_clients,
+ valid_frac,
+ )
+
+ for i in range(1, num_clients + 1):
+ # Each site gets its own train range; all sites share the same valid range
+ train_start = site_indices[i]["start"]
+ train_end = site_indices[i]["end"]
+ valid_start = site_indices["valid"]["start"]
+ valid_end = site_indices["valid"]["end"]
+
+ executor = ScriptRunner(
+ script=train_script,
+ script_args=f"--data_path {data_path} "
+ f"--train_start {train_start} --train_end {train_end} "
+ f"--valid_start {valid_start} --valid_end {valid_end}",
+ params_exchange_format="raw",
+ )
+ job.to(executor, f"site-{i}", tasks=["train"])
+
+ # Export the job definition to job_dir
+ print("job_dir=", job_dir)
+ job.export_job(job_dir)
+
+ # Run the job with the simulator
+ print("workspace_dir=", workspace_dir)
+ job.simulator_run(workspace_dir)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/requirements.txt b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/requirements.txt
new file mode 100644
index 0000000000..b72d5c2798
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/requirements.txt
@@ -0,0 +1,4 @@
+pandas
+scikit-learn
+joblib
+tensorboard
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/src/kmeans_assembler.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/src/kmeans_assembler.py
new file mode 100644
index 0000000000..23e6fdc62e
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/src/kmeans_assembler.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict
+
+import numpy as np
+from sklearn.cluster import KMeans
+
+from nvflare.apis.dxo import DXO, DataKind
+from nvflare.apis.fl_context import FLContext
+from nvflare.app_common.aggregators.assembler import Assembler
+from nvflare.app_common.app_constant import AppConstants
+
+
+class KMeansAssembler(Assembler):
+    """Server-side assembler that merges per-client k-Means results.
+
+    In round 0 a KMeans model is fitted over the centers submitted by all
+    clients and its cluster centers become the initial global centers.
+    In every later round each client's centers are folded into the stored
+    global centers as a count-weighted mean, and the per-center counts are
+    accumulated across rounds.
+    """
+
+    def __init__(self):
+        super().__init__(data_kind=DataKind.WEIGHTS)
+        # Aggregator needs to keep record of historical
+        # center and count information for mini-batch kmeans
+        self.center = None
+        self.count = None
+        self.n_cluster = 0
+
+    def get_model_params(self, dxo: DXO):
+        """Return the "center" and "count" entries of a client's DXO payload."""
+        data = dxo.data
+        return {"center": data["center"], "count": data["count"]}
+
+    def assemble(self, data: Dict[str, dict], fl_ctx: FLContext) -> DXO:
+        """Combine the collected client records into updated global centers.
+
+        The client records are read from ``self.collection``; the ``data``
+        argument is not used by this implementation.
+
+        Args:
+            data: unused, kept for interface compatibility.
+            fl_ctx: FL context; ``AppConstants.CURRENT_ROUND`` is read from it.
+
+        Returns:
+            A DXO of the expected data kind whose data is ``{"center": <global centers>}``.
+        """
+        current_round = fl_ctx.get_prop(AppConstants.CURRENT_ROUND)
+        records = list(self.collection.values())
+        if current_round == 0:
+            # First round: take the number of clusters from the first record
+            # and start the accumulated per-center counts at zero.
+            self.n_cluster = records[0]["center"].shape[0]
+            self.count = np.zeros([self.n_cluster])
+            # Perform one round of KMeans over the submitted centers
+            # to be used as the original center points;
+            # no count for this round.
+            centers = np.concatenate([record["center"] for record in records])
+            kmeans_center_initial = KMeans(n_clusters=self.n_cluster)
+            kmeans_center_initial.fit(centers)
+            self.center = kmeans_center_initial.cluster_centers_
+        else:
+            # Mini-batch k-Means step to assemble the received centers
+            for center_idx in range(self.n_cluster):
+                centers_global_rescale = self.center[center_idx] * self.count[center_idx]
+                # Aggregate center, add new center to previous estimate, weighted by counts
+                for record in records:
+                    centers_global_rescale += record["center"][center_idx] * record["count"][center_idx]
+                    self.count[center_idx] += record["count"][center_idx]
+                if self.count[center_idx] == 0:
+                    # No sample has ever been assigned to this cluster: dividing by the
+                    # zero count would turn the center into NaN, so keep the previous center.
+                    continue
+                # Rescale to compute mean of all points (old and new combined)
+                alpha = 1 / self.count[center_idx]
+                centers_global_rescale *= alpha
+                # Update the global center
+                self.center[center_idx] = centers_global_rescale
+        params = {"center": self.center}
+        dxo = DXO(data_kind=self.expected_data_kind, data=params)
+
+        return dxo
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/src/kmeans_fl.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/src/kmeans_fl.py
new file mode 100644
index 0000000000..b2af57b507
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/src/kmeans_fl.py
@@ -0,0 +1,126 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+
+from sklearn.cluster import KMeans, MiniBatchKMeans, kmeans_plusplus
+from sklearn.metrics import homogeneity_score
+from torch.utils.tensorboard import SummaryWriter
+
+import nvflare.client as flare
+from nvflare.app_opt.sklearn.data_loader import load_data_for_range
+
+
+def main():
+    """Run the client side of federated k-Means through the NVFlare client API.
+
+    Command-line options give the data file and the row ranges used for
+    training and validation. For every model received from NVFlare:
+
+    - round 0: read ``n_clusters`` from the received params and compute local
+      initial centers with k-means++ (no counts, homogeneity reported as 0);
+    - later rounds: score the received global centers on the validation range
+      with the homogeneity score, then run MiniBatchKMeans on the local
+      training range starting from those centers.
+
+    The resulting centers/counts are sent back as an FLModel and the
+    homogeneity score is logged to TensorBoard under ``./logs``.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--data_path",
+        type=str,
+        default="/tmp/nvflare/dataset/sklearn_iris.csv",
+        help="data directory, default to '/tmp/nvflare/dataset/sklearn_iris.csv'",
+    )
+    parser.add_argument(
+        "--train_start",
+        type=int,
+        help="start index of training data",
+    )
+    parser.add_argument(
+        "--train_end",
+        type=int,
+        help="end index of training data",
+    )
+    parser.add_argument(
+        "--valid_start",
+        type=int,
+        help="start index of validation data",
+    )
+    parser.add_argument(
+        "--valid_end",
+        type=int,
+        help="end index of validation data",
+    )
+    args = parser.parse_args()
+    max_iter = 1
+    n_init = 1
+    reassignment_ratio = 0
+    n_clusters = 0
+    writer = SummaryWriter(log_dir="./logs")
+
+    try:
+        # Load data
+        train_data = load_data_for_range(args.data_path, args.train_start, args.train_end)
+        x_train = train_data[0]
+        n_samples = train_data[2]
+        valid_data = load_data_for_range(args.data_path, args.valid_start, args.valid_end)
+
+        # initializes NVFlare client API
+        flare.init()
+        # Train federated rounds
+        # start with global model at the beginning of each round
+        while flare.is_running():
+            # receives FLModel from NVFlare
+            input_model = flare.receive()
+            curr_round = input_model.current_round
+            global_param = input_model.params
+            print(f"current_round={curr_round}")
+
+            if curr_round == 0:
+                # first round, compute initial center with kmeans++ method
+                # model will be None for this round
+                n_clusters = global_param["n_clusters"]
+                center_local, _ = kmeans_plusplus(x_train, n_clusters=n_clusters, random_state=0)
+                params = {"center": center_local, "count": None}
+                homo = 0
+            else:
+                center_global = global_param["center"]
+                # Take the cluster count from the received centers so this round
+                # also works when round 0 was never seen by this process.
+                n_clusters = len(center_global)
+
+                # local validation with global center
+                # fit a standalone KMeans with just the given center
+                kmeans_global = KMeans(n_clusters=n_clusters, init=center_global, n_init=1)
+                kmeans_global.fit(center_global)
+                # get validation data, both x and y will be used
+                (x_valid, y_valid, valid_size) = valid_data
+                y_pred = kmeans_global.predict(x_valid)
+                homo = homogeneity_score(y_valid, y_pred)
+                print(f"Homogeneity {homo:.4f}")
+
+                # local training starting from global center
+                kmeans = MiniBatchKMeans(
+                    n_clusters=n_clusters,
+                    batch_size=n_samples,
+                    max_iter=max_iter,
+                    init=center_global,
+                    n_init=n_init,
+                    reassignment_ratio=reassignment_ratio,
+                    random_state=0,
+                )
+                kmeans.fit(x_train)
+                center_local = kmeans.cluster_centers_
+                # NOTE(review): _counts is a private scikit-learn attribute and may change between releases.
+                count_local = kmeans._counts
+                params = {"center": center_local, "count": count_local}
+
+            # log metric
+            writer.add_scalar("Homogeneity", homo, curr_round)
+
+            # construct trained FL model
+            output_model = flare.FLModel(
+                params=params,
+                metrics={"metrics": homo},
+                meta={"NUM_STEPS_CURRENT_ROUND": n_samples},
+            )
+            # send model back to NVFlare
+            flare.send(output_model)
+    finally:
+        # Flush and release the TensorBoard event file even if a round fails.
+        writer.close()
+
+# Script entry point: call main() only when this file is executed directly, not when it is imported.
+if __name__ == "__main__":
+ main()
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/utils/prepare_data.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/utils/prepare_data.py
new file mode 100644
index 0000000000..cfc12462d1
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/utils/prepare_data.py
@@ -0,0 +1,84 @@
+# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+from typing import Optional
+
+import numpy as np
+import pandas as pd
+from sklearn import datasets
+
+
+def load_data(dataset_name: str = "iris"):
+    """Load one of the supported scikit-learn datasets by name.
+
+    Args:
+        dataset_name: "iris" or "cancer".
+
+    Returns:
+        The object returned by ``datasets.load_iris()`` or
+        ``datasets.load_breast_cancer()`` respectively.
+
+    Raises:
+        ValueError: if ``dataset_name`` is neither of the supported names.
+    """
+    if dataset_name == "iris":
+        return datasets.load_iris()
+    if dataset_name == "cancer":
+        return datasets.load_breast_cancer()
+    raise ValueError("Dataset unknown!")
+
+
+def prepare_data(
+    output_dir: str,
+    dataset_name: str = "iris",
+    randomize: bool = False,
+    filename: Optional[str] = None,
+    file_format="csv",
+):
+    """Load a scikit-learn dataset and save it as a header-less CSV file.
+
+    The first CSV column is the label, the remaining columns are the features.
+
+    Args:
+        output_dir: directory to write into; created if missing. An empty
+            string means the current working directory.
+        dataset_name: name passed to ``load_data``.
+        randomize: if truthy, shuffle the rows with a fixed seed (0).
+        filename: output file name; defaults to ``"<dataset_name>.csv"``.
+        file_format: only ``"csv"`` is supported.
+
+    Raises:
+        NotImplementedError: if ``file_format`` is not ``"csv"``.
+    """
+    # Load data
+    dataset = load_data(dataset_name)
+    x = dataset.data
+    y = dataset.target
+    if randomize:
+        np.random.seed(0)
+        idx_random = np.random.permutation(len(y))
+        x = x[idx_random, :]
+        y = y[idx_random]
+
+    data = np.column_stack((y, x))
+    df = pd.DataFrame(data=data)
+
+    if file_format != "csv":
+        # Reject unsupported formats before touching the file system.
+        raise NotImplementedError
+
+    # Make sure the target folder exists. An empty output_dir (bare file name)
+    # refers to the current directory, for which there is nothing to create.
+    if output_dir:
+        if os.path.exists(output_dir) and not os.path.isdir(output_dir):
+            # The path is occupied by a non-directory entry; os.rmdir cannot
+            # delete that, so remove it as a file instead.
+            os.remove(output_dir)
+        os.makedirs(output_dir, exist_ok=True)
+
+    # Save to csv file
+    filename = filename if filename else f"{dataset_name}.csv"
+    file_path = os.path.join(output_dir, filename)
+    df.to_csv(file_path, sep=",", index=False, header=False)
+
+
+def main():
+    """Parse the command line and write the selected dataset to a CSV file.
+
+    All options are optional: the dataset defaults to "iris", randomization is
+    off unless ``--randomize`` is non-zero, and the output path defaults to
+    ``/tmp/nvflare/dataset/sklearn_<dataset_name>.csv``.
+    """
+    parser = argparse.ArgumentParser(description="Load sklearn data and save to csv")
+    parser.add_argument(
+        "--dataset_name",
+        type=str,
+        choices=["iris", "cancer"],
+        default="iris",
+        help="Dataset name, default to 'iris'",
+    )
+    parser.add_argument(
+        "--randomize",
+        type=int,
+        default=0,
+        help="Whether to randomize data sequence (non-zero enables), default to 0",
+    )
+    parser.add_argument(
+        "--out_path",
+        type=str,
+        default=None,
+        help="Path to output data file, default to '/tmp/nvflare/dataset/sklearn_<dataset_name>.csv'",
+    )
+    args = parser.parse_args()
+
+    # Without a default, a missing --out_path would crash in os.path.dirname(None).
+    out_path = args.out_path or f"/tmp/nvflare/dataset/sklearn_{args.dataset_name}.csv"
+    # A bare file name has no directory part; write into the current directory then.
+    output_dir = os.path.dirname(out_path) or "."
+    filename = os.path.basename(out_path)
+    prepare_data(output_dir, args.dataset_name, bool(args.randomize), filename)
+
+
+# Script entry point: call main() only when this file is executed directly, not when it is imported.
+if __name__ == "__main__":
+ main()
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/utils/split_data.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/utils/split_data.py
new file mode 100644
index 0000000000..b2bc866f7f
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/code/utils/split_data.py
@@ -0,0 +1,104 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from enum import Enum
+from typing import List
+
+import numpy as np
+
+
+class SplitMethod(Enum):
+ """Strategies for weighting the relative amount of data given to each site.
+
+ get_split_ratios() maps each member to a weight vector over the sites.
+ """
+
+ # every site gets the same weight
+ UNIFORM = "uniform"
+ # weights grow as 1, 2, ..., site_num
+ LINEAR = "linear"
+ # weights grow as the squares of 1, 2, ..., site_num
+ SQUARE = "square"
+ # weights grow as exp(1), exp(2), ..., exp(site_num)
+ EXPONENTIAL = "exponential"
+
+
+def get_split_ratios(site_num: int, split_method: SplitMethod):
+    """Return the un-normalized relative data share of each site.
+
+    Args:
+        site_num: number of sites.
+        split_method: a ``SplitMethod`` member, or the string value of one
+            (for example ``"uniform"``).
+
+    Returns:
+        A 1-D numpy array with ``site_num`` weights.
+
+    Raises:
+        ValueError: if ``split_method`` does not name a supported method.
+    """
+    try:
+        # Accept both enum members and their values; anything else used to
+        # fail with AttributeError on ``.name`` instead of the intended error.
+        method = SplitMethod(split_method)
+    except ValueError:
+        raise ValueError(f"Split method {split_method!r} not implemented!") from None
+
+    if method is SplitMethod.UNIFORM:
+        return np.ones(site_num)
+
+    # The remaining methods are all transforms of the ramp 1, 2, ..., site_num.
+    ramp = np.linspace(1, site_num, num=site_num)
+    if method is SplitMethod.LINEAR:
+        return ramp
+    if method is SplitMethod.SQUARE:
+        return np.square(ramp)
+    if method is SplitMethod.EXPONENTIAL:
+        return np.exp(ramp)
+    raise ValueError(f"Split method {method.name} not implemented!")
+
+
+def split_num_proportion(n, site_num, split_method: SplitMethod) -> List[int]:
+    """Split ``n`` items across ``site_num`` sites in proportion to the split ratios.
+
+    Every site except the last receives the truncated integer share of its
+    ratio; the last site receives whatever is left so the sizes add up to ``n``.
+
+    Returns:
+        A list of per-site sizes.
+    """
+    weights = get_split_ratios(site_num, split_method)
+    weight_sum = sum(weights)
+    # Truncated proportional share for all sites but the last one.
+    sizes = [int(n * weights[idx] / weight_sum) for idx in range(site_num - 1)]
+    # The remainder goes to the last site.
+    sizes.append(n - sum(sizes))
+    return sizes
+
+
+def assign_data_index_to_sites(
+    data_size: int,
+    valid_fraction: float,
+    num_sites: int,
+    split_method: SplitMethod = SplitMethod.UNIFORM,
+) -> dict:
+    """Assign row-index ranges of one dataset to the validation set and to each site.
+
+    With ``valid_fraction < 1.0`` the first rows form the validation range and
+    the sites share the remaining rows. With ``valid_fraction == 1.0`` the whole
+    dataset is the validation range and the sites also share the whole dataset.
+
+    Args:
+        data_size: total number of rows.
+        valid_fraction: fraction of rows used for validation, in [0, 1].
+        num_sites: number of sites.
+        split_method: how the training rows are distributed over the sites.
+
+    Returns:
+        A dict with key ``"valid"`` and keys ``1..num_sites``, each mapping to
+        ``{"start": int, "end": int}``.
+
+    Raises:
+        ValueError: if ``valid_fraction`` is greater than 1 or negative.
+    """
+    if valid_fraction > 1.0:
+        raise ValueError("validation percent should be less than or equal to 100% of the total data")
+    if valid_fraction < 0.0:
+        # A negative fraction would produce negative sizes and indices.
+        raise ValueError("validation percent should not be negative")
+
+    if valid_fraction < 1.0:
+        valid_size = int(round(data_size * valid_fraction, 0))
+        train_size = data_size - valid_size
+        # Training rows start right after the validation rows.
+        offset = valid_size
+    else:
+        valid_size = data_size
+        train_size = data_size
+        # Validation covers everything, so training ranges start at row 0.
+        offset = 0
+
+    site_sizes = split_num_proportion(train_size, num_sites, split_method)
+    split_data_indices = {
+        "valid": {"start": 0, "end": valid_size},
+    }
+    # Walk the sites once with a running offset instead of re-summing prefixes.
+    idx_start = offset
+    for site_id, site_size in zip(range(1, num_sites + 1), site_sizes):
+        idx_end = idx_start + site_size
+        split_data_indices[site_id] = {"start": idx_start, "end": idx_end}
+        idx_start = idx_end
+
+    return split_data_indices
+
+
+def get_file_line_count(input_path: str) -> int:
+    """Return the number of lines in the text file at ``input_path``."""
+    with open(input_path, "r") as handle:
+        return sum(1 for _ in handle)
+
+
+def split_data(
+    data_path: str,
+    num_clients: int,
+    valid_frac: float,
+    split_method: SplitMethod = SplitMethod.UNIFORM,
+):
+    """Compute validation and per-client row ranges for the data file at ``data_path``.
+
+    The total size is the number of lines in the file; the ranges are those
+    produced by ``assign_data_index_to_sites`` for that size.
+    """
+    return assign_data_index_to_sites(
+        get_file_line_count(data_path),
+        valid_frac,
+        num_clients,
+        split_method,
+    )
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/convert_kmeans_to_fl.ipynb b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/convert_kmeans_to_fl.ipynb
new file mode 100644
index 0000000000..9a8cb7a548
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/convert_kmeans_to_fl.ipynb
@@ -0,0 +1,185 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "7d7767c9",
+ "metadata": {},
+ "source": [
+ "# Federated K-Means Clustering with Scikit-learn on Iris Dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f635ea04",
+ "metadata": {},
+ "source": [
+ "## Introduction to Scikit-learn, tabular data, and federated k-Means\n",
+ "### Scikit-learn\n",
+ "This example shows how to use [NVIDIA FLARE](https://nvflare.readthedocs.io/en/main/index.html) on tabular data.\n",
+ "It uses [Scikit-learn](https://scikit-learn.org/),\n",
+ "a widely used open-source machine learning library that supports supervised \n",
+ "and unsupervised learning.\n",
+ "### Tabular data\n",
+ "The data used in this example is tabular in a format that can be handled by [pandas](https://pandas.pydata.org/), such that:\n",
+ "- rows correspond to data samples\n",
+ "- the first column represents the label \n",
+ "- the other columns cover the features. \n",
+ "\n",
+ "Each client is expected to have one local data file containing both training \n",
+ "and validation samples. To load the data for each client, the following \n",
+ "parameters are expected by the local learner:\n",
+ "- data_path: string, the full path to the client's data file \n",
+ "- train_start: int, start row index for the training set\n",
+ "- train_end: int, end row index for the training set\n",
+ "- valid_start: int, start row index for the validation set\n",
+ "- valid_end: int, end row index for the validation set\n",
+ "\n",
+ "### Federated k-Means clustering\n",
+ "The machine learning algorithm in this example is [k-Means clustering](https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html).\n",
+ "\n",
+ "The aggregation follows the scheme defined in [Mini-batch k-Means](https://scikit-learn.org/stable/modules/generated/sklearn.cluster.MiniBatchKMeans.html). \n",
+ "\n",
+ "Under this setting, each round of federated learning can be formulated as follows:\n",
+ "- local training: starting from global centers, each client trains a local MiniBatchKMeans model with their own data\n",
+ "- global aggregation: the server collects the cluster centers and \n",
+ " per-center counts from all clients, aggregates them by treating \n",
+ " each client's results as a mini-batch, and updates the global centers and per-center counts.\n",
+ "\n",
+ "For center initialization, at the first round, each client generates its initial centers with the k-means++ method. Then, the server collects all initial centers and performs one round of k-means to generate the initial global center.\n",
+ "\n",
+ "The steps to run this example are listed below."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ce92018e",
+ "metadata": {},
+ "source": [
+ "## Install requirements\n",
+ "First, install the required packages:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e08b25db",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%pip install -r code/requirements.txt"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "31c22f7d",
+ "metadata": {},
+ "source": [
+ "## Download and prepare data\n",
+ "This example uses the Iris dataset available from Scikit-learn's dataset API. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e6c3b765",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%env DATASET_PATH=/tmp/nvflare/dataset/sklearn_iris.csv\n",
+ "! python3 ./code/utils/prepare_data.py --dataset_name iris --randomize 1 --out_path ${DATASET_PATH}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6a1fefd8",
+ "metadata": {},
+ "source": [
+ "This will load the data, format it properly by removing the header, order \n",
+ "the label and feature columns, randomize the dataset, and save it to a CSV file with comma separation. \n",
+ "The default path is `/tmp/nvflare/dataset/sklearn_iris.csv`. \n",
+ "\n",
+ "Note that the dataset contains a label for each sample, which will not be \n",
+ "used for training since k-Means clustering is an unsupervised method. \n",
+ "The entire dataset with labels will be used for performance evaluation \n",
+ "based on [homogeneity_score](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.homogeneity_score.html)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cf161c43",
+ "metadata": {},
+ "source": [
+ "## Run simulated kmeans experiment\n",
+ "We can run the federated training using the NVFlare Simulator with the JobAPI:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a2a8f0ee",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%cd code\n",
+ "! python kmeans_job.py --num_clients 3 --split_mode uniform"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7b9fdb72",
+ "metadata": {},
+ "source": [
+ "With the default arguments, [kmeans_job.py](code/kmeans_job.py) will export the job to `/tmp/nvflare/workspace/jobs/kmeans` and then the job will be run with a workspace directory of `/tmp/nvflare/workspace/works/kmeans`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fb48af70",
+ "metadata": {},
+ "source": [
+ "## Result visualization\n",
+ "Model accuracy is computed as the homogeneity score between the clusters formed and the ground-truth labels, and it can be visualized in TensorBoard."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "88d9f366",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load_ext tensorboard\n",
+ "%tensorboard --logdir /tmp/nvflare/workspace/works/kmeans/sklearn_kmeans_uniform_3_clients"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "88a470ec-c411-4f4f-b5d4-9bb66377583d",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/figs/km_curve_baseline.png b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/figs/km_curve_baseline.png
new file mode 100644
index 0000000000..9ff1fcdb4c
Binary files /dev/null and b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/figs/km_curve_baseline.png differ
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/figs/km_curve_fl.png b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/figs/km_curve_fl.png
new file mode 100644
index 0000000000..df082d406a
Binary files /dev/null and b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/figs/km_curve_fl.png differ
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/figs/km_curve_fl_he.png b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/figs/km_curve_fl_he.png
new file mode 100644
index 0000000000..b1610c4183
Binary files /dev/null and b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/figs/km_curve_fl_he.png differ
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/km_job.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/km_job.py
new file mode 100644
index 0000000000..6a3e79f164
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/km_job.py
@@ -0,0 +1,115 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+
+from src.kaplan_meier_wf import KM
+from src.kaplan_meier_wf_he import KM_HE
+
+from nvflare import FedJob
+from nvflare.job_config.script_runner import ScriptRunner
+
+
+def main():
+    """Build, export and simulate the Kaplan-Meier federated job.
+
+    Depending on ``--encryption`` either the plain (KM) or the homomorphic
+    encryption (KM_HE) controller and training script are used. The job is
+    exported to the job directory and then run in the simulator.
+    """
+    args = define_parser()
+
+    # Default paths
+    data_root = "/tmp/nvflare/dataset/km_data"
+    he_context_path = "/tmp/nvflare/he_context/he_context_client.txt"
+
+    # Pick job name, client script and its arguments for the chosen mode
+    if args.encryption:
+        job_name, train_script = "KM_HE", "src/kaplan_meier_train_he.py"
+        script_args = f"--data_root {data_root} --he_context_path {he_context_path}"
+    else:
+        job_name, train_script = "KM", "src/kaplan_meier_train.py"
+        script_args = f"--data_root {data_root}"
+
+    # Number of simulated clients; threads fall back to one per client
+    num_clients = args.num_clients
+    num_threads = args.num_threads or num_clients
+
+    # Output locations: the workspace gets a per-job sub-directory
+    job_dir = args.job_dir
+    workspace_dir = os.path.join(args.workspace_dir, job_name)
+
+    # Create the FedJob
+    job = FedJob(name=job_name, min_clients=num_clients)
+
+    # Server side: the KM controller workflow matching the chosen mode
+    if args.encryption:
+        controller = KM_HE(min_clients=num_clients, he_context_path=he_context_path)
+    else:
+        controller = KM(min_clients=num_clients)
+    job.to_server(controller)
+
+    # Client side: the same ScriptRunner is sent to all clients
+    job.to_clients(
+        ScriptRunner(
+            script=train_script,
+            script_args=script_args,
+            params_exchange_format="raw",
+            launch_external_process=False,
+        ),
+        tasks=["train"],
+    )
+
+    # Export the job
+    print("job_dir=", job_dir)
+    job.export_job(job_dir)
+
+    # Run the job
+    print("workspace_dir=", workspace_dir)
+    print("num_threads=", num_threads)
+    job.simulator_run(workspace_dir, n_clients=num_clients, threads=num_threads)
+
+
+def define_parser():
+    """Define the job script's command-line options and parse ``sys.argv``.
+
+    Returns:
+        The parsed ``argparse.Namespace`` with ``workspace_dir``, ``job_dir``,
+        ``encryption``, ``num_clients`` and ``num_threads``.
+    """
+    # (flag, add_argument keyword options), registered in this order
+    option_specs = [
+        (
+            "--workspace_dir",
+            {
+                "type": str,
+                "default": "/tmp/nvflare/jobs/km/workdir",
+                "help": "work directory, default to '/tmp/nvflare/jobs/km/workdir'",
+            },
+        ),
+        (
+            "--job_dir",
+            {
+                "type": str,
+                "default": "/tmp/nvflare/jobs/km/jobdir",
+                "help": "directory for job export, default to '/tmp/nvflare/jobs/km/jobdir'",
+            },
+        ),
+        (
+            "--encryption",
+            {
+                "action": argparse.BooleanOptionalAction,
+                "help": "whether to enable encryption, default to False",
+            },
+        ),
+        (
+            "--num_clients",
+            {
+                "type": int,
+                "default": 5,
+                "help": "number of clients to simulate, default to 5",
+            },
+        ),
+        (
+            "--num_threads",
+            {
+                "type": int,
+                "help": "number of threads to use for FL simulation, default to the number of clients if not specified",
+            },
+        ),
+    ]
+
+    parser = argparse.ArgumentParser()
+    for flag, options in option_specs:
+        parser.add_argument(flag, **options)
+    return parser.parse_args()
+
+
+# Script entry point: call main() only when this file is executed directly, not when it is imported.
+if __name__ == "__main__":
+ main()
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/requirements.txt b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/requirements.txt
new file mode 100644
index 0000000000..e6d18ba9a3
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/requirements.txt
@@ -0,0 +1,3 @@
+lifelines
+tenseal
+scikit-survival
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/src/kaplan_meier_train.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/src/kaplan_meier_train.py
new file mode 100644
index 0000000000..d8d7e55d28
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/src/kaplan_meier_train.py
@@ -0,0 +1,152 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import json
+import os
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from lifelines import KaplanMeierFitter
+from lifelines.utils import survival_table_from_events
+
+# (1) import nvflare client API
+import nvflare.client as flare
+from nvflare.app_common.abstract.fl_model import FLModel, ParamsType
+
+
+# Client code
def details_save(kmf):
    """Write the fitted Kaplan-Meier result to ``km_global.json`` in the current working directory.

    Args:
        kmf: fitted estimator exposing ``survival_function_`` (a DataFrame indexed by
            time with a ``KM_estimate`` column) and ``event_table`` (a DataFrame with
            an ``observed`` column).
    """
    survival_function = kmf.survival_function_
    # Time points of the fitted curve
    timeline = survival_function.index.values
    # KM estimate at each time point
    km_estimate = survival_function["KM_estimate"].values
    # Observed events at each time point. Selected by name: the previous positional
    # lookup (first column) assumed that column holds the observed events, but per the
    # lifelines docs the event table starts with "removed" (observed + censored).
    event_count = kmf.event_table["observed"].values
    # NOTE(review): despite the key name, this is 1 - S(t), i.e. the complement of the
    # first column of the survival function. The key is kept for output compatibility.
    survival_rate = 1 - survival_function.iloc[:, 0].values

    results = {
        "timeline": timeline.tolist(),
        "km_estimate": km_estimate.tolist(),
        "event_count": event_count.tolist(),
        "survival_rate": survival_rate.tolist(),
    }
    file_path = os.path.join(os.getcwd(), "km_global.json")
    print(f"save the details of KM analysis result to {file_path} \n")
    with open(file_path, "w") as json_file:
        json.dump(results, json_file, indent=4)
+
+
def plot_and_save(kmf):
    """Plot the Kaplan-Meier survival curve of ``kmf`` and save it as ``km_curve_fl.png``.

    The image is written to the current working directory. The figure is closed
    afterwards so repeated calls do not accumulate open pyplot figures.

    Args:
        kmf: fitted estimator providing ``plot_survival_function()``.
    """
    fig = plt.figure()
    try:
        plt.title("Federated")
        kmf.plot_survival_function()
        plt.ylim(0, 1)
        plt.ylabel("prob")
        plt.xlabel("time")
        # Empty, frameless legend: presumably meant to hide the curve's default legend entry.
        plt.legend("", frameon=False)
        plt.tight_layout()
        file_path = os.path.join(os.getcwd(), "km_curve_fl.png")
        print(f"save the curve plot to {file_path} \n")
        plt.savefig(file_path)
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)
+
+
def main():
    """Run the client side of the two-round federated Kaplan-Meier workflow.

    The site's data file ``<data_root>/<site_name>.csv`` (columns ``event`` and
    ``time``) is loaded once. Each message received from the server is then handled
    according to its ``current_round``:

    * round 1: build local observed/censored histograms keyed by integer time and
      send them to the server;
    * round 2: read the global histograms from the message, unfold them into an
      event list, fit a Kaplan-Meier estimator, save the curve and the details, and
      send an empty response.
    """
    parser = argparse.ArgumentParser(description="KM analysis")
    # Required: the CSV path cannot be built without it (os.path.join fails on None).
    parser.add_argument("--data_root", type=str, required=True, help="Root path for data files")
    args = parser.parse_args()

    flare.init()

    site_name = flare.get_site_name()
    print(f"Kaplan-meier analysis for {site_name}")

    # get local data
    data_path = os.path.join(args.data_root, site_name + ".csv")
    data = pd.read_csv(data_path)
    event_local = data["event"]
    time_local = data["time"]

    while flare.is_running():
        # receives global message from NVFlare
        global_msg = flare.receive()
        curr_round = global_msg.current_round
        print(f"current_round={curr_round}")

        if curr_round == 1:
            # First round: empty payload from server, send local histogram
            event_table = survival_table_from_events(time_local, event_local)
            hist_idx = event_table.index.values.astype(int)
            # Zero-fill every bin below the largest local time index; the largest
            # bin itself is written by the assignment loop below.
            prefill = range(max(hist_idx))
            hist_obs = dict.fromkeys(prefill, 0)
            hist_cen = dict.fromkeys(prefill, 0)
            # Assign the actual counts
            observed = event_table["observed"].to_numpy()
            censored = event_table["censored"].to_numpy()
            for time_bin, n_obs, n_cen in zip(hist_idx, observed, censored):
                hist_obs[time_bin] = n_obs
                hist_cen[time_bin] = n_cen
            # Send histograms to server
            response = FLModel(params={"hist_obs": hist_obs, "hist_cen": hist_cen}, params_type=ParamsType.FULL)
            flare.send(response)

        elif curr_round == 2:
            # Get global histograms
            hist_obs_global = global_msg.params["hist_obs_global"]
            hist_cen_global = global_msg.params["hist_cen_global"]
            # Unfold histogram to event list: one (time, True) entry per observed
            # event and one (time, False) entry per censored event.
            # range() is used on purpose instead of list multiplication: the counts
            # may be NumPy integers, for which ``[x] * n`` does not repeat the list.
            time_unfold = []
            event_unfold = []
            for time_bin in hist_obs_global.keys():
                for _ in range(hist_obs_global[time_bin]):
                    time_unfold.append(time_bin)
                    event_unfold.append(True)
                for _ in range(hist_cen_global[time_bin]):
                    time_unfold.append(time_bin)
                    event_unfold.append(False)
            time_unfold = np.array(time_unfold)
            event_unfold = np.array(event_unfold)

            # Perform Kaplan-Meier analysis on global aggregated information
            kmf = KaplanMeierFitter()
            kmf.fit(durations=time_unfold, event_observed=event_unfold)

            # Plot and save the KM curve
            plot_and_save(kmf)

            # Save details of the KM result to a json file
            details_save(kmf)

            # Send a simple response to server
            response = FLModel(params={}, params_type=ParamsType.FULL)
            flare.send(response)

    print(f"finish send for {site_name}, complete")


if __name__ == "__main__":
    main()
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/src/kaplan_meier_train_he.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/src/kaplan_meier_train_he.py
new file mode 100644
index 0000000000..1ff9c69dbb
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/src/kaplan_meier_train_he.py
@@ -0,0 +1,195 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import base64
+import json
+import os
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import tenseal as ts
+from lifelines import KaplanMeierFitter
+from lifelines.utils import survival_table_from_events
+
+# (1) import nvflare client API
+import nvflare.client as flare
+from nvflare.app_common.abstract.fl_model import FLModel, ParamsType
+
+
+# Client code
def read_data(file_name: str):
    """Return the base64-decoded content of the file at ``file_name``.

    Args:
        file_name: path of a file whose entire content is base64 text.

    Returns:
        The decoded bytes.
    """
    with open(file_name, mode="rb") as source:
        encoded = source.read()
    decoded = base64.b64decode(encoded)
    return decoded
+
+
def details_save(kmf):
    """Write the fitted Kaplan-Meier result to ``km_global.json`` in the current working directory.

    Args:
        kmf: fitted estimator exposing ``survival_function_`` (a DataFrame indexed by
            time with a ``KM_estimate`` column) and ``event_table`` (a DataFrame with
            an ``observed`` column).
    """
    survival_function = kmf.survival_function_
    # Time points of the fitted curve
    timeline = survival_function.index.values
    # KM estimate at each time point
    km_estimate = survival_function["KM_estimate"].values
    # Observed events at each time point. Selected by name: the previous positional
    # lookup (first column) assumed that column holds the observed events, but per the
    # lifelines docs the event table starts with "removed" (observed + censored).
    event_count = kmf.event_table["observed"].values
    # NOTE(review): despite the key name, this is 1 - S(t), i.e. the complement of the
    # first column of the survival function. The key is kept for output compatibility.
    survival_rate = 1 - survival_function.iloc[:, 0].values

    results = {
        "timeline": timeline.tolist(),
        "km_estimate": km_estimate.tolist(),
        "event_count": event_count.tolist(),
        "survival_rate": survival_rate.tolist(),
    }
    file_path = os.path.join(os.getcwd(), "km_global.json")
    print(f"save the details of KM analysis result to {file_path} \n")
    with open(file_path, "w") as json_file:
        json.dump(results, json_file, indent=4)
+
+
def plot_and_save(kmf):
    """Plot the Kaplan-Meier survival curve of ``kmf`` and save it as ``km_curve_fl_he.png``.

    The image is written to the current working directory. The figure is closed
    afterwards so repeated calls do not accumulate open pyplot figures.

    Args:
        kmf: fitted estimator providing ``plot_survival_function()``.
    """
    fig = plt.figure()
    try:
        plt.title("Federated HE")
        kmf.plot_survival_function()
        plt.ylim(0, 1)
        plt.ylabel("prob")
        plt.xlabel("time")
        # Empty, frameless legend: presumably meant to hide the curve's default legend entry.
        plt.legend("", frameon=False)
        plt.tight_layout()
        file_path = os.path.join(os.getcwd(), "km_curve_fl_he.png")
        print(f"save the curve plot to {file_path} \n")
        plt.savefig(file_path)
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)
+
+
def main():
    """Run the client side of the three-round federated Kaplan-Meier workflow with HE.

    The site's data file ``<data_root>/<site_name>.csv`` (columns ``event`` and
    ``time``) and the HE context file are loaded once. Each message received from the
    server is then handled according to its ``current_round``:

    * round 1: send the largest local time index to the server;
    * round 2: read the global max index, build uniform local histograms of that
      length, encrypt them as BFV vectors and send the serialized ciphertexts;
    * round 3: deserialize and decrypt the global histograms, unfold them into an
      event list, fit a Kaplan-Meier estimator, save the curve and the details, and
      send an empty response.
    """
    parser = argparse.ArgumentParser(description="KM analysis")
    # Both arguments are used unconditionally below, so reject a missing one up front.
    parser.add_argument("--data_root", type=str, required=True, help="Root path for data files")
    parser.add_argument("--he_context_path", type=str, required=True, help="Path for the HE context file")
    args = parser.parse_args()

    flare.init()

    site_name = flare.get_site_name()
    print(f"Kaplan-meier analysis for {site_name}")

    # get local data
    data_path = os.path.join(args.data_root, site_name + ".csv")
    data = pd.read_csv(data_path)
    event_local = data["event"]
    time_local = data["time"]

    # Condense local data to a histogram table once. It depends only on local data
    # and is needed in both round 1 and round 2, so it must not live inside the
    # round-1 branch.
    event_table = survival_table_from_events(time_local, event_local)
    hist_idx = event_table.index.values.astype(int)

    # HE context
    # In real-life application, HE context is prepared by secure provisioning
    he_context_serial = read_data(args.he_context_path)
    he_context = ts.context_from(he_context_serial)

    while flare.is_running():
        # receives global message from NVFlare
        global_msg = flare.receive()
        curr_round = global_msg.current_round
        print(f"current_round={curr_round}")

        if curr_round == 1:
            # First round: empty payload from server, send max index back
            max_hist_idx = max(hist_idx)

            # Send max to server
            print(f"send max hist index for site = {flare.get_site_name()}")
            model = FLModel(params={"max_idx": max_hist_idx}, params_type=ParamsType.FULL)
            flare.send(model)

        elif curr_round == 2:
            # Second round: get global max index, organize local histogram and encrypt.
            # max_idx_global is kept across iterations; round 3 reuses it.
            max_idx_global = global_msg.params["max_idx_global"]
            print("Global Max Idx")
            print(max_idx_global)
            # Convert local table to uniform histogram (zero for every bin first)
            hist_obs = dict.fromkeys(range(max_idx_global), 0)
            hist_cen = dict.fromkeys(range(max_idx_global), 0)
            # assign values
            observed = event_table["observed"].to_numpy()
            censored = event_table["censored"].to_numpy()
            for time_bin, n_obs, n_cen in zip(hist_idx, observed, censored):
                hist_obs[time_bin] = n_obs
                hist_cen[time_bin] = n_cen
            # Encrypt with tenseal using BFV scheme since observations are integers
            hist_obs_he = ts.bfv_vector(he_context, list(hist_obs.values()))
            hist_cen_he = ts.bfv_vector(he_context, list(hist_cen.values()))
            # Serialize for transmission
            hist_obs_he_serial = hist_obs_he.serialize()
            hist_cen_he_serial = hist_cen_he.serialize()
            # Send encrypted histograms to server
            response = FLModel(
                params={"hist_obs": hist_obs_he_serial, "hist_cen": hist_cen_he_serial}, params_type=ParamsType.FULL
            )
            flare.send(response)

        elif curr_round == 3:
            # Get global histograms
            hist_obs_global_serial = global_msg.params["hist_obs_global"]
            hist_cen_global_serial = global_msg.params["hist_cen_global"]
            # Deserialize
            hist_obs_global = ts.bfv_vector_from(he_context, hist_obs_global_serial)
            hist_cen_global = ts.bfv_vector_from(he_context, hist_cen_global_serial)
            # Decrypt
            hist_obs_global = hist_obs_global.decrypt()
            hist_cen_global = hist_cen_global.decrypt()
            # Unfold histogram to event list: one (time, True) entry per observed
            # event and one (time, False) entry per censored event.
            time_unfold = []
            event_unfold = []
            for time_bin in range(max_idx_global):
                for _ in range(hist_obs_global[time_bin]):
                    time_unfold.append(time_bin)
                    event_unfold.append(True)
                for _ in range(hist_cen_global[time_bin]):
                    time_unfold.append(time_bin)
                    event_unfold.append(False)
            time_unfold = np.array(time_unfold)
            event_unfold = np.array(event_unfold)

            # Perform Kaplan-Meier analysis on global aggregated information
            kmf = KaplanMeierFitter()
            kmf.fit(durations=time_unfold, event_observed=event_unfold)

            # Plot and save the KM curve
            plot_and_save(kmf)

            # Save details of the KM result to a json file
            details_save(kmf)

            # Send a simple response to server
            response = FLModel(params={}, params_type=ParamsType.FULL)
            flare.send(response)

    print(f"finish send for {site_name}, complete")


if __name__ == "__main__":
    main()
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/src/kaplan_meier_wf.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/src/kaplan_meier_wf.py
new file mode 100644
index 0000000000..54fa1d384c
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/src/kaplan_meier_wf.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import Dict
+
+from nvflare.app_common.abstract.fl_model import FLModel, ParamsType
+from nvflare.app_common.workflows.model_controller import ModelController
+
+
+# Controller Workflow
class KM(ModelController):
    """Server workflow for federated Kaplan-Meier analysis without encryption.

    The workflow has two rounds: collect the local histograms from the sites,
    sum them, and send the global histograms back.
    """

    def __init__(self, min_clients: int):
        """Set up the workflow.

        Args:
            min_clients: stored on the instance; not read by any method of this class.
        """
        super().__init__()
        self.logger = logging.getLogger(self.__class__.__name__)
        self.min_clients = min_clients
        self.num_rounds = 2

    def run(self):
        """Execute the workflow: collect local histograms, aggregate, distribute."""
        hist_local = self.start_fl_collect_hist()
        hist_obs_global, hist_cen_global = self.aggr_hist(hist_local)
        _ = self.distribute_global_hist(hist_obs_global, hist_cen_global)

    def start_fl_collect_hist(self):
        """Send the empty round-1 message and return what ``send_model_and_wait`` yields."""
        self.logger.info("send initial message to all sites to start FL \n")
        model = FLModel(params={}, start_round=1, current_round=1, total_rounds=self.num_rounds)

        results = self.send_model_and_wait(data=model)
        return results

    def aggr_hist(self, sag_result: Dict[str, Dict[str, FLModel]]):
        """Sum the per-site histograms bin by bin.

        Args:
            sag_result: round-1 results. NOTE(review): the annotation says ``Dict``,
                but the body iterates it as a sequence of ``FLModel`` objects whose
                ``params`` hold ``hist_obs`` and ``hist_cen`` dicts keyed by time index.

        Returns:
            Tuple ``(hist_obs_global, hist_cen_global)``: dicts with one entry for
            every index from 0 to the largest index reported by any site.

        Raises:
            RuntimeError: if ``sag_result`` is None or empty.
        """
        self.logger.info("aggregate histogram \n")

        if not sag_result:
            raise RuntimeError("input is None or empty")

        # Largest time index reported by any site, over both histograms.
        hist_idx_max = 0
        for fl_model in sag_result:
            for name in ("hist_obs", "hist_cen"):
                hist_idx_max = max(hist_idx_max, max(fl_model.params[name].keys()))

        # Indices are actual time points, so max + 1 bins cover 0..max. The previous
        # code added 1 twice, which allocated one extra, always-empty bin.
        hist_obs_global = dict.fromkeys(range(hist_idx_max + 1), 0)
        hist_cen_global = dict.fromkeys(range(hist_idx_max + 1), 0)

        for fl_model in sag_result:
            hist_obs = fl_model.params["hist_obs"]
            hist_cen = fl_model.params["hist_cen"]
            for idx, count in hist_obs.items():
                hist_obs_global[idx] += count
            for idx, count in hist_cen.items():
                hist_cen_global[idx] += count

        return hist_obs_global, hist_cen_global

    def distribute_global_hist(self, hist_obs_global, hist_cen_global):
        """Send the global histograms to the sites as the round-2 message.

        Args:
            hist_obs_global: aggregated observed-event histogram.
            hist_cen_global: aggregated censored-event histogram.

        Returns:
            Whatever ``send_model_and_wait`` returns for the round-2 message.
        """
        # This workflow does not use HE; the earlier message was copied from the HE variant.
        self.logger.info("send global accumulated histograms to all sites \n")

        model = FLModel(
            params={"hist_obs_global": hist_obs_global, "hist_cen_global": hist_cen_global},
            params_type=ParamsType.FULL,
            start_round=1,
            current_round=2,
            total_rounds=self.num_rounds,
        )

        results = self.send_model_and_wait(data=model)
        return results
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/src/kaplan_meier_wf_he.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/src/kaplan_meier_wf_he.py
new file mode 100644
index 0000000000..12acf51f4b
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/src/kaplan_meier_wf_he.py
@@ -0,0 +1,131 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import base64
+import logging
+from typing import Dict
+
+import tenseal as ts
+
+from nvflare.app_common.abstract.fl_model import FLModel, ParamsType
+from nvflare.app_common.workflows.model_controller import ModelController
+
+# Controller Workflow
+
+
class KM_HE(ModelController):
    """Server workflow for federated Kaplan-Meier analysis with homomorphic encryption.

    The workflow has three rounds: collect and agree on the global max histogram
    index, collect and add the encrypted local histograms, and send the encrypted
    global histograms back to the sites.
    """

    def __init__(self, min_clients: int, he_context_path: str):
        """Set up the workflow.

        Args:
            min_clients: stored on the instance; not read by any method of this class.
            he_context_path: path of the base64-encoded serialized HE context file.
        """
        super().__init__()
        self.logger = logging.getLogger(self.__class__.__name__)
        self.min_clients = min_clients
        self.he_context_path = he_context_path
        self.num_rounds = 3

    def run(self):
        """Execute the three rounds in order."""
        max_idx_results = self.start_fl_collect_max_idx()
        global_res = self.aggr_max_idx(max_idx_results)
        enc_hist_results = self.distribute_max_idx_collect_enc_stats(global_res)
        hist_obs_global, hist_cen_global = self.aggr_he_hist(enc_hist_results)
        _ = self.distribute_global_hist(hist_obs_global, hist_cen_global)

    def read_data(self, file_name: str):
        """Return the base64-decoded content of ``file_name``."""
        with open(file_name, "rb") as f:
            data = f.read()
        return base64.b64decode(data)

    def start_fl_collect_max_idx(self):
        """Send the empty round-1 message and return what ``send_model_and_wait`` yields."""
        self.logger.info("send initial message to all sites to start FL \n")
        model = FLModel(params={}, start_round=1, current_round=1, total_rounds=self.num_rounds)

        results = self.send_model_and_wait(data=model)
        return results

    def aggr_max_idx(self, sag_result: Dict[str, Dict[str, FLModel]]):
        """Return the global histogram length: the largest reported ``max_idx`` plus one.

        Args:
            sag_result: round-1 results. NOTE(review): annotated as ``Dict`` but
                iterated as a sequence of ``FLModel`` objects.

        Raises:
            RuntimeError: if ``sag_result`` is None or empty.
        """
        self.logger.info("aggregate max histogram index \n")

        if not sag_result:
            raise RuntimeError("input is None or empty")

        # actual time point as index, so plus 1 for storage
        return max(fl_model.params["max_idx"] for fl_model in sag_result) + 1

    def distribute_max_idx_collect_enc_stats(self, result: int):
        """Send the global max index as the round-2 message and return the replies.

        Args:
            result: value sent to the sites under the ``max_idx_global`` key.
        """
        self.logger.info("send global max_index to all sites \n")

        model = FLModel(
            params={"max_idx_global": result},
            params_type=ParamsType.FULL,
            start_round=1,
            current_round=2,
            total_rounds=self.num_rounds,
        )

        results = self.send_model_and_wait(data=model)
        return results

    def aggr_he_hist(self, sag_result: Dict[str, Dict[str, FLModel]]):
        """Add the encrypted per-site histograms and return the serialized sums.

        Args:
            sag_result: round-2 results. NOTE(review): annotated as ``Dict`` but
                iterated as a sequence of ``FLModel`` objects whose ``params`` hold
                serialized BFV vectors under ``hist_obs`` and ``hist_cen``.

        Returns:
            Tuple of the serialized global observed and censored histograms.

        Raises:
            RuntimeError: if ``sag_result`` is None or empty.
        """
        self.logger.info("aggregate histogram within HE \n")

        # Validate first so an empty round fails before any file I/O.
        if not sag_result:
            raise RuntimeError("input is None or empty")

        # Load HE context
        he_context_serial = self.read_data(self.he_context_path)
        he_context = ts.context_from(he_context_serial)

        hist_obs_global = None
        hist_cen_global = None
        for fl_model in sag_result:
            site = fl_model.meta.get("client_name", None)
            hist_obs_he = ts.bfv_vector_from(he_context, fl_model.params["hist_obs"])
            hist_cen_he = ts.bfv_vector_from(he_context, fl_model.params["hist_cen"])

            # Compare against None explicitly: the truthiness of an encrypted
            # vector is not something this code should depend on.
            if hist_obs_global is None:
                self.logger.info(f"assign global hist with result from {site}")
                hist_obs_global = hist_obs_he
                hist_cen_global = hist_cen_he
            else:
                self.logger.info(f"add to global hist with result from {site}")
                hist_obs_global += hist_obs_he
                hist_cen_global += hist_cen_he

        # return the two accumulated vectors, serialized for transmission
        hist_obs_global_serial = hist_obs_global.serialize()
        hist_cen_global_serial = hist_cen_global.serialize()
        return hist_obs_global_serial, hist_cen_global_serial

    def distribute_global_hist(self, hist_obs_global_serial, hist_cen_global_serial):
        """Send the serialized encrypted global histograms as the round-3 message.

        Returns:
            Whatever ``send_model_and_wait`` returns for the round-3 message.
        """
        self.logger.info("send global accumulated histograms within HE to all sites \n")

        model = FLModel(
            params={"hist_obs_global": hist_obs_global_serial, "hist_cen_global": hist_cen_global_serial},
            params_type=ParamsType.FULL,
            start_round=1,
            current_round=3,
            total_rounds=self.num_rounds,
        )

        results = self.send_model_and_wait(data=model)
        return results
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/utils/baseline_kaplan_meier.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/utils/baseline_kaplan_meier.py
new file mode 100644
index 0000000000..0bd37b0bb1
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/utils/baseline_kaplan_meier.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+
+import matplotlib.pyplot as plt
+import numpy as np
+from lifelines import KaplanMeierFitter
+from sksurv.datasets import load_veterans_lung_cancer
+
+
def args_parser():
    """Build the command-line parser of the baseline script.

    Returns:
        An ``argparse.ArgumentParser`` with the single option ``--output_curve_path``.
    """
    cli = argparse.ArgumentParser(description="Kaplan Meier Survival Analysis Baseline")
    curve_path_spec = {
        "type": str,
        "default": "/tmp/nvflare/baseline/km_curve_baseline.png",
        "help": "save path for the output curve",
    }
    cli.add_argument("--output_curve_path", **curve_path_spec)
    return cli
+
+
def prepare_data(bin_days: int = 7):
    """Load the veterans lung cancer data and bin the survival times.

    Args:
        bin_days: bin width in days; each survival time is rounded up to the next
            multiple of it. Default is a week (7 days).

    Returns:
        Tuple ``(event, time)`` taken from the ``Status`` and ``Survival_in_days``
        fields of the dataset's second return value, with ``time`` binned.
    """
    # Only the outcome part of the dataset is used here.
    _, data_y = load_veterans_lung_cancer()
    event = data_y["Status"]
    time = data_y["Survival_in_days"]
    # Categorize data to a bin: round up to the next multiple of bin_days
    time = np.ceil(time / bin_days).astype(int) * bin_days
    return event, time
+
+
def main():
    """Fit and plot baseline Kaplan-Meier curves (weekly-binned and daily) into one figure.

    The figure is saved to ``--output_curve_path``; the parent directory is created
    when it does not exist yet.
    """
    # Local import: this module does not import os at file level.
    import os

    parser = args_parser()
    args = parser.parse_args()

    # Set parameters
    output_curve_path = args.output_curve_path

    # Make sure the target directory exists; savefig does not create it.
    output_dir = os.path.dirname(output_curve_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Set plot
    fig = plt.figure()
    try:
        plt.title("Baseline")

        # Fit and plot one Kaplan-Meier curve per binning, both on the same axes:
        # first with weekly binning, then without binning (daily).
        for bin_days, label in ((7, "Binned Weekly"), (1, "No binning - Daily")):
            event, time = prepare_data(bin_days=bin_days)
            kmf = KaplanMeierFitter()
            # Fit the survival data
            kmf.fit(time, event)
            # Plot the Kaplan-Meier survival curve
            kmf.plot_survival_function(label=label)

        plt.ylim(0, 1)
        plt.ylabel("prob")
        plt.xlabel("time")
        plt.tight_layout()
        plt.legend()
        plt.savefig(output_curve_path)
    finally:
        # Release the figure even if fitting, plotting or saving fails.
        plt.close(fig)


if __name__ == "__main__":
    main()
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/utils/prepare_data.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/utils/prepare_data.py
new file mode 100644
index 0000000000..0517ad6274
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/utils/prepare_data.py
@@ -0,0 +1,89 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+
+import numpy as np
+import pandas as pd
+from sksurv.datasets import load_veterans_lung_cancer
+
+np.random.seed(77)
+
+
def data_split_args_parser():
    """Build the command-line parser of the data split script.

    Returns:
        An ``argparse.ArgumentParser`` with ``--site_num``, ``--site_name_prefix``,
        ``--bin_days`` and the required ``--out_path``.
    """
    parser = argparse.ArgumentParser(description="Generate data split for dataset")
    parser.add_argument("--site_num", type=int, default=5, help="Total number of sites, default is 5")
    parser.add_argument(
        "--site_name_prefix",
        type=str,
        default="site-",
        help="Site name prefix, default is site-",
    )
    parser.add_argument("--bin_days", type=int, default=1, help="Bin days for categorizing data")
    # Required: there is no default and the script cannot write its output without it.
    parser.add_argument("--out_path", type=str, required=True, help="Output root path for split data files")
    return parser
+
+
def prepare_data(data, site_num, bin_days):
    """Bin the survival times, shuffle the records and split them across sites.

    Args:
        data: record container with a ``shape`` attribute and ``Status`` /
            ``Survival_in_days`` fields.
        site_num: number of sites to split the records into.
        bin_days: bin width in days; each time is rounded up to the next multiple of it.

    Returns:
        Tuple ``(event_clients, time_clients)`` of dicts keyed ``"site-1"`` ..
        ``"site-<site_num>"``, holding each site's events and binned times.
    """
    n_records = data.shape[0]
    print(f"Total data count: {n_records}")

    status = data["Status"]
    # Round every survival time up to the next multiple of bin_days
    binned_time = np.ceil(data["Survival_in_days"] / bin_days).astype(int) * bin_days

    # Random order in which the records are dealt out
    shuffled = np.random.permutation(n_records)

    event_clients, time_clients = {}, {}
    for site_no in range(1, site_num + 1):
        # Contiguous slice of the shuffled order belonging to this site
        lower = int((site_no - 1) * n_records / site_num)
        upper = int(site_no * n_records / site_num)
        picked = shuffled[lower:upper]
        site_key = "site-" + str(site_no)
        event_clients[site_key] = status[picked]
        time_clients[site_key] = binned_time[picked]
    return event_clients, time_clients
+
+
def main():
    """Split the veterans lung cancer data across sites and write one CSV per site.

    Each file ``<out_path>/<site_name_prefix><n>.csv`` holds the ``event`` and
    ``time`` columns of site ``n`` (1-based).
    """
    parser = data_split_args_parser()
    args = parser.parse_args()

    # Load data
    # For this KM analysis, we use full timeline and event label only
    _, data = load_veterans_lung_cancer()

    # Prepare data
    event_clients, time_clients = prepare_data(data=data, site_num=args.site_num, bin_days=args.bin_days)

    # exist_ok=True already covers an existing directory, so no separate
    # existence check is needed.
    os.makedirs(args.out_path, exist_ok=True)

    # Save data to csv files
    for site in range(1, args.site_num + 1):
        # prepare_data keys its results with a fixed "site-" prefix, independent of
        # the file name prefix chosen on the command line.
        site_key = "site-" + str(site)
        output_file = os.path.join(args.out_path, f"{args.site_name_prefix}{site}.csv")
        df = pd.DataFrame(
            {
                "event": event_clients[site_key],
                "time": time_clients[site_key],
            }
        )
        df.to_csv(output_file, index=False)


if __name__ == "__main__":
    main()
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/utils/prepare_he_context.py b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/utils/prepare_he_context.py
new file mode 100644
index 0000000000..ceedf4c9a4
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/code/utils/prepare_he_context.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import base64
+import os
+
+import tenseal as ts
+
+
def data_split_args_parser():
    """Build the command-line parser of the HE context generation script.

    Returns:
        An ``argparse.ArgumentParser`` with ``--scheme``, ``--poly_modulus_degree``
        and the required ``--out_path``.
    """
    parser = argparse.ArgumentParser(description="Generate HE context")
    parser.add_argument("--scheme", type=str, default="BFV", help="HE scheme, default is BFV")
    parser.add_argument("--poly_modulus_degree", type=int, default=4096, help="Poly modulus degree, default is 4096")
    # Required: there is no default and the script cannot write the context files without it.
    parser.add_argument(
        "--out_path",
        type=str,
        required=True,
        help="Output root path for HE context files for client and server",
    )
    return parser
+
+
def write_data(file_name: str, data: bytes):
    """Base64-encode ``data`` and write the result to ``file_name``.

    Args:
        file_name: path of the file to create or overwrite.
        data: raw bytes to encode and store.
    """
    encoded = base64.b64encode(data)
    with open(file_name, mode="wb") as sink:
        sink.write(encoded)
+
+
def main():
    """Generate an HE context and save client and server copies under ``--out_path``.

    ``he_context_client.txt`` is serialized with the secret key and
    ``he_context_server.txt`` without it.

    Raises:
        ValueError: if ``--scheme`` is neither ``BFV`` nor ``CKKS``.
    """
    parser = data_split_args_parser()
    args = parser.parse_args()

    # Generate HE context
    if args.scheme == "BFV":
        context = ts.context(
            ts.SCHEME_TYPE.BFV, poly_modulus_degree=args.poly_modulus_degree, plain_modulus=1032193
        )
    elif args.scheme == "CKKS":
        # CKKS does not need plain_modulus
        # NOTE(review): confirm tenseal accepts a CKKS context without coeff_mod_bit_sizes.
        context = ts.context(ts.SCHEME_TYPE.CKKS, poly_modulus_degree=args.poly_modulus_degree)
    else:
        raise ValueError("HE scheme not supported")

    # exist_ok=True already covers an existing directory, so no separate
    # existence check is needed.
    os.makedirs(args.out_path, exist_ok=True)

    # Save HE context to file for client (this copy includes the secret key)
    client_file = os.path.join(args.out_path, "he_context_client.txt")
    write_data(client_file, context.serialize(save_secret_key=True))

    # Save HE context to file for server (no secret key)
    server_file = os.path.join(args.out_path, "he_context_server.txt")
    write_data(server_file, context.serialize(save_secret_key=False))


if __name__ == "__main__":
    main()
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/convert_survival_analysis_to_fl.ipynb b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/convert_survival_analysis_to_fl.ipynb
new file mode 100644
index 0000000000..3626b93cc5
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/convert_survival_analysis_to_fl.ipynb
@@ -0,0 +1,293 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "d40828dd",
+ "metadata": {},
+ "source": [
+ "# Secure Federated Kaplan-Meier Analysis via Time-Binning and Homomorphic Encryption"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c0937cf5",
+ "metadata": {},
+ "source": [
+ "This example illustrates two features:\n",
+ "* How to perform Kaplan-Meier survival analysis in a federated setting, both without and with secure features via time-binning and Homomorphic Encryption (HE).\n",
+ "* How to use the FLARE ModelController API to construct a workflow to facilitate HE under simulator mode."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "da8644ba",
+ "metadata": {},
+ "source": [
+ "## Basics of Kaplan-Meier Analysis\n",
+ "Kaplan-Meier survival analysis is a non-parametric statistic used to estimate the survival function from lifetime data. It is used to analyze the time it takes for an event of interest to occur. For example, during a clinical trial, the Kaplan-Meier estimator can be used to estimate the proportion of patients who survive a certain amount of time after treatment. \n",
+ "\n",
+ "The Kaplan-Meier estimator takes into account the time of the event (e.g. \"Survival Days\") and whether the event was observed or censored. An event is observed if the event of interest (e.g. \"death\") occurred at the end of the observation process. An event is censored if the event of interest did not occur (i.e. patient is still alive) at the end of the observation process.\n",
+ "\n",
+ "One example dataset used here for Kaplan-Meier analysis is the `veterans_lung_cancer` dataset. This dataset contains information about the survival time of veterans with advanced lung cancer. Below we provide some samples of the dataset:\n",
+ "\n",
+ "| ID | Age | Celltype | Karnofsky | Diagtime | Prior | Treat | Status | Survival Days |\n",
+ "|----|-----|------------|------------|----------|-------|-----------|--------|---------------|\n",
+ "| 1 | 64 | squamous | 70 | 5 | yes | standard | TRUE | 411 |\n",
+ "| 20 | 55 | smallcell | 40 | 3 | no | standard | FALSE | 123 |\n",
+ "| 45 | 61 | adeno | 20 | 19 | yes | standard | TRUE | 8 |\n",
+ "| 63 | 62 | large | 90 | 2 | no | standard | FALSE | 182 |\n",
+ "\n",
+ "To perform the analysis, in this data, we have:\n",
+ "- Time `Survival Days`: days passed from the beginning of the observation till the end\n",
+ "- Event `Status`: whether event (i.e. death) happened at the end of the observation, or not\n",
+ "\n",
+ "Based on the above understanding, we can interpret the data as follows:\n",
+ "- Patient #1 goes through an observation period of 411 days, and passes away at Day 411\n",
+ "- Patient #20 goes through an observation period of 123 days, and is still alive when the observation stops at Day 123 \n",
+ "\n",
+ "The purpose of Kaplan-Meier analysis is to estimate the survival function, which is the probability that a patient survives beyond a certain time. Naturally, it will be a monotonic decreasing function, since the probability of surviving will decrease as time goes by."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "06986478",
+ "metadata": {},
+ "source": [
+ "## Secure Multi-party Kaplan-Meier Analysis\n",
+ "As described above, Kaplan-Meier survival analysis is a one-shot (non-iterative) analysis performed on a list of events (`Status`) and their corresponding time (`Survival Days`). In this example, we use [lifelines](https://zenodo.org/records/10456828) to perform this analysis. \n",
+ "\n",
+ "Essentially, the estimator needs to get access to this event list, and under the setting of federated analysis, the aggregated event list from all participants.\n",
+ "\n",
+ "However, this poses a data security concern - the event list is equivalent to the raw data. If it gets exposed to external parties, it essentially breaks the core value of federated analysis.\n",
+ "\n",
+ "Therefore, we would like to design a secure mechanism to enable collaborative Kaplan-Meier analysis without the risk of exposing the raw information from a participant. The targeted protection includes:\n",
+ "- Prevent clients from getting RAW data from each other;\n",
+ "- Prevent the aggregation server from accessing ANY information from participants' submissions.\n",
+ "\n",
+ "This is achieved by two techniques:\n",
+ "- Condense the raw event list to two histograms (one for observed events and the other for censored events) using binning at a certain interval (e.g. a week)\n",
+ "- Perform the aggregation of the histograms using Homomorphic Encryption (HE)\n",
+ "\n",
+ "With time-binning, the above event list will be converted to histograms. Using a week as the interval:\n",
+ "- Patient #1 will contribute 1 to the 411/7 = 58th bin of the observed event histogram\n",
+ "- Patient #20 will contribute 1 to the 123/7 = 17th bin of the censored event histogram\n",
+ "\n",
+ "In this way, events that happened within the same bin from different participants can be aggregated and will not be distinguishable in the final aggregated histograms. Note that coarser binning will lead to higher protection, but also lower resolution of the final Kaplan-Meier curve.\n",
+ "\n",
+ "Local histograms will then be encrypted as one single vector before sending to server, and the global aggregation operation at server side will be performed entirely within encryption space with HE. This will not cause any information loss, while the server will not be able to access any plain-text information.\n",
+ "\n",
+ "With these two settings, the server will have no access to any knowledge regarding local submissions, and participants will only receive global aggregated histograms that will not contain distinguishable information regarding any individual participants (client number >= 3 - if only two participants, one can infer the other party's info by subtracting its own histograms).\n",
+ "\n",
+ "The final Kaplan-Meier survival analysis will be performed locally on the global aggregated event list, recovered from decrypted global histograms."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f75beeb3",
+ "metadata": {},
+ "source": [
+ "## Install requirements\n",
+ "Make sure to install the required packages:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "56133db2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%pip install -r code/requirements.txt"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d4b57b15",
+ "metadata": {},
+ "source": [
+ "## Baseline Kaplan-Meier Analysis\n",
+ "We first illustrate the baseline centralized Kaplan-Meier analysis without any secure features. We use the veterans_lung_cancer dataset, loaded by\n",
+ "`from sksurv.datasets import load_veterans_lung_cancer`, and use `Status` as the event type and `Survival_in_days` as the event time to construct the event list.\n",
+ "\n",
+ "To run the baseline script, simply execute:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "41206a7d",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "! python3 utils/baseline_kaplan_meier.py"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "31ab94be",
+ "metadata": {},
+ "source": [
+ "By default, this will generate a KM curve image `km_curve_baseline.png` under the `/tmp` directory. The resulting KM curve is shown below:\n",
+ "\n",
+ "\n",
+ "\n",
+ "Here, we show the survival curves for both daily (without binning) and weekly binning. The two curves align well with each other, while the weekly-binned curve has lower resolution."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a42f69c0",
+ "metadata": {},
+ "source": [
+ "## Federated Kaplan-Meier Analysis without and with Homomorphic Encryption\n",
+ "We make use of the FLARE ModelController API to implement the federated Kaplan-Meier analysis, both without and with HE.\n",
+ "\n",
+ "The FLARE ModelController API (`ModelController`) provides the functionality of flexible FLModel payloads for each round of federated analysis. This gives us the flexibility of transmitting various information needed by our scheme at different stages of federated learning.\n",
+ "\n",
+ "Our [existing HE examples](https://github.com/NVIDIA/NVFlare/tree/main/examples/advanced/cifar10/cifar10-real-world) use a data filter mechanism for HE, provisioning the HE context information (specs and keys) for both client and server of the federated job under the [CKKS](https://github.com/NVIDIA/NVFlare/blob/main/nvflare/app_opt/he/model_encryptor.py) scheme. In this example, we would like to illustrate ModelController's capability in supporting customized needs beyond the existing HE functionalities (designed mainly for encrypting deep learning models):\n",
+ "- different HE schemes (BFV) rather than CKKS\n",
+ "- different content at different rounds of federated learning, and only specific payloads need to be encrypted\n",
+ "\n",
+ "With the ModelController API, such experiments become easy. In this example, the federated analysis pipeline includes 2 rounds without HE or 3 rounds with HE.\n",
+ "\n",
+ "For the federated analysis without HE, the detailed steps are as follows:\n",
+ "1. Server sends the simple start message without any payload.\n",
+ "2. Clients submit the local event histograms to server. Server aggregates the histograms with varying lengths by adding event counts of the same slot together, and sends the aggregated histograms back to clients.\n",
+ "\n",
+ "For the federated analysis with HE, we need to ensure proper HE aggregation using BFV, and the detailed steps are as follows:\n",
+ "1. Server sends the simple start message without any payload. \n",
+ "2. Clients collect the information of the local maximum bin number (for event time) and send to the server, where the server aggregates the information by selecting the maximum among all clients. The global maximum number is then distributed back to the clients. This step is necessary because we would like to standardize the histograms generated by all clients, such that they will have the exact same length and can be encrypted as vectors of same size, which will be addable.\n",
+ "3. Clients condense their local raw event lists into two histograms with the global length received, encrypt the histogram value vectors, and send to the server. The server aggregates the received histograms by adding the encrypted vectors together, and sends the aggregated histograms back to the clients.\n",
+ "\n",
+ "After these rounds, the federated work is completed. Then at each client, the aggregated histograms will be decrypted and converted back to an event list, and Kaplan-Meier analysis can be performed on the global information."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "302c4285",
+ "metadata": {},
+ "source": [
+ "## Run the job\n",
+ "First, we prepare data for a 5-client federated job. We split and generate the data files for each client with binning interval of 7 days."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8a354d0d",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "! python3 code/utils/prepare_data.py --site_num 5 --bin_days 7 --out_path \"/tmp/nvflare/dataset/km_data\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "40d6fa4e",
+ "metadata": {},
+ "source": [
+ "Then, we prepare the HE context for the clients and the server. Note that in real-life applications this step is done by secure provisioning, but in this study, which experiments with the BFV scheme, we use this script to distribute the HE context."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b12b162d",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "! python3 code/utils/prepare_he_context.py --out_path \"/tmp/nvflare/he_context\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7cc4d792",
+ "metadata": {},
+ "source": [
+ "Next, we run the federated training using the NVFlare Simulator via the [JobAPI](https://nvflare.readthedocs.io/en/main/programming_guide/fed_job_api.html), both without and with HE:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a4c91649",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "! python3 code/km_job.py"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0c24c50a",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "! python3 code/km_job.py --encryption"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e31897b5",
+ "metadata": {},
+ "source": [
+ "By default, this will generate the KM curve images `km_curve_fl.png` and `km_curve_fl_he.png` under each client's directory."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e12cde9e",
+ "metadata": {},
+ "source": [
+ "## Display Result\n",
+ "\n",
+ "By comparing the two curves, we can observe that all curves are identical:\n",
+ "\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/convert_ml_to_fl.ipynb b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/convert_ml_to_fl.ipynb
new file mode 100644
index 0000000000..efef1032b9
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.3_convert_machine_learning_to_federated_learning/convert_ml_to_fl.ipynb
@@ -0,0 +1,34 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Simple ML/DL to FL transition with NVFlare\n",
+ "\n",
+ "Converting Deep Learning (DL) models to Federated Learning (FL) entails several key steps:\n",
+ "\n",
+ " - Formulating the algorithm: This involves determining how to adapt a DL model into an FL framework, including specifying the information exchange protocol between the server and clients.\n",
+ "\n",
+ " - Code conversion: Adapting existing standalone DL code into FL-compatible code. This typically involves minimal changes, often just a few lines of code, thanks to tools like NVFlare.\n",
+ "\n",
+ " - Workflow configuration: Once the code is modified, configuring the workflow to integrate the newly adapted FL code seamlessly.\n",
+ "\n",
+ "NVFlare simplifies the process of transitioning from traditional Machine Learning (ML) or DL algorithms to FL. With NVFlare, the conversion process requires only minor code adjustments.\n",
+ "\n",
+ "In this section, we have the following three examples for converting traditional ML to FL:\n",
+ "\n",
+ " * [Convert Logistic Regression to federated learning](02.3.1_convert_logistic_regression_to_federated_learning/convert_logistic_regression_to_fl.ipynb)\n",
+ " * [Convert KMeans to federated learning](02.3.2_convert_kmeans_to_federated_learning/convert_kmeans_to_fl.ipynb)\n",
+ " * [Convert Survival Analysis to federated learning](02.3.3_convert_survival_analysis_to_federated_learning/convert_survival_analysis_to_fl.ipynb)"
+ ]
+ }
+ ],
+ "metadata": {
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.4_recap/recap.ipynb b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.4_recap/recap.ipynb
new file mode 100644
index 0000000000..77f82177a4
--- /dev/null
+++ b/examples/tutorials/self-paced-training/part-1_federated_learning_introduction/chapter-2_develop_federated_learning_applications/02.4_recap/recap.ipynb
@@ -0,0 +1,65 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "7b152728-3366-4432-adb1-29aa3051dc22",
+ "metadata": {},
+ "source": [
+ "# Summary of Chapter 2\n",
+ "\n",
+ "We covered developing federated learning applications in Chapter 2. Here is an overview:\n",
+ "\n",
+ "1. **Federated Statistics**\n",
+ " - **Federated Statistics with Image Data**: How to compute local and global image statistics with the consideration that data is private at each of the client sites.\n",
+ " - [federated_statistics_with_image_data.ipynb](../02.1_federated_statistics/federated_statistics_with_image_data/federated_statistics_with_image_data.ipynb)\n",
+ " - **Federated Statistics with Tabular Data**: How to create federated statistics for data that can be represented as Pandas DataFrames.\n",
+ " - [federated_statistics_with_tabular_data.ipynb](../02.1_federated_statistics/federated_statistics_with_tabular_data/federated_statistics_with_tabular_data.ipynb)\n",
+ "\n",
+ "2. **Converting PyTorch Lightning to FL**\n",
+ " - **PyTorch Lightning to FL**: Guide on converting PyTorch Lightning scripts to federated learning.\n",
+ " - [convert_torch_lightning_to_fl.ipynb](../02.2_convert_torch_lightning_to_federated_learning/convert_torch_lightning_to_fl.ipynb)\n",
+ "\n",
+ "3. **Simple ML/DL to FL transition with NVFlare**\n",
+ " - **Converting Logistic Regression to FL**: How to implement a federated binary classification via logistic regression with second-order Newton-Raphson optimization. \n",
+ " - [convert_logistic_regression_to_fl.ipynb](../02.3_convert_machine_learning_to_federated_learning/02.3.1_convert_logistic_regression_to_federated_learning/convert_logistic_regression_to_fl.ipynb)\n",
+ " - **Converting KMeans to FL**: How to convert K-Means clustering to federated learning. \n",
+ " - [convert_kmeans_to_fl.ipynb](../02.3_convert_machine_learning_to_federated_learning/02.3.2_convert_kmeans_to_federated_learning/convert_kmeans_to_fl.ipynb)\n",
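+ " - **Secure Federated Kaplan-Meier Analysis via Time-Binning and Homomorphic Encryption**: How to perform federated Kaplan-Meier survival analysis, both without and with Homomorphic Encryption, using time-binned event histograms. \n",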
+ " - [convert_survival_analysis_to_fl.ipynb](../02.3_convert_machine_learning_to_federated_learning/02.3.3_convert_survival_analysis_to_federated_learning/convert_survival_analysis_to_fl.ipynb)\n",
+ "\n",
+ "4. **Client API**\n",
+ " - **Client API**: Here we focus on the core concepts of the Client API and explain how to configure it to run within the same process or in a separate subprocess. \n",
+ " - [client_api.ipynb](../02.4_client_api/client_api.ipynb)\n",
+ "\n",
+ "5. **Recap of Covered Topics**\n",
+ " - **Summary and Recap**: A recap of the topics covered in the previous sections.\n",
+ "\n",
+ "Each section is designed to provide comprehensive guidance and practical examples to help you implement and customize federated learning in your applications. For detailed instructions and examples, refer to the respective notebooks linked in each section.\n",
+ "\n",
+ "\n",
+ "Now let's move on to [Chapter 3](../../../part-2_federated_learning_system/chapter-3_federated_computing_platform/03.0_introduction/introduction.ipynb)."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}