From 8529119c288e3441a62eb7c1ba238c2797d3df98 Mon Sep 17 00:00:00 2001
From: yan-gao-GY
Date: Wed, 1 Nov 2023 20:49:42 +0000
Subject: [PATCH 01/31] Initialise XGBoost

---
 examples/quickstart-xgboost/client.py        | 131 +++++++++++++++++++
 examples/quickstart-xgboost/dataset.py       |  50 +++++++
 examples/quickstart-xgboost/pyproject.toml   |  16 +++
 examples/quickstart-xgboost/requirements.txt |   3 +
 examples/quickstart-xgboost/run.sh           |  20 +++
 examples/quickstart-xgboost/server.py        |  43 ++++++
 examples/quickstart-xgboost/strategy.py      |  96 ++++++++++++++
 7 files changed, 359 insertions(+)
 create mode 100644 examples/quickstart-xgboost/client.py
 create mode 100644 examples/quickstart-xgboost/dataset.py
 create mode 100644 examples/quickstart-xgboost/pyproject.toml
 create mode 100644 examples/quickstart-xgboost/requirements.txt
 create mode 100755 examples/quickstart-xgboost/run.sh
 create mode 100644 examples/quickstart-xgboost/server.py
 create mode 100644 examples/quickstart-xgboost/strategy.py

diff --git a/examples/quickstart-xgboost/client.py b/examples/quickstart-xgboost/client.py
new file mode 100644
index 00000000000..5a22583f1a3
--- /dev/null
+++ b/examples/quickstart-xgboost/client.py
@@ -0,0 +1,131 @@
+import warnings
+import xgboost as xgb
+
+import flwr as fl
+from flwr.common import (
+    Code,
+    EvaluateIns,
+    EvaluateRes,
+    FitIns,
+    FitRes,
+    GetParametersIns,
+    GetParametersRes,
+    Parameters,
+    Status,
+)
+
+from dataset import init_higgs, load_partition, split_train_test
+
+
+warnings.filterwarnings("ignore", category=UserWarning)
+
+# Load (HIGGS) dataset and conduct partitioning
+num_partitions = 20
+split_method = "uniform"
+fds = init_higgs(num_partitions, "uniform")
+
+# let's use the first partition as an example
+partition_id = 0
+partition = load_partition(fds, partition_id)
+
+# train/test splitting and data re-formatting
+SEED = 42
+split_rate = 0.2
+train_data, val_data = split_train_test(partition, split_rate, SEED)
+
+# Hyper-parameters for training
+num_local_round = 1
+params = {
+        "objective": "binary:logistic",
+        "eta": 0.1,  # lr
+        "max_depth": 8,
+        "eval_metric": "auc",
+        "nthread": 16,
+        "num_parallel_tree": 1,
+        "subsample": 1,
+        "tree_method": "hist",
+    }
+
+
+# Define Flower client
+class FlowerClient(fl.client.Client):
+    def __init__(self):
+        self.bst = None
+
+    def get_parameters(self, ins: GetParametersIns) -> GetParametersRes:
+        _ = (self, ins)
+        return GetParametersRes(
+            status=Status(
+                code=Code.OK,
+                message="OK",
+            ),
+            parameters=Parameters(tensor_type="", tensors=[]),
+        )
+
+    def local_boost(self):
+        for i in range(num_local_round):
+            self.bst.update(train_data, self.bst.num_boosted_rounds())
+
+        # extract the last N=num_local_round trees as new local model
+        bst = self.bst[self.bst.num_boosted_rounds() - num_local_round: self.bst.num_boosted_rounds()]
+        return bst
+
+    def fit(self, ins: FitIns) -> FitRes:
+        if not self.bst:
+            # first round local training
+            print("Start training at round 1")
+            bst = xgb.train(
+                params,
+                train_data,
+                num_boost_round=num_local_round,
+                evals=[(val_data, "validate"), (train_data, "train")],
+            )
+            self.config = bst.save_config()
+            self.bst = bst
+        else:
+            print("load global model")
+            for item in ins.parameters.tensors:
+                global_model = bytearray(item)
+
+            # load global model into booster
+            self.bst.load_model(global_model)
+            self.bst.load_config(self.config)
+
+            bst = self.local_boost()
+
+        local_model = bst.save_raw("json")
+        local_model_bytes = bytes(local_model)
+
+        return FitRes(
+            status=Status(
+                code=Code.OK,
+                message="OK",
+            ),
+            parameters=Parameters(tensor_type="", tensors=[local_model_bytes]),
+            num_examples=0,
+            metrics={},
+        )
+
+    def evaluate(self, ins: EvaluateIns) -> EvaluateRes:
+        eval_results = self.bst.eval_set(
+            evals=[(train_data, "train"), (val_data, "valid")], iteration=self.bst.num_boosted_rounds() - 1
+        )
+        auc = round(float(eval_results.split("\t")[2].split(":")[1]), 4)
+
+        global_round = ins.config["global_round"]
+        print(f"AUC = {auc} at round {global_round}")
+
+        return EvaluateRes(
+            status=Status(
+                code=Code.OK,
+                message="OK",
+            ),
+            loss=0.0,
+            num_examples=1,
+            metrics={"AUC": auc},
+        )
+
+
+# Start Flower client
+fl.client.start_client(server_address="127.0.0.1:8080", client=FlowerClient().to_client())
+

diff --git a/examples/quickstart-xgboost/dataset.py b/examples/quickstart-xgboost/dataset.py
new file mode 100644
index 00000000000..351ce37126d
--- /dev/null
+++ b/examples/quickstart-xgboost/dataset.py
@@ -0,0 +1,50 @@
+import numpy as np
+import datasets
+import xgboost as xgb
+
+from flwr_datasets import FederatedDataset
+from flwr_datasets.partitioner import (IidPartitioner, LinearPartitioner,
+                                       SquarePartitioner, ExponentialPartitioner)
+
+SPLIT_DICT = {"uniform": IidPartitioner,
+              "linear": LinearPartitioner,
+              "square": SquarePartitioner,
+              "exponential": ExponentialPartitioner
+              }
+
+
+def init_higgs(num_partitions: int, split_method: str) -> FederatedDataset:
+    """Initialise FederatedDataset based on selected split method."""
+    partitioner = SPLIT_DICT[split_method](num_partitions=num_partitions)
+    fds = FederatedDataset(dataset="jxie/higgs", partitioners={"train": partitioner})
+    return fds
+
+
+def load_partition(fds: FederatedDataset, partition_id: int) -> datasets.Dataset:
+    """Load partition based on the given partition ID."""
+    partition = fds.load_partition(idx=partition_id, split="train")
+    partition.set_format("numpy")
+    return partition
+
+
+def split_train_test(partition: datasets.Dataset, split_rate: float, seed: int):
+    """Split the data into train and validation set given split rate."""
+    train_test = partition.train_test_split(test_size=split_rate, seed=seed)
+    partition_train = train_test["train"]
+    partition_test = train_test["test"]
+
+    # Reformat data for xgboost input
+    train_data = _reformat_data(partition_train)
+    val_data = _reformat_data(partition_test)
+    return train_data, val_data
+
+
+def _reformat_data(partition):
+    x = partition["inputs"]
+    y = partition["label"]
+    new_data = xgb.DMatrix(x, label=y)
+    return new_data
+
+
+
+

diff --git a/examples/quickstart-xgboost/pyproject.toml b/examples/quickstart-xgboost/pyproject.toml
new file mode 100644
index 00000000000..affdfee26d4
--- /dev/null
+++ b/examples/quickstart-xgboost/pyproject.toml
@@ -0,0 +1,16 @@
+[build-system]
+requires = ["poetry-core>=1.4.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.poetry]
+name = "quickstart-pytorch"
+version = "0.1.0"
+description = "PyTorch Federated Learning Quickstart with Flower"
+authors = ["The Flower Authors"]
+
+[tool.poetry.dependencies]
+python = ">=3.8,<3.11"
+flwr = ">=1.0,<2.0"
+torch = "1.13.1"
+torchvision = "0.14.1"
+tqdm = "4.65.0"

diff --git a/examples/quickstart-xgboost/requirements.txt b/examples/quickstart-xgboost/requirements.txt
new file mode 100644
index 00000000000..297c86a8d26
--- /dev/null
+++ b/examples/quickstart-xgboost/requirements.txt
@@ -0,0 +1,3 @@
+flwr>=1.0, <2.0
+xgboost==2.0.1
+tqdm==4.65.0

diff --git a/examples/quickstart-xgboost/run.sh b/examples/quickstart-xgboost/run.sh
new file mode 100755
index 00000000000..9a157a04950
--- /dev/null
+++ b/examples/quickstart-xgboost/run.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+set -e
+cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"/
+
+# Download the CIFAR-10 dataset
+python -c "from dataset import init_higgs; init_higgs(20, "uniform")"
+
+echo "Starting server"
+python server.py &
+sleep 3  # Sleep for 3s to give the server enough time to start
+
+for i in `seq 0 1`; do
+    echo "Starting client $i"
+    python client.py &
+done
+
+# Enable CTRL+C to stop all background processes
+trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM
+# Wait for all background processes to complete
+wait

diff --git a/examples/quickstart-xgboost/server.py b/examples/quickstart-xgboost/server.py
new file mode 100644
index 00000000000..4bbc6a4e7e4
--- /dev/null
+++ b/examples/quickstart-xgboost/server.py
@@ -0,0 +1,43 @@
+from typing import List, Tuple, Dict
+
+import flwr as fl
+from strategy import XGbBagging
+
+pool_size = 2
+num_rounds = 5
+num_clients_per_round = 2
+min_evaluate_clients = 2
+
+
+def eval_config(rnd: int) -> Dict[str, str]:
+    """Return a configuration with global epochs."""
+    config = {
+        "global_round": str(rnd),
+    }
+    return config
+
+
+def evaluate_metrics_aggregation(eval_metrics):
+    """Return an aggregated metric (AUC) for evaluation."""
+    auc_aggregated = sum([metrics["AUC"] for _, metrics in eval_metrics]) / len(eval_metrics)
+    metrics_aggregated = {"AUC": auc_aggregated}
+    return metrics_aggregated
+
+
+# Define strategy
+strategy = XGbBagging(
+        fraction_fit=(float(num_clients_per_round) / pool_size),
+        min_fit_clients=num_clients_per_round,
+        min_available_clients=pool_size,
+        fraction_evaluate=1.0,
+        min_evaluate_clients=min_evaluate_clients,
+        on_evaluate_config_fn=eval_config,
+        evaluate_metrics_aggregation_fn=evaluate_metrics_aggregation,
+    )
+
+# Start Flower server
+fl.server.start_server(
+    server_address="0.0.0.0:8080",
+    config=fl.server.ServerConfig(num_rounds=num_rounds),
+    strategy=strategy,
+)

diff --git a/examples/quickstart-xgboost/strategy.py b/examples/quickstart-xgboost/strategy.py
new file mode 100644
index 00000000000..755e6a51ed8
--- /dev/null
+++ b/examples/quickstart-xgboost/strategy.py
@@ -0,0 +1,96 @@
+from logging import WARNING
+from typing import Dict, List, Optional, Tuple, Union
+import flwr as fl
+import json
+
+from flwr.common import (
+    EvaluateRes,
+    FitRes,
+    Parameters,
+    Scalar,
+)
+from flwr.server.client_proxy import ClientProxy
+from flwr.common.logger import log
+
+
+class XGbBagging(fl.server.strategy.FedAvg):
+    def aggregate_fit(
+            self,
+            server_round: int,
+            results: List[Tuple[ClientProxy, FitRes]],
+            failures: List[Union[Tuple[ClientProxy, FitRes], BaseException]],
+    ) -> Tuple[Optional[Parameters], Dict[str, Scalar]]:
+        """Aggregate fit results using bagging."""
+        if not results:
+            return None, {}
+        # Do not aggregate if there are failures and failures are not accepted
+        if not self.accept_failures and failures:
+            return None, {}
+
+        # Aggregate all the client trees
+        global_model = None
+        for _, fit_res in results:
+            update = fit_res.parameters.tensors
+            for item in update:
+                global_model = aggregate(global_model, json.loads(bytearray(item)))
+
+        weights_avg = json.dumps(global_model)
+
+        return Parameters(tensor_type="", tensors=[bytes(bytearray(weights_avg, "utf-8"))]), {}
+
+    def aggregate_evaluate(
+        self,
+        server_round: int,
+        results: List[Tuple[ClientProxy, EvaluateRes]],
+        failures: List[Union[Tuple[ClientProxy, EvaluateRes], BaseException]],
+    ) -> Tuple[Optional[float], Dict[str, Scalar]]:
+        """Aggregate evaluation metrics using average."""
+        if not results:
+            return None, {}
+        # Do not aggregate if there are failures and failures are not accepted
+        if not self.accept_failures and failures:
+            return None, {}
+
+        # Aggregate custom metrics if aggregation fn was provided
+        metrics_aggregated = {}
+        if self.evaluate_metrics_aggregation_fn:
+            eval_metrics = [(res.num_examples, res.metrics) for _, res in results]
+            metrics_aggregated = self.evaluate_metrics_aggregation_fn(eval_metrics)
+        elif server_round == 1:  # Only log this warning once
+            log(WARNING, "No evaluate_metrics_aggregation_fn provided")
+
+        return 0, metrics_aggregated
+
+
+def aggregate(bst_prev, bst_curr):
+    if not bst_prev:
+        return bst_curr
+    else:
+        # get the tree numbers
+        tree_num_prev, paral_tree_num_prev = _get_tree_nums(bst_prev)
+        tree_num_curr, paral_tree_num_curr = _get_tree_nums(bst_curr)
+
+        bst_prev["learner"]["gradient_booster"]["model"]["gbtree_model_param"]["num_trees"] = str(
+            tree_num_prev + paral_tree_num_curr
+        )
+        iteration_indptr = bst_prev["learner"]["gradient_booster"]["model"]["iteration_indptr"]
+        bst_prev["learner"]["gradient_booster"]["model"]["iteration_indptr"].append(
+            iteration_indptr[-1] + 1 * paral_tree_num_curr)
+
+        # aggregate new trees
+        trees_curr = bst_curr["learner"]["gradient_booster"]["model"]["trees"]
+        for tree_count in range(paral_tree_num_curr):
+            trees_curr[tree_count]["id"] = tree_num_prev + tree_count
+            bst_prev["learner"]["gradient_booster"]["model"]["trees"].append(trees_curr[tree_count])
+            bst_prev["learner"]["gradient_booster"]["model"]["tree_info"].append(0)
+        return bst_prev
+
+
+def _get_tree_nums(xgb_model):
+    # get the number of trees
+    tree_num = int(xgb_model["learner"]["gradient_booster"]["model"]["gbtree_model_param"]["num_trees"])
+    # get the number of parallel trees
+    paral_tree_num = int(
+        xgb_model["learner"]["gradient_booster"]["model"]["gbtree_model_param"]["num_parallel_tree"]
+    )
+    return tree_num, paral_tree_num
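The `aggregate` helper above is the heart of the bagging strategy: it appends the trees of one booster's JSON model onto another's and renumbers them. A minimal sketch of that behaviour, assuming `strategy.py` is importable and using toy data in place of HIGGS (28 features, binary labels):

```python
import json

import numpy as np
import xgboost as xgb

from strategy import aggregate

# Toy stand-ins for two clients' one-round local training
rng = np.random.default_rng(0)
dtrain = xgb.DMatrix(rng.random((64, 28)), label=rng.integers(0, 2, 64))
params = {"objective": "binary:logistic", "tree_method": "hist"}
bst_a = xgb.train(params, dtrain, num_boost_round=1)
bst_b = xgb.train(params, dtrain, num_boost_round=1)

# Merge client B's tree into client A's model, as aggregate_fit does
merged = aggregate(
    json.loads(bytearray(bst_a.save_raw("json"))),
    json.loads(bytearray(bst_b.save_raw("json"))),
)
model = merged["learner"]["gradient_booster"]["model"]
print(model["gbtree_model_param"]["num_trees"])  # "2": one tree per client
```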
From fe2d3e0f76d9fb3539c6696d8e10fa276c63b1fe Mon Sep 17 00:00:00 2001
From: yan-gao-GY
Date: Wed, 1 Nov 2023 20:53:30 +0000
Subject: [PATCH 02/31] Upload readme

---
 examples/quickstart-xgboost/README.md | 75 +++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 examples/quickstart-xgboost/README.md

diff --git a/examples/quickstart-xgboost/README.md b/examples/quickstart-xgboost/README.md
new file mode 100644
index 00000000000..325287a0623
--- /dev/null
+++ b/examples/quickstart-xgboost/README.md
@@ -0,0 +1,75 @@
+# Flower Example using XGBoost
+
+This introductory example to Flower uses PyTorch, but deep knowledge of PyTorch is not necessarily required to run the example. However, it will help you understand how to adapt Flower to your use case.
+Running this example in itself is quite easy.
+
+## Project Setup
+
+Start by cloning the example project. We prepared a single-line command that you can copy into your shell which will checkout the example for you:
+
+```shell
+git clone --depth=1 https://github.com/adap/flower.git && mv flower/examples/quickstart-pytorch . && rm -rf flower && cd quickstart-pytorch
+```
+
+This will create a new directory called `quickstart-pytorch` containing the following files:
+
+```shell
+-- pyproject.toml
+-- requirements.txt
+-- client.py
+-- server.py
+-- strategy.py
+-- dataset.py
+-- README.md
+```
+
+### Installing Dependencies
+
+Project dependencies (such as `xgboost` and `flwr`) are defined in `pyproject.toml` and `requirements.txt`. We recommend [Poetry](https://python-poetry.org/docs/) to install those dependencies and manage your virtual environment ([Poetry installation](https://python-poetry.org/docs/#installation)) or [pip](https://pip.pypa.io/en/latest/development/), but feel free to use a different way of installing dependencies and managing virtual environments if you have other preferences.
+
+#### Poetry
+
+```shell
+poetry install
+poetry shell
+```
+
+Poetry will install all your dependencies in a newly created virtual environment. To verify that everything works correctly you can run the following command:
+
+```shell
+poetry run python3 -c "import flwr"
+```
+
+If you don't see any errors you're good to go!
+
+#### pip
+
+Write the command below in your terminal to install the dependencies according to the configuration file requirements.txt.
+
+```shell
+pip install -r requirements.txt
+```
+
+## Run Federated Learning with XGBoost and Flower
+
+Afterwards you are ready to start the Flower server as well as the clients. You can simply start the server in a terminal as follows:
+
+```shell
+python3 server.py
+```
+
+Now you are ready to start the Flower clients which will participate in the learning. To do so simply open two more terminal windows and run the following commands.
+
+Start client 1 in the first terminal:
+
+```shell
+python3 client.py
+```
+
+Start client 2 in the second terminal:
+
+```shell
+python3 client.py
+```
+
+You will see that XGBoost is starting a federated training. Look at the [code](https://github.com/adap/flower/tree/main/examples/quickstart-xgboost) for a detailed explanation.
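Note how the model travels in the code above: the client serialises its booster to raw JSON bytes inside Flower's `Parameters` container, and the receiving side loads those bytes back into a `Booster`. A self-contained sketch of that round trip with toy data (names are illustrative):

```python
import numpy as np
import xgboost as xgb
from flwr.common import Parameters

rng = np.random.default_rng(0)
dtrain = xgb.DMatrix(rng.random((32, 28)), label=rng.integers(0, 2, 32))
bst = xgb.train({"objective": "binary:logistic"}, dtrain, num_boost_round=1)

# Client -> server: JSON bytes wrapped in Parameters (as in fit())
payload = Parameters(tensor_type="", tensors=[bytes(bst.save_raw("json"))])

# Receiving side: load the bytes back into a fresh Booster
restored = xgb.Booster()
restored.load_model(bytearray(payload.tensors[0]))
print(restored.num_boosted_rounds())  # 1
```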
From 7efcdbf90fc3120a80efe523daff2d52c3238c24 Mon Sep 17 00:00:00 2001
From: yan-gao-GY
Date: Thu, 2 Nov 2023 17:38:17 +0000
Subject: [PATCH 03/31] Pass the number of examples to server and do formatting

---
 examples/quickstart-xgboost/README.md      | 10 +++---
 examples/quickstart-xgboost/client.py      | 38 +++++++++++---------
 examples/quickstart-xgboost/dataset.py     | 29 ++++++++-------
 examples/quickstart-xgboost/pyproject.toml |  4 +--
 examples/quickstart-xgboost/server.py      | 22 ++++++------
 examples/quickstart-xgboost/strategy.py    | 42 +++++++++++++++-------
 6 files changed, 86 insertions(+), 59 deletions(-)

diff --git a/examples/quickstart-xgboost/README.md b/examples/quickstart-xgboost/README.md
index 325287a0623..9264477ee23 100644
--- a/examples/quickstart-xgboost/README.md
+++ b/examples/quickstart-xgboost/README.md
@@ -1,17 +1,17 @@
 # Flower Example using XGBoost
 
-This introductory example to Flower uses PyTorch, but deep knowledge of PyTorch is not necessarily required to run the example. However, it will help you understand how to adapt Flower to your use case.
-Running this example in itself is quite easy.
+This example demonstrates how to perform EXtreme Gradient Boosting (XGBoost) within Flower using the `xgboost` package.
+A tree-based bagging method is used for aggregation on the server.
 
 ## Project Setup
 
 Start by cloning the example project. We prepared a single-line command that you can copy into your shell which will checkout the example for you:
 
 ```shell
-git clone --depth=1 https://github.com/adap/flower.git && mv flower/examples/quickstart-pytorch . && rm -rf flower && cd quickstart-pytorch
+git clone --depth=1 https://github.com/adap/flower.git && mv flower/examples/quickstart-xgboost . && rm -rf flower && cd quickstart-xgboost
 ```
 
-This will create a new directory called `quickstart-pytorch` containing the following files:
+This will create a new directory called `quickstart-xgboost` containing the following files:
 
 ```shell
 -- pyproject.toml
@@ -72,4 +72,4 @@ Start client 2 in the second terminal:
 python3 client.py
 ```
 
-You will see that XGBoost is starting a federated training. Look at the [code](https://github.com/adap/flower/tree/main/examples/quickstart-xgboost) for a detailed explanation.
+You will see that XGBoost is starting a federated training. Look at the [code](https://github.com/adap/flower/tree/main/examples/quickstart-xgboost) and [tutorial](https://flower.dev/docs/framework/tutorial-quickstart-xgboost.html) for a detailed explanation.

diff --git a/examples/quickstart-xgboost/client.py b/examples/quickstart-xgboost/client.py
index 5a22583f1a3..c7cd1892618 100644
--- a/examples/quickstart-xgboost/client.py
+++ b/examples/quickstart-xgboost/client.py
@@ -31,20 +31,20 @@
 # train/test splitting and data re-formatting
 SEED = 42
 split_rate = 0.2
-train_data, val_data = split_train_test(partition, split_rate, SEED)
+train_data, val_data, num_train, num_val = split_train_test(partition, split_rate, SEED)
 
 # Hyper-parameters for training
 num_local_round = 1
 params = {
-        "objective": "binary:logistic",
-        "eta": 0.1,  # lr
-        "max_depth": 8,
-        "eval_metric": "auc",
-        "nthread": 16,
-        "num_parallel_tree": 1,
-        "subsample": 1,
-        "tree_method": "hist",
-    }
+    "objective": "binary:logistic",
+    "eta": 0.1,  # lr
+    "max_depth": 8,
+    "eval_metric": "auc",
+    "nthread": 16,
+    "num_parallel_tree": 1,
+    "subsample": 1,
+    "tree_method": "hist",
+}
 
 
 # Define Flower client
@@ -63,11 +63,15 @@ def get_parameters(self, ins: GetParametersIns) -> GetParametersRes:
         )
 
     def local_boost(self):
+        # update trees based on local training data.
         for i in range(num_local_round):
             self.bst.update(train_data, self.bst.num_boosted_rounds())
 
         # extract the last N=num_local_round trees as new local model
-        bst = self.bst[self.bst.num_boosted_rounds() - num_local_round: self.bst.num_boosted_rounds()]
+        bst = self.bst[
+            self.bst.num_boosted_rounds()
+            - num_local_round : self.bst.num_boosted_rounds()
+        ]
         return bst
 
     def fit(self, ins: FitIns) -> FitRes:
@@ -102,13 +106,14 @@ def fit(self, ins: FitIns) -> FitRes:
                 message="OK",
             ),
             parameters=Parameters(tensor_type="", tensors=[local_model_bytes]),
-            num_examples=0,
+            num_examples=num_train,
             metrics={},
         )
 
     def evaluate(self, ins: EvaluateIns) -> EvaluateRes:
         eval_results = self.bst.eval_set(
-            evals=[(train_data, "train"), (val_data, "valid")], iteration=self.bst.num_boosted_rounds() - 1
+            evals=[(train_data, "train"), (val_data, "valid")],
+            iteration=self.bst.num_boosted_rounds() - 1,
         )
         auc = round(float(eval_results.split("\t")[2].split(":")[1]), 4)
 
@@ -121,11 +126,12 @@ def evaluate(self, ins: EvaluateIns) -> EvaluateRes:
                 message="OK",
             ),
             loss=0.0,
-            num_examples=1,
+            num_examples=num_val,
             metrics={"AUC": auc},
         )
 
 
 # Start Flower client
-fl.client.start_client(server_address="127.0.0.1:8080", client=FlowerClient().to_client())
-
+fl.client.start_client(
+    server_address="127.0.0.1:8080", client=FlowerClient().to_client()
+)

diff --git a/examples/quickstart-xgboost/dataset.py b/examples/quickstart-xgboost/dataset.py
index 351ce37126d..888ee71f53c 100644
--- a/examples/quickstart-xgboost/dataset.py
+++ b/examples/quickstart-xgboost/dataset.py
@@ -1,16 +1,20 @@
-import numpy as np
 import datasets
 import xgboost as xgb
 
 from flwr_datasets import FederatedDataset
-from flwr_datasets.partitioner import (IidPartitioner, LinearPartitioner,
-                                       SquarePartitioner, ExponentialPartitioner)
+from flwr_datasets.partitioner import (
+    IidPartitioner,
+    LinearPartitioner,
+    SquarePartitioner,
+    ExponentialPartitioner,
+)
 
-SPLIT_DICT = {"uniform": IidPartitioner,
-              "linear": LinearPartitioner,
-              "square": SquarePartitioner,
-              "exponential": ExponentialPartitioner
-              }
+SPLIT_DICT = {
+    "uniform": IidPartitioner,
+    "linear": LinearPartitioner,
+    "square": SquarePartitioner,
+    "exponential": ExponentialPartitioner,
+}
 
 
 def init_higgs(num_partitions: int, split_method: str) -> FederatedDataset:
@@ -33,10 +37,13 @@ def split_train_test(partition: datasets.Dataset, split_rate: float, seed: int):
     partition_train = train_test["train"]
     partition_test = train_test["test"]
 
+    num_train = len(partition_train)
+    num_val = len(partition_test)
+
     # Reformat data for xgboost input
     train_data = _reformat_data(partition_train)
     val_data = _reformat_data(partition_test)
-    return train_data, val_data
+    return train_data, val_data, num_train, num_val
 
 
 def _reformat_data(partition):
@@ -44,7 +51,3 @@ def _reformat_data(partition):
     y = partition["label"]
     new_data = xgb.DMatrix(x, label=y)
     return new_data
-
-
-
-

diff --git a/examples/quickstart-xgboost/pyproject.toml b/examples/quickstart-xgboost/pyproject.toml
index affdfee26d4..21a1d6d047a 100644
--- a/examples/quickstart-xgboost/pyproject.toml
+++ b/examples/quickstart-xgboost/pyproject.toml
@@ -3,9 +3,9 @@ requires = ["poetry-core>=1.4.0"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
-name = "quickstart-pytorch"
+name = "quickstart-xgboost"
 version = "0.1.0"
-description = "PyTorch Federated Learning Quickstart with Flower"
+description = "Federated XGBoost Quickstart with Flower"
 authors = ["The Flower Authors"]

diff --git a/examples/quickstart-xgboost/server.py b/examples/quickstart-xgboost/server.py
index 4bbc6a4e7e4..714bc01b06d 100644
--- a/examples/quickstart-xgboost/server.py
+++ b/examples/quickstart-xgboost/server.py
@@ -1,4 +1,4 @@
-from typing import List, Tuple, Dict
+from typing import Dict
 
 import flwr as fl
 from strategy import XGbBagging
@@ -19,21 +19,23 @@ def eval_config(rnd: int) -> Dict[str, str]:
 
 def evaluate_metrics_aggregation(eval_metrics):
     """Return an aggregated metric (AUC) for evaluation."""
-    auc_aggregated = sum([metrics["AUC"] for _, metrics in eval_metrics]) / len(eval_metrics)
+    auc_aggregated = sum([metrics["AUC"] for _, metrics in eval_metrics]) / len(
+        eval_metrics
+    )
     metrics_aggregated = {"AUC": auc_aggregated}
     return metrics_aggregated
 
 
 # Define strategy
 strategy = XGbBagging(
-        fraction_fit=(float(num_clients_per_round) / pool_size),
-        min_fit_clients=num_clients_per_round,
-        min_available_clients=pool_size,
-        fraction_evaluate=1.0,
-        min_evaluate_clients=min_evaluate_clients,
-        on_evaluate_config_fn=eval_config,
-        evaluate_metrics_aggregation_fn=evaluate_metrics_aggregation,
-    )
+    fraction_fit=(float(num_clients_per_round) / pool_size),
+    min_fit_clients=num_clients_per_round,
+    min_available_clients=pool_size,
+    fraction_evaluate=1.0,
+    min_evaluate_clients=min_evaluate_clients,
+    on_evaluate_config_fn=eval_config,
+    evaluate_metrics_aggregation_fn=evaluate_metrics_aggregation,
+)
 
 # Start Flower server
 fl.server.start_server(

diff --git a/examples/quickstart-xgboost/strategy.py b/examples/quickstart-xgboost/strategy.py
index 755e6a51ed8..36a2020bf35 100644
--- a/examples/quickstart-xgboost/strategy.py
+++ b/examples/quickstart-xgboost/strategy.py
@@ -15,10 +15,10 @@
 class XGbBagging(fl.server.strategy.FedAvg):
     def aggregate_fit(
-            self,
-            server_round: int,
-            results: List[Tuple[ClientProxy, FitRes]],
-            failures: List[Union[Tuple[ClientProxy, FitRes], BaseException]],
+        self,
+        server_round: int,
+        results: List[Tuple[ClientProxy, FitRes]],
+        failures: List[Union[Tuple[ClientProxy, FitRes], BaseException]],
     ) -> Tuple[Optional[Parameters], Dict[str, Scalar]]:
         """Aggregate fit results using bagging."""
         if not results:
@@ -36,7 +36,12 @@ def aggregate_fit(
 
         weights_avg = json.dumps(global_model)
 
-        return Parameters(tensor_type="", tensors=[bytes(bytearray(weights_avg, "utf-8"))]), {}
+        return (
+            Parameters(
+                tensor_type="", tensors=[bytes(bytearray(weights_avg, "utf-8"))]
+            ),
+            {},
+        )
 
     def aggregate_evaluate(
         self,
@@ -70,27 +75,38 @@ def aggregate(bst_prev, bst_curr):
         tree_num_prev, paral_tree_num_prev = _get_tree_nums(bst_prev)
         tree_num_curr, paral_tree_num_curr = _get_tree_nums(bst_curr)
 
-        bst_prev["learner"]["gradient_booster"]["model"]["gbtree_model_param"]["num_trees"] = str(
-            tree_num_prev + paral_tree_num_curr
-        )
-        iteration_indptr = bst_prev["learner"]["gradient_booster"]["model"]["iteration_indptr"]
+        bst_prev["learner"]["gradient_booster"]["model"]["gbtree_model_param"][
+            "num_trees"
+        ] = str(tree_num_prev + paral_tree_num_curr)
+        iteration_indptr = bst_prev["learner"]["gradient_booster"]["model"][
+            "iteration_indptr"
+        ]
         bst_prev["learner"]["gradient_booster"]["model"]["iteration_indptr"].append(
-            iteration_indptr[-1] + 1 * paral_tree_num_curr)
+            iteration_indptr[-1] + 1 * paral_tree_num_curr
+        )
 
         # aggregate new trees
         trees_curr = bst_curr["learner"]["gradient_booster"]["model"]["trees"]
         for tree_count in range(paral_tree_num_curr):
             trees_curr[tree_count]["id"] = tree_num_prev + tree_count
-            bst_prev["learner"]["gradient_booster"]["model"]["trees"].append(trees_curr[tree_count])
+            bst_prev["learner"]["gradient_booster"]["model"]["trees"].append(
+                trees_curr[tree_count]
+            )
             bst_prev["learner"]["gradient_booster"]["model"]["tree_info"].append(0)
         return bst_prev
 
 
 def _get_tree_nums(xgb_model):
     # get the number of trees
-    tree_num = int(xgb_model["learner"]["gradient_booster"]["model"]["gbtree_model_param"]["num_trees"])
+    tree_num = int(
+        xgb_model["learner"]["gradient_booster"]["model"]["gbtree_model_param"][
+            "num_trees"
+        ]
+    )
     # get the number of parallel trees
     paral_tree_num = int(
-        xgb_model["learner"]["gradient_booster"]["model"]["gbtree_model_param"]["num_parallel_tree"]
+        xgb_model["learner"]["gradient_booster"]["model"]["gbtree_model_param"][
+            "num_parallel_tree"
+        ]
     )
     return tree_num, paral_tree_num

From 6ddb4481f68ab1ccceafc2bde6d5bbe855227fbd Mon Sep 17 00:00:00 2001
From: yan-gao-GY
Date: Thu, 2 Nov 2023 20:31:54 +0000
Subject: [PATCH 04/31] Add flwr_datasets as required package

---
 examples/quickstart-xgboost/requirements.txt | 1 +
 examples/quickstart-xgboost/strategy.py      | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/quickstart-xgboost/requirements.txt b/examples/quickstart-xgboost/requirements.txt
index 297c86a8d26..67da34394f2 100644
--- a/examples/quickstart-xgboost/requirements.txt
+++ b/examples/quickstart-xgboost/requirements.txt
@@ -1,3 +1,4 @@
 flwr>=1.0, <2.0
+flwr_datasets
 xgboost==2.0.1
 tqdm==4.65.0

diff --git a/examples/quickstart-xgboost/strategy.py b/examples/quickstart-xgboost/strategy.py
index 36a2020bf35..7a0bbb61d6e 100644
--- a/examples/quickstart-xgboost/strategy.py
+++ b/examples/quickstart-xgboost/strategy.py
@@ -38,7 +38,7 @@ def aggregate_fit(
 
         return (
             Parameters(
-                tensor_type="", tensors=[bytes(bytearray(weights_avg, "utf-8"))]
+                tensor_type="", tensors=[bytes(weights_avg, "utf-8")]
             ),
             {},
         )
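`local_boost` in the client above leans on the fact that an xgboost `Booster` can be sliced by boosting round, so a client can return only the trees it added during the current round rather than the whole model. A small sketch with toy data:

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
dtrain = xgb.DMatrix(rng.random((64, 28)), label=rng.integers(0, 2, 64))
bst = xgb.train({"objective": "binary:logistic"}, dtrain, num_boost_round=5)

num_local_round = 2
for _ in range(num_local_round):
    bst.update(dtrain, bst.num_boosted_rounds())  # train in place, like local_boost()

# Keep only the trees grown in this round
last = bst[bst.num_boosted_rounds() - num_local_round : bst.num_boosted_rounds()]
print(bst.num_boosted_rounds(), last.num_boosted_rounds())  # 7 2
```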
From 34452a5c2bf83e42106425670d5b657ba1d870c5 Mon Sep 17 00:00:00 2001
From: yan-gao-GY
Date: Mon, 6 Nov 2023 10:54:49 +0000
Subject: [PATCH 05/31] Change dataset loading structure

---
 examples/quickstart-xgboost/client.py  | 33 +++++++++++++++--------
 examples/quickstart-xgboost/dataset.py | 37 +++++++++-----------------
 2 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/examples/quickstart-xgboost/client.py b/examples/quickstart-xgboost/client.py
index c7cd1892618..da8fbea42dc 100644
--- a/examples/quickstart-xgboost/client.py
+++ b/examples/quickstart-xgboost/client.py
@@ -2,6 +2,7 @@
 import xgboost as xgb
 
 import flwr as fl
+from flwr_datasets import FederatedDataset
 from flwr.common import (
     Code,
     EvaluateIns,
@@ -14,26 +15,36 @@
     Status,
 )
 
-from dataset import init_higgs, load_partition, split_train_test
+from dataset import instantiate_partitioner, train_test_split, transform_dataset_to_dmatrix
 
 
 warnings.filterwarnings("ignore", category=UserWarning)
 
 # Load (HIGGS) dataset and conduct partitioning
 num_partitions = 20
-split_method = "uniform"
-fds = init_higgs(num_partitions, "uniform")
+# partitioner type is chosen from ["uniform", "linear", "square", "exponential"]
+partitioner_type = "uniform"
+
+# instantiate partitioner
+partitioner = instantiate_partitioner(partitioner_type=partitioner_type, num_partitions=num_partitions)
+fds = FederatedDataset(dataset="jxie/higgs", partitioners={"train": partitioner})
 
 # let's use the first partition as an example
 partition_id = 0
-partition = load_partition(fds, partition_id)
+partition = fds.load_partition(idx=partition_id, split="train")
+partition.set_format("numpy")
 
 # train/test splitting and data re-formatting
 SEED = 42
-split_rate = 0.2
-train_data, val_data, num_train, num_val = split_train_test(partition, split_rate, SEED)
+test_fraction = 0.2
+train_data, valid_data, num_train, num_val = train_test_split(partition, test_fraction=test_fraction, seed=SEED)
+
+# reformat data to DMatrix for xgboost
+train_dmatrix = transform_dataset_to_dmatrix(train_data)
+valid_dmatrix = transform_dataset_to_dmatrix(valid_data)
 
-# Hyper-parameters for training
+# Hyper-parameters for xgboost training
 num_local_round = 1
 params = {
     "objective": "binary:logistic",
@@ -65,7 +76,7 @@ def get_parameters(self, ins: GetParametersIns) -> GetParametersRes:
     def local_boost(self):
         # update trees based on local training data.
         for i in range(num_local_round):
-            self.bst.update(train_data, self.bst.num_boosted_rounds())
+            self.bst.update(train_dmatrix, self.bst.num_boosted_rounds())
 
         # extract the last N=num_local_round trees as new local model
         bst = self.bst[
@@ -80,9 +91,9 @@ def fit(self, ins: FitIns) -> FitRes:
             print("Start training at round 1")
             bst = xgb.train(
                 params,
-                train_data,
+                train_dmatrix,
                 num_boost_round=num_local_round,
-                evals=[(val_data, "validate"), (train_data, "train")],
+                evals=[(valid_dmatrix, "validate"), (train_dmatrix, "train")],
             )
             self.config = bst.save_config()
             self.bst = bst
@@ -112,7 +123,7 @@ def fit(self, ins: FitIns) -> FitRes:
 
     def evaluate(self, ins: EvaluateIns) -> EvaluateRes:
         eval_results = self.bst.eval_set(
-            evals=[(train_data, "train"), (val_data, "valid")],
+            evals=[(train_dmatrix, "train"), (valid_dmatrix, "valid")],
             iteration=self.bst.num_boosted_rounds() - 1,
         )
         auc = round(float(eval_results.split("\t")[2].split(":")[1]), 4)

diff --git a/examples/quickstart-xgboost/dataset.py b/examples/quickstart-xgboost/dataset.py
index 888ee71f53c..351f36e7138 100644
--- a/examples/quickstart-xgboost/dataset.py
+++ b/examples/quickstart-xgboost/dataset.py
@@ -1,7 +1,6 @@
 import datasets
 import xgboost as xgb
 
-from flwr_datasets import FederatedDataset
 from flwr_datasets.partitioner import (
     IidPartitioner,
     LinearPartitioner,
@@ -9,7 +8,7 @@
     ExponentialPartitioner,
 )
 
-SPLIT_DICT = {
+CORRELATION_TO_PARTITIONER = {
     "uniform": IidPartitioner,
     "linear": LinearPartitioner,
     "square": SquarePartitioner,
@@ -17,37 +16,27 @@
 }
 
 
-def init_higgs(num_partitions: int, split_method: str) -> FederatedDataset:
-    """Initialise FederatedDataset based on selected split method."""
-    partitioner = SPLIT_DICT[split_method](num_partitions=num_partitions)
-    fds = FederatedDataset(dataset="jxie/higgs", partitioners={"train": partitioner})
-    return fds
+def instantiate_partitioner(partitioner_type: str, num_partitions: int):
+    """Initialise partitioner based on selected partitioner type and number of partitions."""
+    partitioner = CORRELATION_TO_PARTITIONER[partitioner_type](num_partitions=num_partitions)
+    return partitioner
 
 
-def load_partition(fds: FederatedDataset, partition_id: int) -> datasets.Dataset:
-    """Load partition based on the given partition ID."""
-    partition = fds.load_partition(idx=partition_id, split="train")
-    partition.set_format("numpy")
-    return partition
-
-
-def split_train_test(partition: datasets.Dataset, split_rate: float, seed: int):
+def train_test_split(partition: datasets.Dataset, test_fraction: float, seed: int):
     """Split the data into train and validation set given split rate."""
-    train_test = partition.train_test_split(test_size=split_rate, seed=seed)
+    train_test = partition.train_test_split(test_size=test_fraction, seed=seed)
     partition_train = train_test["train"]
     partition_test = train_test["test"]
 
     num_train = len(partition_train)
-    num_val = len(partition_test)
+    num_test = len(partition_test)
 
-    # Reformat data for xgboost input
-    train_data = _reformat_data(partition_train)
-    val_data = _reformat_data(partition_test)
-    return train_data, val_data, num_train, num_val
+    return partition_train, partition_test, num_train, num_test
 
 
-def _reformat_data(partition):
-    x = partition["inputs"]
-    y = partition["label"]
+def transform_dataset_to_dmatrix(data):
+    """transform dataset to DMatrix format for xgboost."""
+    x = data["inputs"]
+    y = data["label"]
     new_data = xgb.DMatrix(x, label=y)
     return new_data
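The helpers in `dataset.py` now form a small loading pipeline. A usage sketch (it assumes network access to the `jxie/higgs` dataset on the Hugging Face Hub, which is large, so treat it as illustrative):

```python
from flwr_datasets import FederatedDataset

from dataset import (
    instantiate_partitioner,
    train_test_split,
    transform_dataset_to_dmatrix,
)

# An "exponential" split gives later clients increasingly large partitions
partitioner = instantiate_partitioner("exponential", num_partitions=10)
fds = FederatedDataset(dataset="jxie/higgs", partitioners={"train": partitioner})

partition = fds.load_partition(idx=3, split="train")
partition.set_format("numpy")

train, valid, num_train, num_valid = train_test_split(
    partition, test_fraction=0.2, seed=42
)
train_dmatrix = transform_dataset_to_dmatrix(train)
print(num_train, num_valid, train_dmatrix.num_row())
```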
From b272f11ba0051086547757b51ca7a86e8fc2978b Mon Sep 17 00:00:00 2001
From: yan-gao-GY
Date: Tue, 7 Nov 2023 15:45:11 +0000
Subject: [PATCH 06/31] Clean up env; add weighted AUC aggregation

---
 examples/quickstart-xgboost/pyproject.toml   |  5 ++---
 examples/quickstart-xgboost/requirements.txt |  3 +--
 examples/quickstart-xgboost/run.sh           | 20 --------------------
 examples/quickstart-xgboost/server.py        |  5 ++---
 examples/quickstart-xgboost/strategy.py      |  2 +-
 5 files changed, 6 insertions(+), 29 deletions(-)
 delete mode 100755 examples/quickstart-xgboost/run.sh

diff --git a/examples/quickstart-xgboost/pyproject.toml b/examples/quickstart-xgboost/pyproject.toml
index 21a1d6d047a..2475d2587a4 100644
--- a/examples/quickstart-xgboost/pyproject.toml
+++ b/examples/quickstart-xgboost/pyproject.toml
@@ -11,6 +11,5 @@ authors = ["The Flower Authors"]
 [tool.poetry.dependencies]
 python = ">=3.8,<3.11"
 flwr = ">=1.0,<2.0"
-torch = "1.13.1"
-torchvision = "0.14.1"
-tqdm = "4.65.0"
+flwr-datasets = ">=0.0.1,<1.0.0"
+xgboost = ">=2.0.0,<3.0.0"

diff --git a/examples/quickstart-xgboost/requirements.txt b/examples/quickstart-xgboost/requirements.txt
index 67da34394f2..da1ecac4fed 100644
--- a/examples/quickstart-xgboost/requirements.txt
+++ b/examples/quickstart-xgboost/requirements.txt
@@ -1,4 +1,3 @@
 flwr>=1.0, <2.0
-flwr_datasets
+flwr_datasets==0.0.1
 xgboost==2.0.1
-tqdm==4.65.0

diff --git a/examples/quickstart-xgboost/run.sh b/examples/quickstart-xgboost/run.sh
deleted file mode 100755
index 9a157a04950..00000000000
--- a/examples/quickstart-xgboost/run.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-set -e
-cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"/
-
-# Download the CIFAR-10 dataset
-python -c "from dataset import init_higgs; init_higgs(20, "uniform")"
-
-echo "Starting server"
-python server.py &
-sleep 3  # Sleep for 3s to give the server enough time to start
-
-for i in `seq 0 1`; do
-    echo "Starting client $i"
-    python client.py &
-done
-
-# Enable CTRL+C to stop all background processes
-trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM
-# Wait for all background processes to complete
-wait

diff --git a/examples/quickstart-xgboost/server.py b/examples/quickstart-xgboost/server.py
index 714bc01b06d..8382c2af666 100644
--- a/examples/quickstart-xgboost/server.py
+++ b/examples/quickstart-xgboost/server.py
@@ -19,9 +19,8 @@ def eval_config(rnd: int) -> Dict[str, str]:
 
 def evaluate_metrics_aggregation(eval_metrics):
     """Return an aggregated metric (AUC) for evaluation."""
-    auc_aggregated = sum([metrics["AUC"] for _, metrics in eval_metrics]) / len(
-        eval_metrics
-    )
+    total_num = sum([num for num, _ in eval_metrics])
+    auc_aggregated = sum([metrics["AUC"] * num for num, metrics in eval_metrics]) / total_num
     metrics_aggregated = {"AUC": auc_aggregated}
     return metrics_aggregated

diff --git a/examples/quickstart-xgboost/strategy.py b/examples/quickstart-xgboost/strategy.py
index 7a0bbb61d6e..b1cb3f9318d 100644
--- a/examples/quickstart-xgboost/strategy.py
+++ b/examples/quickstart-xgboost/strategy.py
@@ -82,7 +82,7 @@ def aggregate(bst_prev, bst_curr):
             "iteration_indptr"
         ]
         bst_prev["learner"]["gradient_booster"]["model"]["iteration_indptr"].append(
-            iteration_indptr[-1] + 1 * paral_tree_num_curr
+            iteration_indptr[-1] + paral_tree_num_curr
         )
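The switch to a weighted average matters once partitions are unbalanced (the linear, square, and exponential partitioners all produce unequal sizes). A tiny worked check with made-up numbers:

```python
# Two clients report (num_examples, metrics); the larger client dominates
eval_metrics = [(100, {"AUC": 0.80}), (300, {"AUC": 0.90})]

total_num = sum(num for num, _ in eval_metrics)
auc_aggregated = sum(m["AUC"] * num for num, m in eval_metrics) / total_num

assert round(auc_aggregated, 6) == 0.875  # vs. 0.85 for the unweighted mean
```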
From 0fb9d36c1478ea2a82eed5cc1764e7c5e9effa1a Mon Sep 17 00:00:00 2001
From: yan-gao-GY
Date: Tue, 7 Nov 2023 16:57:08 +0000
Subject: [PATCH 07/31] Replace print with log

---
 examples/quickstart-xgboost/client.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/examples/quickstart-xgboost/client.py b/examples/quickstart-xgboost/client.py
index da8fbea42dc..f3f6b1031fa 100644
--- a/examples/quickstart-xgboost/client.py
+++ b/examples/quickstart-xgboost/client.py
@@ -1,8 +1,10 @@
 import warnings
+from logging import INFO
 import xgboost as xgb
 
 import flwr as fl
 from flwr_datasets import FederatedDataset
+from flwr.common.logger import log
 from flwr.common import (
     Code,
     EvaluateIns,
@@ -88,7 +90,7 @@ def fit(self, ins: FitIns) -> FitRes:
         if not self.bst:
             # first round local training
-            print("Start training at round 1")
+            log(INFO, "Start training at round 1")
             bst = xgb.train(
                 params,
                 train_dmatrix,
@@ -98,7 +100,7 @@ def fit(self, ins: FitIns) -> FitRes:
             self.config = bst.save_config()
             self.bst = bst
         else:
-            print("load global model")
+            log(INFO, "Load global model")
             for item in ins.parameters.tensors:
                 global_model = bytearray(item)
 
@@ -129,7 +131,7 @@ def evaluate(self, ins: EvaluateIns) -> EvaluateRes:
         auc = round(float(eval_results.split("\t")[2].split(":")[1]), 4)
 
         global_round = ins.config["global_round"]
-        print(f"AUC = {auc} at round {global_round}")
+        log(INFO, f"AUC = {auc} at round {global_round}")

From d0b33be2e2421881b9b4eb78840c713179ac1971 Mon Sep 17 00:00:00 2001
From: yan-gao-GY
Date: Tue, 7 Nov 2023 20:02:52 +0000
Subject: [PATCH 08/31] Add file description in readme

---
 examples/quickstart-xgboost/README.md | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/examples/quickstart-xgboost/README.md b/examples/quickstart-xgboost/README.md
index 9264477ee23..dad8814ece0 100644
--- a/examples/quickstart-xgboost/README.md
+++ b/examples/quickstart-xgboost/README.md
@@ -13,14 +13,14 @@ git clone --depth=1 https://github.com/adap/flower.git && mv flower/examples/qui
 
 This will create a new directory called `quickstart-xgboost` containing the following files:
 
-```shell
--- pyproject.toml
--- requirements.txt
--- client.py
--- server.py
--- strategy.py
--- dataset.py
--- README.md
+```
+-- README.md         <- You're reading this right now
+-- server.py         <- Defines the server-side logic
+-- strategy.py       <- Defines the tree-based bagging aggregation
+-- client.py         <- Defines the client-side logic
+-- dataset.py        <- Defines the functions of data loading and partitioning
+-- pyproject.toml    <- Example dependencies (if you use Poetry)
+-- requirements.txt  <- Example dependencies
 ```

From 91af54831d55ef16054a27009807226fab76a318 Mon Sep 17 00:00:00 2001
From: yan-gao-GY
Date: Wed, 8 Nov 2023 17:55:43 +0000
Subject: [PATCH 09/31] Add arguments parser on client side; Do formatting

---
 examples/quickstart-xgboost/client.py   | 63 +++++++++++++++++++++----
 examples/quickstart-xgboost/dataset.py  |  9 ++--
 examples/quickstart-xgboost/server.py   |  4 +-
 examples/quickstart-xgboost/strategy.py |  4 +-
 4 files changed, 65 insertions(+), 15 deletions(-)

diff --git a/examples/quickstart-xgboost/client.py b/examples/quickstart-xgboost/client.py
index f3f6b1031fa..99f07ad79d7 100644
--- a/examples/quickstart-xgboost/client.py
+++ b/examples/quickstart-xgboost/client.py
@@ -1,4 +1,5 @@
 import warnings
+import argparse
 from logging import INFO
 import xgboost as xgb
 
@@ -17,29 +18,75 @@
     Status,
 )
 
-from dataset import instantiate_partitioner, train_test_split, transform_dataset_to_dmatrix
+from dataset import (
+    instantiate_partitioner,
+    train_test_split,
+    transform_dataset_to_dmatrix,
+)
 
 
 warnings.filterwarnings("ignore", category=UserWarning)
 
+
+def args_parser():
+    """Parse arguments to define experimental settings."""
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--num_partitions", default=20, type=int, help="Number of partitions."
+    )
+    parser.add_argument(
+        "--partitioner_type",
+        default="uniform",
+        type=str,
+        choices=["uniform", "linear", "square", "exponential"],
+        help="Partitioner types.",
+    )
+    parser.add_argument(
+        "--partition_id",
+        default=0,
+        type=int,
+        help="Partition ID used for the current client.",
+    )
+    parser.add_argument(
+        "--seed", default=42, type=int, help="Seed used for train/test splitting."
+    )
+    parser.add_argument(
+        "--test_fraction",
+        default=0.2,
+        type=float,
+        help="test fraction for train/test splitting.",
+    )
+
+    args_ = parser.parse_args()
+    return args_
+
+
+# Parse arguments for experimental settings
+args = args_parser()
+
 # Load (HIGGS) dataset and conduct partitioning
-num_partitions = 20
+num_partitions = args.num_partitions
 # partitioner type is chosen from ["uniform", "linear", "square", "exponential"]
-partitioner_type = "uniform"
+partitioner_type = args.partitioner_type
 
 # instantiate partitioner
-partitioner = instantiate_partitioner(partitioner_type=partitioner_type, num_partitions=num_partitions)
+partitioner = instantiate_partitioner(
+    partitioner_type=partitioner_type, num_partitions=num_partitions
+)
 fds = FederatedDataset(dataset="jxie/higgs", partitioners={"train": partitioner})
 
 # let's use the first partition as an example
-partition_id = 0
+partition_id = args.partition_id
 partition = fds.load_partition(idx=partition_id, split="train")
 partition.set_format("numpy")
 
 # train/test splitting and data re-formatting
-SEED = 42
-test_fraction = 0.2
-train_data, valid_data, num_train, num_val = train_test_split(partition, test_fraction=test_fraction, seed=SEED)
+SEED = args.seed
+test_fraction = args.test_fraction
+train_data, valid_data, num_train, num_val = train_test_split(
+    partition, test_fraction=test_fraction, seed=SEED
+)
 
 # reformat data to DMatrix for xgboost
 train_dmatrix = transform_dataset_to_dmatrix(train_data)

diff --git a/examples/quickstart-xgboost/dataset.py b/examples/quickstart-xgboost/dataset.py
index 351f36e7138..345dce2741a 100644
--- a/examples/quickstart-xgboost/dataset.py
+++ b/examples/quickstart-xgboost/dataset.py
@@ -17,8 +17,11 @@
 
 def instantiate_partitioner(partitioner_type: str, num_partitions: int):
-    """Initialise partitioner based on selected partitioner type and number of partitions."""
-    partitioner = CORRELATION_TO_PARTITIONER[partitioner_type](num_partitions=num_partitions)
+    """Initialise partitioner based on selected partitioner type and number of
+    partitions."""
+    partitioner = CORRELATION_TO_PARTITIONER[partitioner_type](
+        num_partitions=num_partitions
+    )
     return partitioner
 
@@ -35,7 +38,7 @@ def train_test_split(partition: datasets.Dataset, test_fraction: float, seed: in
 
 def transform_dataset_to_dmatrix(data):
-    """transform dataset to DMatrix format for xgboost."""
+    """Transform dataset to DMatrix format for xgboost."""
     x = data["inputs"]
     y = data["label"]
     new_data = xgb.DMatrix(x, label=y)
     return new_data

diff --git a/examples/quickstart-xgboost/server.py b/examples/quickstart-xgboost/server.py
index 8382c2af666..613d61adaf6 100644
--- a/examples/quickstart-xgboost/server.py
+++ b/examples/quickstart-xgboost/server.py
@@ -20,7 +20,9 @@ def eval_config(rnd: int) -> Dict[str, str]:
 def evaluate_metrics_aggregation(eval_metrics):
     """Return an aggregated metric (AUC) for evaluation."""
     total_num = sum([num for num, _ in eval_metrics])
-    auc_aggregated = sum([metrics["AUC"] * num for num, metrics in eval_metrics]) / total_num
+    auc_aggregated = (
+        sum([metrics["AUC"] * num for num, metrics in eval_metrics]) / total_num
+    )
     metrics_aggregated = {"AUC": auc_aggregated}
     return metrics_aggregated

diff --git a/examples/quickstart-xgboost/strategy.py b/examples/quickstart-xgboost/strategy.py
index b1cb3f9318d..7bdb34eef1b 100644
--- a/examples/quickstart-xgboost/strategy.py
+++ b/examples/quickstart-xgboost/strategy.py
@@ -37,9 +37,7 @@ def aggregate_fit(
         weights_avg = json.dumps(global_model)
 
         return (
-            Parameters(
-                tensor_type="", tensors=[bytes(weights_avg, "utf-8")]
-            ),
+            Parameters(tensor_type="", tensors=[bytes(weights_avg, "utf-8")]),
             {},
         )

From b96584327c5119343c54c417d18509a6c7d5d08d Mon Sep 17 00:00:00 2001
From: yan-gao-GY
Date: Thu, 9 Nov 2023 09:58:32 +0000
Subject: [PATCH 10/31] Update required package flwr-datasets==0.02; pull back
 run.sh

---
 examples/quickstart-xgboost/pyproject.toml   |  2 +-
 examples/quickstart-xgboost/requirements.txt |  2 +-
 examples/quickstart-xgboost/run.sh           | 17 +++++++++++++++++
 3 files changed, 19 insertions(+), 2 deletions(-)
 create mode 100755 examples/quickstart-xgboost/run.sh

diff --git a/examples/quickstart-xgboost/pyproject.toml b/examples/quickstart-xgboost/pyproject.toml
index 2475d2587a4..039809c577e 100644
--- a/examples/quickstart-xgboost/pyproject.toml
+++ b/examples/quickstart-xgboost/pyproject.toml
@@ -11,5 +11,5 @@ authors = ["The Flower Authors"]
 [tool.poetry.dependencies]
 python = ">=3.8,<3.11"
 flwr = ">=1.0,<2.0"
-flwr-datasets = ">=0.0.1,<1.0.0"
+flwr-datasets = ">=0.0.2,<1.0.0"
 xgboost = ">=2.0.0,<3.0.0"

diff --git a/examples/quickstart-xgboost/requirements.txt b/examples/quickstart-xgboost/requirements.txt
index da1ecac4fed..d186af05e11 100644
--- a/examples/quickstart-xgboost/requirements.txt
+++ b/examples/quickstart-xgboost/requirements.txt
@@ -1,3 +1,3 @@
 flwr>=1.0, <2.0
-flwr_datasets==0.0.1
+flwr-datasets==0.0.2
 xgboost==2.0.1

diff --git a/examples/quickstart-xgboost/run.sh b/examples/quickstart-xgboost/run.sh
new file mode 100755
index 00000000000..f7d37f47051
--- /dev/null
+++ b/examples/quickstart-xgboost/run.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+set -e
+cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"/
+
+echo "Starting server"
+python server.py &
+sleep 3  # Sleep for 3s to give the server enough time to start
+
+for i in `seq 0 1`; do
+    echo "Starting client $i"
+    python client.py --partition_id=$i &
+done
+
+# Enable CTRL+C to stop all background processes
+trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM
+# Wait for all background processes to complete
+wait
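For readers who prefer not to use bash, a rough Python equivalent of what the restored `run.sh` does (process handling is deliberately minimal; the three-second sleep mirrors the script):

```python
import subprocess
import time

# Start the server, give it time to come up, then launch two clients
server = subprocess.Popen(["python", "server.py"])
time.sleep(3)

clients = [
    subprocess.Popen(["python", "client.py", f"--partition_id={i}"])
    for i in range(2)
]

for client in clients:
    client.wait()
server.terminate()
```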
From 6bbee08da2cc18835d04c09b8f58038ccbc18d53 Mon Sep 17 00:00:00 2001
From: yan-gao-GY
Date: Thu, 9 Nov 2023 11:20:36 +0000
Subject: [PATCH 11/31] Move argument parser to utils; Modify comments

---
 examples/quickstart-xgboost/client.py        | 65 +++++---------------
 examples/quickstart-xgboost/dataset.py       |  3 +-
 examples/quickstart-xgboost/requirements.txt |  2 +-
 examples/quickstart-xgboost/strategy.py      |  8 +--
 examples/quickstart-xgboost/utils.py         | 35 +++++++++++
 5 files changed, 58 insertions(+), 55 deletions(-)
 create mode 100644 examples/quickstart-xgboost/utils.py

diff --git a/examples/quickstart-xgboost/client.py b/examples/quickstart-xgboost/client.py
index 99f07ad79d7..789fa6a939f 100644
--- a/examples/quickstart-xgboost/client.py
+++ b/examples/quickstart-xgboost/client.py
@@ -1,5 +1,4 @@
 import warnings
-import argparse
 from logging import INFO
 import xgboost as xgb
 
@@ -23,72 +22,39 @@
     train_test_split,
     transform_dataset_to_dmatrix,
 )
+from utils import client_args_parser
 
 
 warnings.filterwarnings("ignore", category=UserWarning)
 
 
-def args_parser():
-    """Parse arguments to define experimental settings."""
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument(
-        "--num_partitions", default=20, type=int, help="Number of partitions."
-    )
-    parser.add_argument(
-        "--partitioner_type",
-        default="uniform",
-        type=str,
-        choices=["uniform", "linear", "square", "exponential"],
-        help="Partitioner types.",
-    )
-    parser.add_argument(
-        "--partition_id",
-        default=0,
-        type=int,
-        help="Partition ID used for the current client.",
-    )
-    parser.add_argument(
-        "--seed", default=42, type=int, help="Seed used for train/test splitting."
-    )
-    parser.add_argument(
-        "--test_fraction",
-        default=0.2,
-        type=float,
-        help="test fraction for train/test splitting.",
-    )
-
-    args_ = parser.parse_args()
-    return args_
-
-
 # Parse arguments for experimental settings
-args = args_parser()
+args = client_args_parser()
 
 # Load (HIGGS) dataset and conduct partitioning
 num_partitions = args.num_partitions
-# partitioner type is chosen from ["uniform", "linear", "square", "exponential"]
+# Partitioner type is chosen from ["uniform", "linear", "square", "exponential"]
 partitioner_type = args.partitioner_type
 
-# instantiate partitioner
+# Instantiate partitioner
 partitioner = instantiate_partitioner(
     partitioner_type=partitioner_type, num_partitions=num_partitions
 )
 fds = FederatedDataset(dataset="jxie/higgs", partitioners={"train": partitioner})
 
-# let's use the first partition as an example
+# Let's use the first partition as an example
 partition_id = args.partition_id
 partition = fds.load_partition(idx=partition_id, split="train")
 partition.set_format("numpy")
 
-# train/test splitting and data re-formatting
+# Train/test splitting and data re-formatting
 SEED = args.seed
 test_fraction = args.test_fraction
 train_data, valid_data, num_train, num_val = train_test_split(
     partition, test_fraction=test_fraction, seed=SEED
 )
 
-# reformat data to DMatrix for xgboost
+# Reformat data to DMatrix for xgboost
 train_dmatrix = transform_dataset_to_dmatrix(train_data)
 valid_dmatrix = transform_dataset_to_dmatrix(valid_data)
 
@@ -97,7 +63,7 @@
 num_local_round = 1
 params = {
     "objective": "binary:logistic",
-    "eta": 0.1,  # lr
+    "eta": 0.1,  # Learning rate
     "max_depth": 8,
     "eval_metric": "auc",
     "nthread": 16,
@@ -111,6 +77,7 @@
 class FlowerClient(fl.client.Client):
     def __init__(self):
         self.bst = None
+        self.config = None
 
     def get_parameters(self, ins: GetParametersIns) -> GetParametersRes:
         _ = (self, ins)
@@ -122,12 +89,12 @@ def get_parameters(self, ins: GetParametersIns) -> GetParametersRes:
             parameters=Parameters(tensor_type="", tensors=[]),
         )
 
-    def local_boost(self):
-        # update trees based on local training data.
+    def _local_boost(self):
+        # Update trees based on local training data.
         for i in range(num_local_round):
             self.bst.update(train_dmatrix, self.bst.num_boosted_rounds())
 
-        # extract the last N=num_local_round trees as new local model
+        # Extract the last N=num_local_round trees as new local model
         bst = self.bst[
             self.bst.num_boosted_rounds()
             - num_local_round : self.bst.num_boosted_rounds()
@@ -136,7 +103,7 @@
 
     def fit(self, ins: FitIns) -> FitRes:
         if not self.bst:
-            # first round local training
+            # First round local training
             log(INFO, "Start training at round 1")
             bst = xgb.train(
                 params,
@@ -151,11 +118,11 @@ def fit(self, ins: FitIns) -> FitRes:
             for item in ins.parameters.tensors:
                 global_model = bytearray(item)
 
-            # load global model into booster
+            # Load global model into booster
             self.bst.load_model(global_model)
             self.bst.load_config(self.config)
 
-            bst = self.local_boost()
+            bst = self._local_boost()
 
         local_model = bst.save_raw("json")
         local_model_bytes = bytes(local_model)
@@ -193,5 +160,5 @@
 
 # Start Flower client
 fl.client.start_client(
-    server_address="127.0.0.1:8080", client=FlowerClient().to_client()
+    server_address="127.0.0.1:8080", client=FlowerClient()
 )

diff --git a/examples/quickstart-xgboost/dataset.py b/examples/quickstart-xgboost/dataset.py
index 345dce2741a..a3f80a44c07 100644
--- a/examples/quickstart-xgboost/dataset.py
+++ b/examples/quickstart-xgboost/dataset.py
@@ -1,5 +1,6 @@
 import datasets
 import xgboost as xgb
+from datasets import DatasetDict
 
 from flwr_datasets.partitioner import (
     IidPartitioner,
     LinearPartitioner,
@@ -37,7 +38,7 @@ def train_test_split(partition: datasets.Dataset, test_fraction: float, seed: in
 
-def transform_dataset_to_dmatrix(data):
+def transform_dataset_to_dmatrix(data: DatasetDict):
     """Transform dataset to DMatrix format for xgboost."""
     x = data["inputs"]
     y = data["label"]

diff --git a/examples/quickstart-xgboost/requirements.txt b/examples/quickstart-xgboost/requirements.txt
index d186af05e11..73ce54ec473 100644
--- a/examples/quickstart-xgboost/requirements.txt
+++ b/examples/quickstart-xgboost/requirements.txt
@@ -1,3 +1,3 @@
-flwr>=1.0, <2.0
+flwr==1.5.0
 flwr-datasets==0.0.2
 xgboost==2.0.1

diff --git a/examples/quickstart-xgboost/strategy.py b/examples/quickstart-xgboost/strategy.py
index 7bdb34eef1b..382055a35e8 100644
--- a/examples/quickstart-xgboost/strategy.py
+++ b/examples/quickstart-xgboost/strategy.py
@@ -69,7 +69,7 @@ def aggregate(bst_prev, bst_curr):
     if not bst_prev:
         return bst_curr
     else:
-        # get the tree numbers
+        # Get the tree numbers
         tree_num_prev, paral_tree_num_prev = _get_tree_nums(bst_prev)
         tree_num_curr, paral_tree_num_curr = _get_tree_nums(bst_curr)
 
@@ -83,7 +83,7 @@ def aggregate(bst_prev, bst_curr):
             iteration_indptr[-1] + paral_tree_num_curr
         )
 
-        # aggregate new trees
+        # Aggregate new trees
         trees_curr = bst_curr["learner"]["gradient_booster"]["model"]["trees"]
         for tree_count in range(paral_tree_num_curr):
             trees_curr[tree_count]["id"] = tree_num_prev + tree_count
@@ -95,13 +95,13 @@ def aggregate(bst_prev, bst_curr):
 
 def _get_tree_nums(xgb_model):
-    # get the number of trees
+    # Get the number of trees
     tree_num = int(
         xgb_model["learner"]["gradient_booster"]["model"]["gbtree_model_param"][
             "num_trees"
         ]
     )
-    # get the number of parallel trees
+    # Get the number of parallel trees
     paral_tree_num = int(
         xgb_model["learner"]["gradient_booster"]["model"]["gbtree_model_param"][
             "num_parallel_tree"
         ]
     )
     return tree_num, paral_tree_num

diff --git a/examples/quickstart-xgboost/utils.py b/examples/quickstart-xgboost/utils.py
new file mode 100644
index 00000000000..0d2eb99a9fa
--- /dev/null
+++ b/examples/quickstart-xgboost/utils.py
@@ -0,0 +1,35 @@
+import argparse
+
+
+def client_args_parser():
+    """Parse arguments to define experimental settings."""
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--num_partitions", default=20, type=int, help="Number of partitions."
+    )
+    parser.add_argument(
+        "--partitioner_type",
+        default="uniform",
+        type=str,
+        choices=["uniform", "linear", "square", "exponential"],
+        help="Partitioner types.",
+    )
+    parser.add_argument(
+        "--partition_id",
+        default=0,
+        type=int,
+        help="Partition ID used for the current client.",
+    )
+    parser.add_argument(
+        "--seed", default=42, type=int, help="Seed used for train/test splitting."
+    )
+    parser.add_argument(
+        "--test_fraction",
+        default=0.2,
+        type=float,
+        help="test fraction for train/test splitting.",
+    )
+
+    args = parser.parse_args()
+    return args
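The parser that moved into `utils.py` can be exercised directly; a quick sanity check with a hypothetical command line:

```python
import sys

from utils import client_args_parser

# Simulate: python client.py --partitioner_type exponential --partition_id 5
sys.argv = ["client.py", "--partitioner_type", "exponential", "--partition_id", "5"]
args = client_args_parser()

assert args.partitioner_type == "exponential"
assert args.partition_id == 5
assert args.num_partitions == 20  # default value
```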
bst else: - log(INFO, "Load global model") + log(INFO, "Load global model...") for item in ins.parameters.tensors: global_model = bytearray(item) @@ -139,10 +150,10 @@ def fit(self, ins: FitIns) -> FitRes: def evaluate(self, ins: EvaluateIns) -> EvaluateRes: eval_results = self.bst.eval_set( - evals=[(train_dmatrix, "train"), (valid_dmatrix, "valid")], + evals=[(valid_dmatrix, "valid")], iteration=self.bst.num_boosted_rounds() - 1, ) - auc = round(float(eval_results.split("\t")[2].split(":")[1]), 4) + auc = round(float(eval_results.split("\t")[1].split(":")[1]), 4) global_round = ins.config["global_round"] log(INFO, f"AUC = {auc} at round {global_round}") @@ -159,6 +170,4 @@ def evaluate(self, ins: EvaluateIns) -> EvaluateRes: # Start Flower client -fl.client.start_client( - server_address="127.0.0.1:8080", client=FlowerClient() -) +fl.client.start_client(server_address="127.0.0.1:8080", client=FlowerClient()) diff --git a/examples/quickstart-xgboost/dataset.py b/examples/quickstart-xgboost/dataset.py index a3f80a44c07..6c59df766f2 100644 --- a/examples/quickstart-xgboost/dataset.py +++ b/examples/quickstart-xgboost/dataset.py @@ -1,7 +1,6 @@ import datasets import xgboost as xgb -from datasets import DatasetDict - +from datasets import DatasetDict, concatenate_datasets from flwr_datasets.partitioner import ( IidPartitioner, LinearPartitioner, @@ -38,9 +37,31 @@ def train_test_split(partition: datasets.Dataset, test_fraction: float, seed: in return partition_train, partition_test, num_train, num_test -def transform_dataset_to_dmatrix(data: DatasetDict): +def transform_dataset_to_dmatrix(data): """Transform dataset to DMatrix format for xgboost.""" x = data["inputs"] y = data["label"] new_data = xgb.DMatrix(x, label=y) return new_data + + +def resplit(dataset: DatasetDict) -> DatasetDict: + """Increase the quantity of centralised test samples from 500K to 1M.""" + return DatasetDict( + { + "train": dataset["train"].select( + range(0, dataset["train"].num_rows - 500_000) + ), + "test": concatenate_datasets( + [ + dataset["train"].select( + range( + dataset["train"].num_rows - 500_000, + dataset["train"].num_rows, + ) + ), + dataset["test"], + ] + ), + } + ) diff --git a/examples/quickstart-xgboost/server.py b/examples/quickstart-xgboost/server.py index 613d61adaf6..b74659d38f5 100644 --- a/examples/quickstart-xgboost/server.py +++ b/examples/quickstart-xgboost/server.py @@ -1,12 +1,44 @@ from typing import Dict +from logging import INFO +import xgboost as xgb import flwr as fl +from flwr.common.logger import log +from flwr.common import Parameters, Scalar +from flwr_datasets import FederatedDataset + from strategy import XGbBagging +from utils import server_args_parser +from dataset import resplit, transform_dataset_to_dmatrix + + +# Parse arguments for experimental settings +args = server_args_parser() +pool_size = args.pool_size +num_rounds = args.num_rounds +num_clients_per_round = args.num_clients_per_round +num_evaluate_clients = args.num_evaluate_clients +centralised_eval = args.centralised_eval + +# Load centralised test set +fds = FederatedDataset( + dataset="jxie/higgs", partitioners={"train": 20}, resplitter=resplit +) +test_set = fds.load_full("test") +test_set.set_format("numpy") +test_dmatrix = transform_dataset_to_dmatrix(test_set) -pool_size = 2 -num_rounds = 5 -num_clients_per_round = 2 -min_evaluate_clients = 2 +# Hyper-parameters used for initialisation +params = { + "objective": "binary:logistic", + "eta": 0.1, # Learning rate + "max_depth": 8, + "eval_metric": "auc", + 
"nthread": 16, + "num_parallel_tree": 1, + "subsample": 1, + "tree_method": "hist", +} def eval_config(rnd: int) -> Dict[str, str]: @@ -27,15 +59,45 @@ def evaluate_metrics_aggregation(eval_metrics): return metrics_aggregated +def get_evaluate_fn(test_data): + def evaluate_fn( + server_round: int, parameters: Parameters, config: Dict[str, Scalar] + ): + # If at the first round, skip the evaluation + if server_round == 0: + return 0, {} + else: + bst = xgb.Booster(params=params) + for para in parameters.tensors: + para_b = bytearray(para) + + # Load global model + bst.load_model(para_b) + # Run evaluation + eval_results = bst.eval_set( + evals=[(test_data, "valid")], + iteration=bst.num_boosted_rounds() - 1, + ) + auc = round(float(eval_results.split("\t")[1].split(":")[1]), 4) + log(INFO, f"AUC = {auc} at round {server_round}") + + return 0, {"AUC": auc} + + return evaluate_fn + + # Define strategy strategy = XGbBagging( + evaluate_function=get_evaluate_fn(test_dmatrix) if centralised_eval else None, fraction_fit=(float(num_clients_per_round) / pool_size), min_fit_clients=num_clients_per_round, min_available_clients=pool_size, - fraction_evaluate=1.0, - min_evaluate_clients=min_evaluate_clients, + min_evaluate_clients=num_evaluate_clients if not centralised_eval else 0, + fraction_evaluate=1.0 if not centralised_eval else 0.0, on_evaluate_config_fn=eval_config, - evaluate_metrics_aggregation_fn=evaluate_metrics_aggregation, + evaluate_metrics_aggregation_fn=evaluate_metrics_aggregation + if not centralised_eval + else None, ) # Start Flower server diff --git a/examples/quickstart-xgboost/strategy.py b/examples/quickstart-xgboost/strategy.py index 382055a35e8..54bf0721407 100644 --- a/examples/quickstart-xgboost/strategy.py +++ b/examples/quickstart-xgboost/strategy.py @@ -1,5 +1,5 @@ from logging import WARNING -from typing import Dict, List, Optional, Tuple, Union +from typing import Callable, Dict, List, Optional, Tuple, Union import flwr as fl import json @@ -14,6 +14,19 @@ class XGbBagging(fl.server.strategy.FedAvg): + def __init__( + self, + evaluate_function: Optional[ + Callable[ + [int, Parameters, Dict[str, Scalar]], + Optional[Tuple[float, Dict[str, Scalar]]], + ] + ] = None, + **kwargs, + ): + self.evaluate_function = evaluate_function + super().__init__(**kwargs) + def aggregate_fit( self, server_round: int, @@ -64,6 +77,19 @@ def aggregate_evaluate( return 0, metrics_aggregated + def evaluate( + self, server_round: int, parameters: Parameters + ) -> Optional[Tuple[float, Dict[str, Scalar]]]: + """Evaluate model parameters using an evaluation function.""" + if self.evaluate_function is None: + # No evaluation function provided + return None + eval_res = self.evaluate_function(server_round, parameters, {}) + if eval_res is None: + return None + loss, metrics = eval_res + return loss, metrics + def aggregate(bst_prev, bst_curr): if not bst_prev: diff --git a/examples/quickstart-xgboost/utils.py b/examples/quickstart-xgboost/utils.py index 0d2eb99a9fa..9939c16fe5a 100644 --- a/examples/quickstart-xgboost/utils.py +++ b/examples/quickstart-xgboost/utils.py @@ -2,7 +2,7 @@ def client_args_parser(): - """Parse arguments to define experimental settings.""" + """Parse arguments to define experimental settings on client side.""" parser = argparse.ArgumentParser() parser.add_argument( @@ -28,7 +28,46 @@ def client_args_parser(): "--test_fraction", default=0.2, type=float, - help="test fraction for train/test splitting.", + help="Test fraction for train/test splitting.", + ) + 
parser.add_argument(
+        "--centralised_eval",
+        default=True,
+        type=bool,
+        help="Conduct centralised evaluation (True), or client evaluation on hold-out data (False).",
+    )
+
+    args = parser.parse_args()
+    return args
+
+
+def server_args_parser():
+    """Parse arguments to define experimental settings on server side."""
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--pool_size", default=2, type=int, help="Number of total clients."
+    )
+    parser.add_argument(
+        "--num_rounds", default=5, type=int, help="Number of FL rounds."
+    )
+    parser.add_argument(
+        "--num_clients_per_round",
+        default=2,
+        type=int,
+        help="Number of clients participating in training each round.",
+    )
+    parser.add_argument(
+        "--num_evaluate_clients",
+        default=2,
+        type=int,
+        help="Number of clients selected for evaluation.",
+    )
+    parser.add_argument(
+        "--centralised_eval",
+        default=True,
+        type=bool,
+        help="Conduct centralised evaluation (True), or client evaluation on hold-out data (False).",
     )
 
     args = parser.parse_args()

From 57bde1870d9336f9e9fc2474edeb711c33c91b4e Mon Sep 17 00:00:00 2001
From: yan-gao-GY
Date: Sun, 12 Nov 2023 21:54:44 +0000
Subject: [PATCH 13/31] Correct aggregation and match Nvidia results

---
 examples/quickstart-xgboost/client.py        | 9 +++------
 examples/quickstart-xgboost/requirements.txt | 6 +++---
 examples/quickstart-xgboost/server.py        | 1 +
 examples/quickstart-xgboost/strategy.py      | 6 +++---
 examples/quickstart-xgboost/utils.py         | 6 ++----
 5 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/examples/quickstart-xgboost/client.py b/examples/quickstart-xgboost/client.py
index 9b917408527..ad95e4cb174 100644
--- a/examples/quickstart-xgboost/client.py
+++ b/examples/quickstart-xgboost/client.py
@@ -105,11 +105,9 @@ def _local_boost(self):
         for i in range(num_local_round):
             self.bst.update(train_dmatrix, self.bst.num_boosted_rounds())
 
-        # Extract the last N=num_local_round trees as new local model
-        bst = self.bst[
-            self.bst.num_boosted_rounds()
-            - num_local_round : self.bst.num_boosted_rounds()
-        ]
+        # Extract the last N=num_local_round trees for server aggregation
+        bst = self.bst[self.bst.num_boosted_rounds() - num_local_round: self.bst.num_boosted_rounds()]
+
         return bst
 
     def fit(self, ins: FitIns) -> FitRes:
@@ -125,7 +123,6 @@ def fit(self, ins: FitIns) -> FitRes:
             self.config = bst.save_config()
             self.bst = bst
         else:
-            log(INFO, "Load global model...")
             for item in ins.parameters.tensors:
                 global_model = bytearray(item)
 
diff --git a/examples/quickstart-xgboost/requirements.txt b/examples/quickstart-xgboost/requirements.txt
index 73ce54ec473..c6b9c1a6789 100644
--- a/examples/quickstart-xgboost/requirements.txt
+++ b/examples/quickstart-xgboost/requirements.txt
@@ -1,3 +1,3 @@
-flwr==1.5.0
-flwr-datasets==0.0.2
-xgboost==2.0.1
+flwr>=1.0, <2.0
+flwr-datasets>=0.0.2, <1.0.0
+xgboost>=2.0.0, <3.0.0
diff --git a/examples/quickstart-xgboost/server.py b/examples/quickstart-xgboost/server.py
index b74659d38f5..81f0b16dd3a 100644
--- a/examples/quickstart-xgboost/server.py
+++ b/examples/quickstart-xgboost/server.py
@@ -60,6 +60,7 @@ def evaluate_metrics_aggregation(eval_metrics):
 
 
 def get_evaluate_fn(test_data):
+    """Return a function for centralised evaluation."""
     def evaluate_fn(
         server_round: int, parameters: Parameters, config: Dict[str, Scalar]
     ):
diff --git a/examples/quickstart-xgboost/strategy.py b/examples/quickstart-xgboost/strategy.py
index 54bf0721407..0fa8fd40530 100644
--- a/examples/quickstart-xgboost/strategy.py
+++ b/examples/quickstart-xgboost/strategy.py
@@ -25,6 +25,7 @@ def __init__(
         **kwargs,
     ):
         self.evaluate_function = evaluate_function
+        self.global_model = None
         super().__init__(**kwargs)
 
     def aggregate_fit(
@@ -41,13 +42,12 @@ def aggregate_fit(
             return None, {}
 
         # Aggregate all the client trees
-        global_model = None
         for _, fit_res in results:
             update = fit_res.parameters.tensors
             for item in update:
-                global_model = aggregate(global_model, json.loads(bytearray(item)))
+                self.global_model = aggregate(self.global_model, json.loads(bytearray(item)))
 
-        weights_avg = json.dumps(global_model)
+        weights_avg = json.dumps(self.global_model)
 
         return (
             Parameters(tensor_type="", tensors=[bytes(weights_avg, "utf-8")]),
diff --git a/examples/quickstart-xgboost/utils.py b/examples/quickstart-xgboost/utils.py
index 9939c16fe5a..a215eb1f6a3 100644
--- a/examples/quickstart-xgboost/utils.py
+++ b/examples/quickstart-xgboost/utils.py
@@ -32,8 +32,7 @@ def client_args_parser():
     )
     parser.add_argument(
         "--centralised_eval",
-        default=True,
-        type=bool,
+        action='store_true',
         help="Conduct centralised evaluation (True), or client evaluation on hold-out data (False).",
     )
 
@@ -65,8 +64,7 @@ def server_args_parser():
     )
     parser.add_argument(
         "--centralised_eval",
-        default=True,
-        type=bool,
+        action='store_true',
         help="Conduct centralised evaluation (True), or client evaluation on hold-out data (False).",
     )
 
From 727a028b591cf13643bb70ff75ae86fb3a7389dd Mon Sep 17 00:00:00 2001
From: yan-gao-GY
Date: Sun, 12 Nov 2023 22:12:08 +0000
Subject: [PATCH 14/31] formatting

---
 examples/quickstart-xgboost/client.py   | 5 ++++-
 examples/quickstart-xgboost/server.py   | 1 +
 examples/quickstart-xgboost/strategy.py | 4 +++-
 examples/quickstart-xgboost/utils.py    | 4 ++--
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/examples/quickstart-xgboost/client.py b/examples/quickstart-xgboost/client.py
index ad95e4cb174..795d42dd5fa 100644
--- a/examples/quickstart-xgboost/client.py
+++ b/examples/quickstart-xgboost/client.py
@@ -106,7 +106,10 @@ def _local_boost(self):
             self.bst.update(train_dmatrix, self.bst.num_boosted_rounds())
 
         # Extract the last N=num_local_round trees for server aggregation
-        bst = self.bst[self.bst.num_boosted_rounds() - num_local_round: self.bst.num_boosted_rounds()]
+        bst = self.bst[
+            self.bst.num_boosted_rounds()
+            - num_local_round : self.bst.num_boosted_rounds()
+        ]
 
         return bst
 
diff --git a/examples/quickstart-xgboost/server.py b/examples/quickstart-xgboost/server.py
index 81f0b16dd3a..d236cf83616 100644
--- a/examples/quickstart-xgboost/server.py
+++ b/examples/quickstart-xgboost/server.py
@@ -61,6 +61,7 @@ def evaluate_metrics_aggregation(eval_metrics):
 
 def get_evaluate_fn(test_data):
     """Return a function for centralised evaluation."""
+
     def evaluate_fn(
         server_round: int, parameters: Parameters, config: Dict[str, Scalar]
     ):
diff --git a/examples/quickstart-xgboost/strategy.py b/examples/quickstart-xgboost/strategy.py
index 0fa8fd40530..d099978eff9 100644
--- a/examples/quickstart-xgboost/strategy.py
+++ b/examples/quickstart-xgboost/strategy.py
@@ -45,7 +45,9 @@ def aggregate_fit(
         for _, fit_res in results:
             update = fit_res.parameters.tensors
             for item in update:
-                self.global_model = aggregate(self.global_model, json.loads(bytearray(item)))
+                self.global_model = aggregate(
+                    self.global_model, json.loads(bytearray(item))
+                )
 
         weights_avg = json.dumps(self.global_model)
 
diff --git a/examples/quickstart-xgboost/utils.py b/examples/quickstart-xgboost/utils.py
index a215eb1f6a3..ba03ed1bd0e 100644
--- a/examples/quickstart-xgboost/utils.py
+++ 
b/examples/quickstart-xgboost/utils.py @@ -32,7 +32,7 @@ def client_args_parser(): ) parser.add_argument( "--centralised_eval", - action='store_true', + action="store_true", help="Conduct centralised evaluation (True), or client evaluation on hold-out data (False).", ) @@ -64,7 +64,7 @@ def server_args_parser(): ) parser.add_argument( "--centralised_eval", - action='store_true', + action="store_true", help="Conduct centralised evaluation (True), or client evaluation on hold-out data (False).", ) From c91f7ad1f8b0b432841c9299b9c054ebb4f45f0c Mon Sep 17 00:00:00 2001 From: yan-gao-GY Date: Mon, 13 Nov 2023 12:35:33 +0000 Subject: [PATCH 15/31] Add type hints --- examples/quickstart-xgboost/dataset.py | 8 ++++---- examples/quickstart-xgboost/server.py | 13 +++++++------ examples/quickstart-xgboost/strategy.py | 5 +++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/examples/quickstart-xgboost/dataset.py b/examples/quickstart-xgboost/dataset.py index 6c59df766f2..80c978f1077 100644 --- a/examples/quickstart-xgboost/dataset.py +++ b/examples/quickstart-xgboost/dataset.py @@ -1,6 +1,6 @@ -import datasets import xgboost as xgb -from datasets import DatasetDict, concatenate_datasets +from typing import Callable, Dict, List, Optional, Tuple, Union +from datasets import Dataset, DatasetDict, concatenate_datasets from flwr_datasets.partitioner import ( IidPartitioner, LinearPartitioner, @@ -25,7 +25,7 @@ def instantiate_partitioner(partitioner_type: str, num_partitions: int): return partitioner -def train_test_split(partition: datasets.Dataset, test_fraction: float, seed: int): +def train_test_split(partition: Dataset, test_fraction: float, seed: int): """Split the data into train and validation set given split rate.""" train_test = partition.train_test_split(test_size=test_fraction, seed=seed) partition_train = train_test["train"] @@ -37,7 +37,7 @@ def train_test_split(partition: datasets.Dataset, test_fraction: float, seed: in return partition_train, partition_test, num_train, num_test -def transform_dataset_to_dmatrix(data): +def transform_dataset_to_dmatrix(data: Union[Dataset, DatasetDict]) -> xgb.core.DMatrix: """Transform dataset to DMatrix format for xgboost.""" x = data["inputs"] y = data["label"] diff --git a/examples/quickstart-xgboost/server.py b/examples/quickstart-xgboost/server.py index d236cf83616..70684f51369 100644 --- a/examples/quickstart-xgboost/server.py +++ b/examples/quickstart-xgboost/server.py @@ -21,12 +21,13 @@ centralised_eval = args.centralised_eval # Load centralised test set -fds = FederatedDataset( - dataset="jxie/higgs", partitioners={"train": 20}, resplitter=resplit -) -test_set = fds.load_full("test") -test_set.set_format("numpy") -test_dmatrix = transform_dataset_to_dmatrix(test_set) +if centralised_eval: + fds = FederatedDataset( + dataset="jxie/higgs", partitioners={"train": 20}, resplitter=resplit + ) + test_set = fds.load_full("test") + test_set.set_format("numpy") + test_dmatrix = transform_dataset_to_dmatrix(test_set) # Hyper-parameters used for initialisation params = { diff --git a/examples/quickstart-xgboost/strategy.py b/examples/quickstart-xgboost/strategy.py index d099978eff9..8cdc7f795a9 100644 --- a/examples/quickstart-xgboost/strategy.py +++ b/examples/quickstart-xgboost/strategy.py @@ -93,7 +93,8 @@ def evaluate( return loss, metrics -def aggregate(bst_prev, bst_curr): +def aggregate(bst_prev: Optional[Dict], bst_curr: Dict) -> Dict: + """Conduct bagging aggregation for given trees.""" if not bst_prev: return bst_curr else: @@ 
-122,7 +123,7 @@ def aggregate(bst_prev, bst_curr): return bst_prev -def _get_tree_nums(xgb_model): +def _get_tree_nums(xgb_model: Dict) -> (int, int): # Get the number of trees tree_num = int( xgb_model["learner"]["gradient_booster"]["model"]["gbtree_model_param"][ From 054f17f7077968511e8bcd000f40f60dbd747c9d Mon Sep 17 00:00:00 2001 From: yan-gao-GY Date: Tue, 14 Nov 2023 12:57:05 +0000 Subject: [PATCH 16/31] Update readme and run.sh --- examples/quickstart-xgboost/README.md | 22 +++++++++++++++++----- examples/quickstart-xgboost/run.sh | 2 +- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/examples/quickstart-xgboost/README.md b/examples/quickstart-xgboost/README.md index dad8814ece0..f7b98c11d3b 100644 --- a/examples/quickstart-xgboost/README.md +++ b/examples/quickstart-xgboost/README.md @@ -52,24 +52,36 @@ pip install -r requirements.txt ## Run Federated Learning with XGBoost and Flower -Afterwards you are ready to start the Flower server as well as the clients. You can simply start the server in a terminal as follows: +Afterwards you are ready to start the Flower server as well as the clients. +You can simply start the server in a terminal as follows: ```shell python3 server.py ``` -Now you are ready to start the Flower clients which will participate in the learning. To do so simply open two more terminal windows and run the following commands. +Now you are ready to start the Flower clients which will participate in the learning. +To do so simply open two more terminal windows and run the following commands. Start client 1 in the first terminal: ```shell -python3 client.py +python3 client.py partition_id=0 ``` Start client 2 in the second terminal: ```shell -python3 client.py +python3 client.py partition_id=1 ``` -You will see that XGBoost is starting a federated training. Look at the [code](https://github.com/adap/flower/tree/main/examples/quickstart-xgboost) and [tutorial](https://flower.dev/docs/framework/tutorial-quickstart-xgboost.html) for a detailed explanation. +You will see that XGBoost is starting a federated training. + +Alternatively, you can use `run.sh` to run the same experiment in a single terminal as follows: + +```shell +bash run.sh +``` + +Besides, we provide options to customise the experimental settings, including data partitioning and centralised/distributed evaluation (see `utils.py`). +Look at the [code](https://github.com/adap/flower/tree/main/examples/quickstart-xgboost) +and [tutorial](https://flower.dev/docs/framework/tutorial-quickstart-xgboost.html) for a detailed explanation. 
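
For readers tracing the aggregation logic through these diffs: the `aggregate` and `_get_tree_nums` helpers in `strategy.py` implement bagging by appending every client's newly boosted trees to the global model's JSON dump and bumping the booster's tree counters. Below is a minimal, self-contained sketch of that idea; the nested field names follow XGBoost's JSON model schema, and the helper names (`bagging_aggregate`, `merge_updates`) are illustrative rather than taken from this patch:

```python
import json
from typing import Dict, Optional


def _num_trees(model: Dict) -> int:
    # num_trees is stored as a string in XGBoost's JSON dump
    return int(
        model["learner"]["gradient_booster"]["model"]["gbtree_model_param"]["num_trees"]
    )


def bagging_aggregate(global_model: Optional[Dict], client_model: Dict) -> Dict:
    """Append the client's trees to the global model (simplified sketch)."""
    if global_model is None:
        # First update received: the client model becomes the global model
        return client_model

    n_global = _num_trees(global_model)
    n_client = _num_trees(client_model)
    dst = global_model["learner"]["gradient_booster"]["model"]
    src = client_model["learner"]["gradient_booster"]["model"]

    # Copy each client tree over, re-indexing its ID past the existing trees
    for i, tree in enumerate(src["trees"]):
        tree["id"] = n_global + i
        dst["trees"].append(tree)
        dst["tree_info"].append(0)  # assumes a single-output (binary) objective

    dst["gbtree_model_param"]["num_trees"] = str(n_global + n_client)
    return global_model


def merge_updates(global_bytes: Optional[bytes], update_bytes: bytes) -> bytes:
    """Round-trip through bytes, mirroring how FitRes carries the model."""
    prev = json.loads(global_bytes) if global_bytes else None
    merged = bagging_aggregate(prev, json.loads(update_bytes))
    return json.dumps(merged).encode("utf-8")
```

Because trees are only ever appended, never averaged, the global model grows by `num_local_round` trees per sampled client each round, which is why a fresh booster can later be rebuilt from the aggregated bytes with a plain `load_model` call.
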
diff --git a/examples/quickstart-xgboost/run.sh b/examples/quickstart-xgboost/run.sh index f7d37f47051..92e9973fdb5 100755 --- a/examples/quickstart-xgboost/run.sh +++ b/examples/quickstart-xgboost/run.sh @@ -4,7 +4,7 @@ cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"/ echo "Starting server" python server.py & -sleep 3 # Sleep for 3s to give the server enough time to start +sleep 15 # Sleep for 15s to give the server enough time to start for i in `seq 0 1`; do echo "Starting client $i" From 28b7305bb3ffb74efaf3515c8b5932022534e21c Mon Sep 17 00:00:00 2001 From: yan-gao-GY Date: Wed, 15 Nov 2023 10:18:21 +0000 Subject: [PATCH 17/31] Format readme --- examples/quickstart-xgboost/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/quickstart-xgboost/README.md b/examples/quickstart-xgboost/README.md index f7b98c11d3b..b72642db621 100644 --- a/examples/quickstart-xgboost/README.md +++ b/examples/quickstart-xgboost/README.md @@ -52,14 +52,14 @@ pip install -r requirements.txt ## Run Federated Learning with XGBoost and Flower -Afterwards you are ready to start the Flower server as well as the clients. +Afterwards you are ready to start the Flower server as well as the clients. You can simply start the server in a terminal as follows: ```shell python3 server.py ``` -Now you are ready to start the Flower clients which will participate in the learning. +Now you are ready to start the Flower clients which will participate in the learning. To do so simply open two more terminal windows and run the following commands. Start client 1 in the first terminal: @@ -83,5 +83,5 @@ bash run.sh ``` Besides, we provide options to customise the experimental settings, including data partitioning and centralised/distributed evaluation (see `utils.py`). -Look at the [code](https://github.com/adap/flower/tree/main/examples/quickstart-xgboost) +Look at the [code](https://github.com/adap/flower/tree/main/examples/quickstart-xgboost) and [tutorial](https://flower.dev/docs/framework/tutorial-quickstart-xgboost.html) for a detailed explanation. From ebe35a5dd3518a6e433d54bd2b795cee103b4141 Mon Sep 17 00:00:00 2001 From: Yan Gao Date: Wed, 15 Nov 2023 10:51:16 +0000 Subject: [PATCH 18/31] Update examples/quickstart-xgboost/client.py Co-authored-by: Daniel J. Beutel --- examples/quickstart-xgboost/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/quickstart-xgboost/client.py b/examples/quickstart-xgboost/client.py index 795d42dd5fa..2b6a9a78e17 100644 --- a/examples/quickstart-xgboost/client.py +++ b/examples/quickstart-xgboost/client.py @@ -34,6 +34,7 @@ # Load (HIGGS) dataset and conduct partitioning num_partitions = args.num_partitions + # Partitioner type is chosen from ["uniform", "linear", "square", "exponential"] partitioner_type = args.partitioner_type From 749a96087b7c4ac9bd198e5e7187070f05530618 Mon Sep 17 00:00:00 2001 From: Yan Gao Date: Wed, 15 Nov 2023 10:51:47 +0000 Subject: [PATCH 19/31] Update examples/quickstart-xgboost/README.md Co-authored-by: Daniel J. Beutel --- examples/quickstart-xgboost/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/quickstart-xgboost/README.md b/examples/quickstart-xgboost/README.md index b72642db621..b89d6aec467 100644 --- a/examples/quickstart-xgboost/README.md +++ b/examples/quickstart-xgboost/README.md @@ -65,7 +65,7 @@ To do so simply open two more terminal windows and run the following commands. 
Start client 1 in the first terminal: ```shell -python3 client.py partition_id=0 +python3 client.py node-id=0 ``` Start client 2 in the second terminal: From 40d0bed374ba524a774913ec93fa82f35da547b7 Mon Sep 17 00:00:00 2001 From: Yan Gao Date: Wed, 15 Nov 2023 10:52:01 +0000 Subject: [PATCH 20/31] Update examples/quickstart-xgboost/README.md Co-authored-by: Daniel J. Beutel --- examples/quickstart-xgboost/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/quickstart-xgboost/README.md b/examples/quickstart-xgboost/README.md index b89d6aec467..79fea330575 100644 --- a/examples/quickstart-xgboost/README.md +++ b/examples/quickstart-xgboost/README.md @@ -71,7 +71,7 @@ python3 client.py node-id=0 Start client 2 in the second terminal: ```shell -python3 client.py partition_id=1 +python3 client.py node-id=1 ``` You will see that XGBoost is starting a federated training. From 5cdfc3713811e8787c9b95c3db9048485d75232c Mon Sep 17 00:00:00 2001 From: Yan Gao Date: Wed, 15 Nov 2023 10:52:31 +0000 Subject: [PATCH 21/31] Update examples/quickstart-xgboost/utils.py Co-authored-by: Daniel J. Beutel --- examples/quickstart-xgboost/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/quickstart-xgboost/utils.py b/examples/quickstart-xgboost/utils.py index ba03ed1bd0e..bbb8f1067f9 100644 --- a/examples/quickstart-xgboost/utils.py +++ b/examples/quickstart-xgboost/utils.py @@ -16,7 +16,7 @@ def client_args_parser(): help="Partitioner types.", ) parser.add_argument( - "--partition_id", + "--node-id", default=0, type=int, help="Partition ID used for the current client.", From c2f85c0571a862ac493dca0d76b4ad081b4c2e70 Mon Sep 17 00:00:00 2001 From: Yan Gao Date: Wed, 15 Nov 2023 10:52:47 +0000 Subject: [PATCH 22/31] Update examples/quickstart-xgboost/utils.py Co-authored-by: Daniel J. Beutel --- examples/quickstart-xgboost/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/quickstart-xgboost/utils.py b/examples/quickstart-xgboost/utils.py index bbb8f1067f9..6b0b4611f42 100644 --- a/examples/quickstart-xgboost/utils.py +++ b/examples/quickstart-xgboost/utils.py @@ -6,7 +6,7 @@ def client_args_parser(): parser = argparse.ArgumentParser() parser.add_argument( - "--num_partitions", default=20, type=int, help="Number of partitions." + "--num-partitions", default=20, type=int, help="Number of partitions." ) parser.add_argument( "--partitioner_type", From b63a0b07db854b285e56eebe7734e5c495c8b757 Mon Sep 17 00:00:00 2001 From: Yan Gao Date: Wed, 15 Nov 2023 10:53:03 +0000 Subject: [PATCH 23/31] Update examples/quickstart-xgboost/strategy.py Co-authored-by: Daniel J. 
Beutel --- examples/quickstart-xgboost/strategy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/quickstart-xgboost/strategy.py b/examples/quickstart-xgboost/strategy.py index 8cdc7f795a9..23a18f05882 100644 --- a/examples/quickstart-xgboost/strategy.py +++ b/examples/quickstart-xgboost/strategy.py @@ -13,7 +13,7 @@ from flwr.common.logger import log -class XGbBagging(fl.server.strategy.FedAvg): +class XgbBagging(fl.server.strategy.FedAvg): def __init__( self, evaluate_function: Optional[ From 5484a47e7f0a7bdfe6c78688f3e03ba0155a9a02 Mon Sep 17 00:00:00 2001 From: yan-gao-GY Date: Wed, 15 Nov 2023 11:31:45 +0000 Subject: [PATCH 24/31] Format arguments parser --- examples/quickstart-xgboost/README.md | 4 ++-- examples/quickstart-xgboost/client.py | 4 ++-- examples/quickstart-xgboost/server.py | 4 ++-- examples/quickstart-xgboost/utils.py | 20 ++++++++++---------- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/examples/quickstart-xgboost/README.md b/examples/quickstart-xgboost/README.md index 79fea330575..3801d4813a2 100644 --- a/examples/quickstart-xgboost/README.md +++ b/examples/quickstart-xgboost/README.md @@ -65,13 +65,13 @@ To do so simply open two more terminal windows and run the following commands. Start client 1 in the first terminal: ```shell -python3 client.py node-id=0 +python3 client.py --node-id=0 ``` Start client 2 in the second terminal: ```shell -python3 client.py node-id=1 +python3 client.py --node-id=1 ``` You will see that XGBoost is starting a federated training. diff --git a/examples/quickstart-xgboost/client.py b/examples/quickstart-xgboost/client.py index 2b6a9a78e17..5aba30266b5 100644 --- a/examples/quickstart-xgboost/client.py +++ b/examples/quickstart-xgboost/client.py @@ -47,8 +47,8 @@ ) # Let's use the first partition as an example -partition_id = args.partition_id -partition = fds.load_partition(idx=partition_id, split="train") +node_id = args.node_id +partition = fds.load_partition(idx=node_id, split="train") partition.set_format("numpy") if args.centralised_eval: diff --git a/examples/quickstart-xgboost/server.py b/examples/quickstart-xgboost/server.py index 70684f51369..331dfc09e75 100644 --- a/examples/quickstart-xgboost/server.py +++ b/examples/quickstart-xgboost/server.py @@ -7,7 +7,7 @@ from flwr.common import Parameters, Scalar from flwr_datasets import FederatedDataset -from strategy import XGbBagging +from strategy import XgbBagging from utils import server_args_parser from dataset import resplit, transform_dataset_to_dmatrix @@ -90,7 +90,7 @@ def evaluate_fn( # Define strategy -strategy = XGbBagging( +strategy = XgbBagging( evaluate_function=get_evaluate_fn(test_dmatrix) if centralised_eval else None, fraction_fit=(float(num_clients_per_round) / pool_size), min_fit_clients=num_clients_per_round, diff --git a/examples/quickstart-xgboost/utils.py b/examples/quickstart-xgboost/utils.py index 6b0b4611f42..51c1a1b9604 100644 --- a/examples/quickstart-xgboost/utils.py +++ b/examples/quickstart-xgboost/utils.py @@ -6,10 +6,10 @@ def client_args_parser(): parser = argparse.ArgumentParser() parser.add_argument( - "--num-partitions", default=20, type=int, help="Number of partitions." + "--num-partitions", default=10, type=int, help="Number of partitions." 
)
     parser.add_argument(
-        "--partitioner_type",
+        "--partitioner-type",
         default="uniform",
         type=str,
         choices=["uniform", "linear", "square", "exponential"],
@@ -19,19 +19,19 @@
         "--node-id",
         default=0,
         type=int,
-        help="Partition ID used for the current client.",
+        help="Node ID used for the current client.",
     )
     parser.add_argument(
         "--seed", default=42, type=int, help="Seed used for train/test splitting."
     )
     parser.add_argument(
-        "--test_fraction",
+        "--test-fraction",
         default=0.2,
         type=float,
         help="Test fraction for train/test splitting.",
     )
     parser.add_argument(
-        "--centralised_eval",
+        "--centralised-eval",
         action="store_true",
         help="Conduct centralised evaluation (True), or client evaluation on hold-out data (False).",
     )
@@ -45,25 +45,25 @@ def server_args_parser():
     parser = argparse.ArgumentParser()
 
     parser.add_argument(
-        "--pool_size", default=2, type=int, help="Number of total clients."
+        "--pool-size", default=2, type=int, help="Number of total clients."
     )
     parser.add_argument(
-        "--num_rounds", default=5, type=int, help="Number of FL rounds."
+        "--num-rounds", default=5, type=int, help="Number of FL rounds."
     )
     parser.add_argument(
-        "--num_clients_per_round",
+        "--num-clients-per-round",
         default=2,
         type=int,
         help="Number of clients participating in training each round.",
     )
     parser.add_argument(
-        "--num_evaluate_clients",
+        "--num-evaluate-clients",
         default=2,
         type=int,
         help="Number of clients selected for evaluation.",
     )
     parser.add_argument(
-        "--centralised_eval",
+        "--centralised-eval",
         action="store_true",
         help="Conduct centralised evaluation (True), or client evaluation on hold-out data (False).",
     )
 
From 0ad1c43114b60c2f6a561e9003303d144accb9a4 Mon Sep 17 00:00:00 2001
From: yan-gao-GY
Date: Wed, 15 Nov 2023 11:33:19 +0000
Subject: [PATCH 25/31] Update run.sh

---
 examples/quickstart-xgboost/run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/quickstart-xgboost/run.sh b/examples/quickstart-xgboost/run.sh
index 92e9973fdb5..7cf65fa4d52 100755
--- a/examples/quickstart-xgboost/run.sh
+++ b/examples/quickstart-xgboost/run.sh
@@ -8,7 +8,7 @@ sleep 15  # Sleep for 15s to give the server enough time to start
 
 for i in `seq 0 1`; do
     echo "Starting client $i"
-    python client.py --partition_id=$i &
+    python3 client.py --node-id=$i &
 done
 
 # Enable CTRL+C to stop all background processes

From b948294d3b4d1cb4bc84e174cb25104354b32a47 Mon Sep 17 00:00:00 2001
From: yan-gao-GY
Date: Wed, 15 Nov 2023 12:49:29 +0000
Subject: [PATCH 26/31] Change strategy name to FedXgbBagging

---
 examples/quickstart-xgboost/server.py   | 4 ++--
 examples/quickstart-xgboost/strategy.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/quickstart-xgboost/server.py b/examples/quickstart-xgboost/server.py
index 331dfc09e75..857e9952801 100644
--- a/examples/quickstart-xgboost/server.py
+++ b/examples/quickstart-xgboost/server.py
@@ -7,7 +7,7 @@ from flwr.common import Parameters, Scalar
 from flwr_datasets import FederatedDataset
 
-from strategy import XgbBagging
+from strategy import FedXgbBagging
 from utils import server_args_parser
 from dataset import resplit, transform_dataset_to_dmatrix
 
@@ -90,7 +90,7 @@ def evaluate_fn(
 
 
 # Define strategy
-strategy = XgbBagging(
+strategy = FedXgbBagging(
     evaluate_function=get_evaluate_fn(test_dmatrix) if centralised_eval else None,
     fraction_fit=(float(num_clients_per_round) / pool_size),
     min_fit_clients=num_clients_per_round,
diff --git a/examples/quickstart-xgboost/strategy.py b/examples/quickstart-xgboost/strategy.py
index 23a18f05882..814010720a7 100644
--- a/examples/quickstart-xgboost/strategy.py
+++ b/examples/quickstart-xgboost/strategy.py
@@ -13,7 +13,7 @@ from flwr.common.logger import log
 
 
-class XgbBagging(fl.server.strategy.FedAvg):
+class FedXgbBagging(fl.server.strategy.FedAvg):
     def __init__(
         self,
         evaluate_function: Optional[

From 68d065ec726cb919a1e2c1ac37094bd27acc4d48 Mon Sep 17 00:00:00 2001
From: yan-gao-GY
Date: Wed, 15 Nov 2023 16:55:20 +0000
Subject: [PATCH 27/31] Rename to xgboost-comprehensive

---
 examples/quickstart-xgboost/README.md | 87 -------------------
 .../client.py                         |  0
 .../dataset.py                        |  0
 .../pyproject.toml                    |  0
 .../requirements.txt                  |  0
 .../run.sh                            |  0
 .../server.py                         |  0
 .../strategy.py                       |  0
 .../utils.py                          |  0
 9 files changed, 87 deletions(-)
 delete mode 100644 examples/quickstart-xgboost/README.md
 rename examples/{quickstart-xgboost => xgboost-comprehensive}/client.py (100%)
 rename examples/{quickstart-xgboost => xgboost-comprehensive}/dataset.py (100%)
 rename examples/{quickstart-xgboost => xgboost-comprehensive}/pyproject.toml (100%)
 rename examples/{quickstart-xgboost => xgboost-comprehensive}/requirements.txt (100%)
 rename examples/{quickstart-xgboost => xgboost-comprehensive}/run.sh (100%)
 rename examples/{quickstart-xgboost => xgboost-comprehensive}/server.py (100%)
 rename examples/{quickstart-xgboost => xgboost-comprehensive}/strategy.py (100%)
 rename examples/{quickstart-xgboost => xgboost-comprehensive}/utils.py (100%)

diff --git a/examples/quickstart-xgboost/README.md b/examples/quickstart-xgboost/README.md
deleted file mode 100644
index 3801d4813a2..00000000000
--- a/examples/quickstart-xgboost/README.md
+++ /dev/null
@@ -1,87 +0,0 @@
-# Flower Example using XGBoost
-
-This example demonstrates how to perform EXtreme Gradient Boosting (XGBoost) within Flower using the `xgboost` package.
-A tree-based bagging method is used for aggregation on the server.
-
-## Project Setup
-
-Start by cloning the example project. We prepared a single-line command that you can copy into your shell which will checkout the example for you:
-
-```shell
-git clone --depth=1 https://github.com/adap/flower.git && mv flower/examples/quickstart-xgboost . && rm -rf flower && cd quickstart-xgboost
-```
-
-This will create a new directory called `quickstart-xgboost` containing the following files:
-
-```
--- README.md <- You're reading this right now
--- server.py <- Defines the server-side logic
--- strategy.py <- Defines the tree-based bagging aggregation
--- client.py <- Defines the client-side logic
--- dataset.py <- Defines the functions of data loading and partitioning
--- pyproject.toml <- Example dependencies (if you use Poetry)
--- requirements.txt <- Example dependencies
-```
-
-### Installing Dependencies
-
-Project dependencies (such as `xgboost` and `flwr`) are defined in `pyproject.toml` and `requirements.txt`. We recommend [Poetry](https://python-poetry.org/docs/) to install those dependencies and manage your virtual environment ([Poetry installation](https://python-poetry.org/docs/#installation)) or [pip](https://pip.pypa.io/en/latest/development/), but feel free to use a different way of installing dependencies and managing virtual environments if you have other preferences.
-
-#### Poetry
-
-```shell
-poetry install
-poetry shell
-```
-
-Poetry will install all your dependencies in a newly created virtual environment. 
To verify that everything works correctly you can run the following command: - -```shell -poetry run python3 -c "import flwr" -``` - -If you don't see any errors you're good to go! - -#### pip - -Write the command below in your terminal to install the dependencies according to the configuration file requirements.txt. - -```shell -pip install -r requirements.txt -``` - -## Run Federated Learning with XGBoost and Flower - -Afterwards you are ready to start the Flower server as well as the clients. -You can simply start the server in a terminal as follows: - -```shell -python3 server.py -``` - -Now you are ready to start the Flower clients which will participate in the learning. -To do so simply open two more terminal windows and run the following commands. - -Start client 1 in the first terminal: - -```shell -python3 client.py --node-id=0 -``` - -Start client 2 in the second terminal: - -```shell -python3 client.py --node-id=1 -``` - -You will see that XGBoost is starting a federated training. - -Alternatively, you can use `run.sh` to run the same experiment in a single terminal as follows: - -```shell -bash run.sh -``` - -Besides, we provide options to customise the experimental settings, including data partitioning and centralised/distributed evaluation (see `utils.py`). -Look at the [code](https://github.com/adap/flower/tree/main/examples/quickstart-xgboost) -and [tutorial](https://flower.dev/docs/framework/tutorial-quickstart-xgboost.html) for a detailed explanation. diff --git a/examples/quickstart-xgboost/client.py b/examples/xgboost-comprehensive/client.py similarity index 100% rename from examples/quickstart-xgboost/client.py rename to examples/xgboost-comprehensive/client.py diff --git a/examples/quickstart-xgboost/dataset.py b/examples/xgboost-comprehensive/dataset.py similarity index 100% rename from examples/quickstart-xgboost/dataset.py rename to examples/xgboost-comprehensive/dataset.py diff --git a/examples/quickstart-xgboost/pyproject.toml b/examples/xgboost-comprehensive/pyproject.toml similarity index 100% rename from examples/quickstart-xgboost/pyproject.toml rename to examples/xgboost-comprehensive/pyproject.toml diff --git a/examples/quickstart-xgboost/requirements.txt b/examples/xgboost-comprehensive/requirements.txt similarity index 100% rename from examples/quickstart-xgboost/requirements.txt rename to examples/xgboost-comprehensive/requirements.txt diff --git a/examples/quickstart-xgboost/run.sh b/examples/xgboost-comprehensive/run.sh similarity index 100% rename from examples/quickstart-xgboost/run.sh rename to examples/xgboost-comprehensive/run.sh diff --git a/examples/quickstart-xgboost/server.py b/examples/xgboost-comprehensive/server.py similarity index 100% rename from examples/quickstart-xgboost/server.py rename to examples/xgboost-comprehensive/server.py diff --git a/examples/quickstart-xgboost/strategy.py b/examples/xgboost-comprehensive/strategy.py similarity index 100% rename from examples/quickstart-xgboost/strategy.py rename to examples/xgboost-comprehensive/strategy.py diff --git a/examples/quickstart-xgboost/utils.py b/examples/xgboost-comprehensive/utils.py similarity index 100% rename from examples/quickstart-xgboost/utils.py rename to examples/xgboost-comprehensive/utils.py From b0348c95dfd4a861e747e2bba67ec291a5e3e445 Mon Sep 17 00:00:00 2001 From: yan-gao-GY Date: Wed, 15 Nov 2023 17:12:55 +0000 Subject: [PATCH 28/31] Recover readme --- examples/xgboost-comprehensive/README.md | 87 ++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create 
mode 100644 examples/xgboost-comprehensive/README.md

diff --git a/examples/xgboost-comprehensive/README.md b/examples/xgboost-comprehensive/README.md
new file mode 100644
index 00000000000..3801d4813a2
--- /dev/null
+++ b/examples/xgboost-comprehensive/README.md
@@ -0,0 +1,87 @@
+# Flower Example using XGBoost
+
+This example demonstrates how to perform EXtreme Gradient Boosting (XGBoost) within Flower using the `xgboost` package.
+A tree-based bagging method is used for aggregation on the server.
+
+## Project Setup
+
+Start by cloning the example project. We prepared a single-line command that you can copy into your shell which will checkout the example for you:
+
+```shell
+git clone --depth=1 https://github.com/adap/flower.git && mv flower/examples/quickstart-xgboost . && rm -rf flower && cd quickstart-xgboost
+```
+
+This will create a new directory called `quickstart-xgboost` containing the following files:
+
+```
+-- README.md <- You're reading this right now
+-- server.py <- Defines the server-side logic
+-- strategy.py <- Defines the tree-based bagging aggregation
+-- client.py <- Defines the client-side logic
+-- dataset.py <- Defines the functions of data loading and partitioning
+-- pyproject.toml <- Example dependencies (if you use Poetry)
+-- requirements.txt <- Example dependencies
+```
+
+### Installing Dependencies
+
+Project dependencies (such as `xgboost` and `flwr`) are defined in `pyproject.toml` and `requirements.txt`. We recommend [Poetry](https://python-poetry.org/docs/) to install those dependencies and manage your virtual environment ([Poetry installation](https://python-poetry.org/docs/#installation)) or [pip](https://pip.pypa.io/en/latest/development/), but feel free to use a different way of installing dependencies and managing virtual environments if you have other preferences.
+
+#### Poetry
+
+```shell
+poetry install
+poetry shell
+```
+
+Poetry will install all your dependencies in a newly created virtual environment. To verify that everything works correctly you can run the following command:
+
+```shell
+poetry run python3 -c "import flwr"
+```
+
+If you don't see any errors you're good to go!
+
+#### pip
+
+Write the command below in your terminal to install the dependencies according to the configuration file requirements.txt.
+
+```shell
+pip install -r requirements.txt
+```
+
+## Run Federated Learning with XGBoost and Flower
+
+Afterwards you are ready to start the Flower server as well as the clients.
+You can simply start the server in a terminal as follows:
+
+```shell
+python3 server.py
+```
+
+Now you are ready to start the Flower clients which will participate in the learning.
+To do so simply open two more terminal windows and run the following commands.
+
+Start client 1 in the first terminal:
+
+```shell
+python3 client.py --node-id=0
+```
+
+Start client 2 in the second terminal:
+
+```shell
+python3 client.py --node-id=1
+```
+
+You will see that XGBoost is starting a federated training.
+
+Alternatively, you can use `run.sh` to run the same experiment in a single terminal as follows:
+
+```shell
+bash run.sh
+```
+
+Besides, we provide options to customise the experimental settings, including data partitioning and centralised/distributed evaluation (see `utils.py`).
+Look at the [code](https://github.com/adap/flower/tree/main/examples/quickstart-xgboost)
+and [tutorial](https://flower.dev/docs/framework/tutorial-quickstart-xgboost.html) for a detailed explanation.
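
The recovered README keeps its customisation note brief, so here is an illustrative invocation using the flags defined in `utils.py`; the specific values below are arbitrary examples, not recommended settings:

```shell
# Server: five clients per round with centralised evaluation (illustrative values)
python3 server.py --pool-size=5 --num-rounds=30 --num-clients-per-round=5 --centralised-eval &
sleep 15  # give the server time to start

# Clients: five exponentially sized partitions of HIGGS
for i in `seq 0 4`; do
    python3 client.py --node-id=$i --num-partitions=5 --partitioner-type=exponential &
done
wait
```

Note that with `--centralised-eval` the server scores the global model on the held-out test set itself and sets `fraction_evaluate` to 0.0, so no clients are sampled for evaluation rounds.
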
From f10d4b5c4a74dc932e13c17d9cd764587a93b6e7 Mon Sep 17 00:00:00 2001 From: "Daniel J. Beutel" Date: Wed, 15 Nov 2023 19:36:26 +0100 Subject: [PATCH 29/31] Update examples/xgboost-comprehensive/pyproject.toml --- examples/xgboost-comprehensive/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/xgboost-comprehensive/pyproject.toml b/examples/xgboost-comprehensive/pyproject.toml index 039809c577e..19e548aed8d 100644 --- a/examples/xgboost-comprehensive/pyproject.toml +++ b/examples/xgboost-comprehensive/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "quickstart-xgboost" version = "0.1.0" -description = "Federated XGBoost Quickstart with Flower" +description = "Federated XGBoost with Flower (quickstart)" authors = ["The Flower Authors "] [tool.poetry.dependencies] From a0c65ebd53d0c6d3463c5c644af55c4245c6a982 Mon Sep 17 00:00:00 2001 From: "Daniel J. Beutel" Date: Wed, 15 Nov 2023 19:36:32 +0100 Subject: [PATCH 30/31] Update examples/xgboost-comprehensive/pyproject.toml --- examples/xgboost-comprehensive/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/xgboost-comprehensive/pyproject.toml b/examples/xgboost-comprehensive/pyproject.toml index 19e548aed8d..9091491af94 100644 --- a/examples/xgboost-comprehensive/pyproject.toml +++ b/examples/xgboost-comprehensive/pyproject.toml @@ -3,7 +3,7 @@ requires = ["poetry-core>=1.4.0"] build-backend = "poetry.core.masonry.api" [tool.poetry] -name = "quickstart-xgboost" +name = "xgboost-comprehensive" version = "0.1.0" description = "Federated XGBoost with Flower (quickstart)" authors = ["The Flower Authors "] From 5656bb41cf5c1f58aa76357384c8310ddea45e0b Mon Sep 17 00:00:00 2001 From: "Daniel J. Beutel" Date: Wed, 15 Nov 2023 19:36:51 +0100 Subject: [PATCH 31/31] Update examples/xgboost-comprehensive/pyproject.toml --- examples/xgboost-comprehensive/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/xgboost-comprehensive/pyproject.toml b/examples/xgboost-comprehensive/pyproject.toml index 9091491af94..5414b512215 100644 --- a/examples/xgboost-comprehensive/pyproject.toml +++ b/examples/xgboost-comprehensive/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "xgboost-comprehensive" version = "0.1.0" -description = "Federated XGBoost with Flower (quickstart)" +description = "Federated XGBoost with Flower (comprehensive)" authors = ["The Flower Authors "] [tool.poetry.dependencies]
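
A closing note on the `type=bool` to `action="store_true"` fix from patch 13: `argparse` does not actually parse booleans — `bool(s)` is `True` for every non-empty string — so a `type=bool` flag with `default=True` can never be switched off from the command line. A minimal demonstration:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--bad", type=bool, default=True)  # the original pattern
parser.add_argument("--good", action="store_true")     # the corrected pattern

# bool("False") is True, so the "bad" flag cannot be disabled from the CLI
print(parser.parse_args(["--bad", "False"]).bad)  # True
print(parser.parse_args([]).good)                 # False (off by default)
print(parser.parse_args(["--good"]).good)         # True (on when passed)
```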