diff --git a/doc/source/tutorial-quickstart-xgboost.rst b/doc/source/tutorial-quickstart-xgboost.rst
index 3de4e0ef81c..8d6f78f3088 100644
--- a/doc/source/tutorial-quickstart-xgboost.rst
+++ b/doc/source/tutorial-quickstart-xgboost.rst
@@ -88,7 +88,8 @@ Prior to local training, we require loading the HIGGS dataset from Flower Datase
 .. code-block:: python

     # Load (HIGGS) dataset and conduct partitioning
-    partitioner = IidPartitioner(num_partitions=2)
+    # We use a small subset (num_partitions=30) of the dataset to speed up data loading for this demonstration.
+    partitioner = IidPartitioner(num_partitions=30)
     fds = FederatedDataset(dataset="jxie/higgs", partitioners={"train": partitioner})

     # Load the partition for this `node_id`
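(Side note on the `num_partitions` change above: HIGGS has roughly 11 million training rows, so with 30 IID partitions each client now loads only about 1/30 of the data, which is what speeds up data loading. A minimal sketch for inspecting one partition, using the same `flwr-datasets` calls as the tutorial:)

.. code-block:: python

    from flwr_datasets import FederatedDataset
    from flwr_datasets.partitioner import IidPartitioner

    # Split the HIGGS training set into 30 IID partitions;
    # each client trains on exactly one of them.
    partitioner = IidPartitioner(num_partitions=30)
    fds = FederatedDataset(dataset="jxie/higgs", partitioners={"train": partitioner})

    # Load a single partition and check its size (~11M / 30 rows).
    partition = fds.load_partition(idx=0, split="train")
    print(len(partition))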
@@ -544,39 +545,39 @@ You should now see how the training does in the very first terminal (the one tha
 .. code-block:: shell

-    INFO flwr 2023-11-19 18:36:33,599 | app.py:163 | Starting Flower server, config: ServerConfig(num_rounds=5, round_timeout=None)
-    INFO flwr 2023-11-19 18:36:33,629 | app.py:176 | Flower ECE: gRPC server running (5 rounds), SSL is disabled
-    INFO flwr 2023-11-19 18:36:33,629 | server.py:89 | Initializing global parameters
-    INFO flwr 2023-11-19 18:36:33,629 | server.py:276 | Requesting initial parameters from one random client
-    INFO flwr 2023-11-19 18:40:03,997 | server.py:280 | Received initial parameters from one random client
-    INFO flwr 2023-11-19 18:40:03,999 | server.py:91 | Evaluating initial parameters
-    INFO flwr 2023-11-19 18:40:04,000 | server.py:104 | FL starting
-    DEBUG flwr 2023-11-19 18:40:04,098 | server.py:222 | fit_round 1: strategy sampled 2 clients (out of 2)
-    DEBUG flwr 2023-11-19 18:40:09,097 | server.py:236 | fit_round 1 received 2 results and 0 failures
-    DEBUG flwr 2023-11-19 18:40:09,108 | server.py:173 | evaluate_round 1: strategy sampled 2 clients (out of 2)
-    DEBUG flwr 2023-11-19 18:40:09,236 | server.py:187 | evaluate_round 1 received 2 results and 0 failures
-    DEBUG flwr 2023-11-19 18:40:09,237 | server.py:222 | fit_round 2: strategy sampled 2 clients (out of 2)
-    DEBUG flwr 2023-11-19 18:40:09,819 | server.py:236 | fit_round 2 received 2 results and 0 failures
-    DEBUG flwr 2023-11-19 18:40:09,855 | server.py:173 | evaluate_round 2: strategy sampled 2 clients (out of 2)
-    DEBUG flwr 2023-11-19 18:40:10,007 | server.py:187 | evaluate_round 2 received 2 results and 0 failures
-    DEBUG flwr 2023-11-19 18:40:10,007 | server.py:222 | fit_round 3: strategy sampled 2 clients (out of 2)
-    DEBUG flwr 2023-11-19 18:40:10,623 | server.py:236 | fit_round 3 received 2 results and 0 failures
-    DEBUG flwr 2023-11-19 18:40:10,674 | server.py:173 | evaluate_round 3: strategy sampled 2 clients (out of 2)
-    DEBUG flwr 2023-11-19 18:40:10,847 | server.py:187 | evaluate_round 3 received 2 results and 0 failures
-    DEBUG flwr 2023-11-19 18:40:10,847 | server.py:222 | fit_round 4: strategy sampled 2 clients (out of 2)
-    DEBUG flwr 2023-11-19 18:40:11,515 | server.py:236 | fit_round 4 received 2 results and 0 failures
-    DEBUG flwr 2023-11-19 18:40:11,581 | server.py:173 | evaluate_round 4: strategy sampled 2 clients (out of 2)
-    DEBUG flwr 2023-11-19 18:40:11,775 | server.py:187 | evaluate_round 4 received 2 results and 0 failures
-    DEBUG flwr 2023-11-19 18:40:11,775 | server.py:222 | fit_round 5: strategy sampled 2 clients (out of 2)
-    DEBUG flwr 2023-11-19 18:40:12,568 | server.py:236 | fit_round 5 received 2 results and 0 failures
-    DEBUG flwr 2023-11-19 18:40:12,648 | server.py:173 | evaluate_round 5: strategy sampled 2 clients (out of 2)
-    DEBUG flwr 2023-11-19 18:40:12,862 | server.py:187 | evaluate_round 5 received 2 results and 0 failures
-    INFO flwr 2023-11-19 18:40:12,862 | server.py:153 | FL finished in 8.86196927200001
-    INFO flwr 2023-11-19 18:40:12,864 | app.py:226 | app_fit: losses_distributed [(1, 0), (2, 0), (3, 0), (4, 0), (5, 0)]
-    INFO flwr 2023-11-19 18:40:12,864 | app.py:227 | app_fit: metrics_distributed_fit {}
-    INFO flwr 2023-11-19 18:40:12,864 | app.py:228 | app_fit: metrics_distributed {'AUC': [(1, 0.76315), (2, 0.7734), (3, 0.7783), (4, 0.7824), (5, 0.78595)]}
-    INFO flwr 2023-11-19 18:40:12,864 | app.py:229 | app_fit: losses_centralized []
-    INFO flwr 2023-11-19 18:40:12,864 | app.py:230 | app_fit: metrics_centralized {}
+    INFO flwr 2023-11-20 11:21:56,454 | app.py:163 | Starting Flower server, config: ServerConfig(num_rounds=5, round_timeout=None)
+    INFO flwr 2023-11-20 11:21:56,473 | app.py:176 | Flower ECE: gRPC server running (5 rounds), SSL is disabled
+    INFO flwr 2023-11-20 11:21:56,473 | server.py:89 | Initializing global parameters
+    INFO flwr 2023-11-20 11:21:56,473 | server.py:276 | Requesting initial parameters from one random client
+    INFO flwr 2023-11-20 11:22:38,302 | server.py:280 | Received initial parameters from one random client
+    INFO flwr 2023-11-20 11:22:38,302 | server.py:91 | Evaluating initial parameters
+    INFO flwr 2023-11-20 11:22:38,302 | server.py:104 | FL starting
+    DEBUG flwr 2023-11-20 11:22:38,302 | server.py:222 | fit_round 1: strategy sampled 2 clients (out of 2)
+    DEBUG flwr 2023-11-20 11:22:38,636 | server.py:236 | fit_round 1 received 2 results and 0 failures
+    DEBUG flwr 2023-11-20 11:22:38,643 | server.py:173 | evaluate_round 1: strategy sampled 2 clients (out of 2)
+    DEBUG flwr 2023-11-20 11:22:38,653 | server.py:187 | evaluate_round 1 received 2 results and 0 failures
+    DEBUG flwr 2023-11-20 11:22:38,653 | server.py:222 | fit_round 2: strategy sampled 2 clients (out of 2)
+    DEBUG flwr 2023-11-20 11:22:38,721 | server.py:236 | fit_round 2 received 2 results and 0 failures
+    DEBUG flwr 2023-11-20 11:22:38,745 | server.py:173 | evaluate_round 2: strategy sampled 2 clients (out of 2)
+    DEBUG flwr 2023-11-20 11:22:38,756 | server.py:187 | evaluate_round 2 received 2 results and 0 failures
+    DEBUG flwr 2023-11-20 11:22:38,756 | server.py:222 | fit_round 3: strategy sampled 2 clients (out of 2)
+    DEBUG flwr 2023-11-20 11:22:38,831 | server.py:236 | fit_round 3 received 2 results and 0 failures
+    DEBUG flwr 2023-11-20 11:22:38,868 | server.py:173 | evaluate_round 3: strategy sampled 2 clients (out of 2)
+    DEBUG flwr 2023-11-20 11:22:38,881 | server.py:187 | evaluate_round 3 received 2 results and 0 failures
+    DEBUG flwr 2023-11-20 11:22:38,881 | server.py:222 | fit_round 4: strategy sampled 2 clients (out of 2)
+    DEBUG flwr 2023-11-20 11:22:38,960 | server.py:236 | fit_round 4 received 2 results and 0 failures
+    DEBUG flwr 2023-11-20 11:22:39,012 | server.py:173 | evaluate_round 4: strategy sampled 2 clients (out of 2)
+    DEBUG flwr 2023-11-20 11:22:39,026 | server.py:187 | evaluate_round 4 received 2 results and 0 failures
+    DEBUG flwr 2023-11-20 11:22:39,026 | server.py:222 | fit_round 5: strategy sampled 2 clients (out of 2)
+    DEBUG flwr 2023-11-20 11:22:39,111 | server.py:236 | fit_round 5 received 2 results and 0 failures
+    DEBUG flwr 2023-11-20 11:22:39,177 | server.py:173 | evaluate_round 5: strategy sampled 2 clients (out of 2)
+    DEBUG flwr 2023-11-20 11:22:39,193 | server.py:187 | evaluate_round 5 received 2 results and 0 failures
+    INFO flwr 2023-11-20 11:22:39,193 | server.py:153 | FL finished in 0.8905023969999988
+    INFO flwr 2023-11-20 11:22:39,193 | app.py:226 | app_fit: losses_distributed [(1, 0), (2, 0), (3, 0), (4, 0), (5, 0)]
+    INFO flwr 2023-11-20 11:22:39,193 | app.py:227 | app_fit: metrics_distributed_fit {}
+    INFO flwr 2023-11-20 11:22:39,193 | app.py:228 | app_fit: metrics_distributed {'AUC': [(1, 0.7572), (2, 0.7705), (3, 0.77595), (4, 0.78), (5, 0.78385)]}
+    INFO flwr 2023-11-20 11:22:39,193 | app.py:229 | app_fit: losses_centralized []
+    INFO flwr 2023-11-20 11:22:39,193 | app.py:230 | app_fit: metrics_centralized {}

 Congratulations! You've successfully built and run your first federated XGBoost system.

diff --git a/examples/xgboost-comprehensive/README.md b/examples/xgboost-comprehensive/README.md
index 783438188da..da002a10d30 100644
--- a/examples/xgboost-comprehensive/README.md
+++ b/examples/xgboost-comprehensive/README.md
@@ -1,7 +1,8 @@
 # Flower Example using XGBoost (Comprehensive)

 This example demonstrates a comprehensive federated learning setup using Flower with XGBoost.
-It differs from the quickstart example in the following ways:
+We use the [HIGGS](https://archive.ics.uci.edu/dataset/280/higgs) dataset to perform a binary classification task.
+It differs from the [xgboost-quickstart](https://github.com/adap/flower/tree/main/examples/xgboost-quickstart) example in the following ways:

 - Argument parsers for server and clients for hyperparameter selection.
 - Customised FL settings.

diff --git a/examples/xgboost-comprehensive/client.py b/examples/xgboost-comprehensive/client.py
index 889f5f47e0a..bc9735a2f65 100644
--- a/examples/xgboost-comprehensive/client.py
+++ b/examples/xgboost-comprehensive/client.py
@@ -47,6 +47,7 @@
 )

 # Load the partition for this `node_id`
+log(INFO, "Loading partition...")
 node_id = args.node_id
 partition = fds.load_partition(idx=node_id, split="train")
 partition.set_format("numpy")
@@ -67,6 +68,7 @@
 )

 # Reformat data to DMatrix for xgboost
+log(INFO, "Reformatting data...")
 train_dmatrix = transform_dataset_to_dmatrix(train_data)
 valid_dmatrix = transform_dataset_to_dmatrix(valid_data)

diff --git a/examples/xgboost-quickstart/README.md b/examples/xgboost-quickstart/README.md
index 309f8f55b84..5174c236c66 100644
--- a/examples/xgboost-quickstart/README.md
+++ b/examples/xgboost-quickstart/README.md
@@ -1,6 +1,7 @@
 # Flower Example using XGBoost

 This example demonstrates how to perform EXtreme Gradient Boosting (XGBoost) within Flower using the `xgboost` package.
+We use the [HIGGS](https://archive.ics.uci.edu/dataset/280/higgs) dataset in this example to perform a binary classification task.
 Tree-based bagging is used for aggregation on the server.

 This project provides a minimal code example to enable you to get started quickly. For a more comprehensive code example, take a look at [xgboost-comprehensive](https://github.com/adap/flower/tree/main/examples/xgboost-comprehensive).
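The `log(INFO, ...)` calls added to the client scripts above and below assume that Flower's logging helper and the standard log level are already imported at the top of each file. A minimal sketch of those imports (the `flwr.common.logger` path is how Flower exposes this helper; treat it as an assumption if your version differs):

.. code-block:: python

    from logging import INFO

    # Flower's `log` helper wraps the standard `logging` module:
    # log(level, message)
    from flwr.common.logger import log

    log(INFO, "Loading partition...")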
diff --git a/examples/xgboost-quickstart/client.py b/examples/xgboost-quickstart/client.py
index ed85e98a3ab..e8858019712 100644
--- a/examples/xgboost-quickstart/client.py
+++ b/examples/xgboost-quickstart/client.py
@@ -57,10 +57,12 @@ def transform_dataset_to_dmatrix(data: Union[Dataset, DatasetDict]) -> xgb.core.
 # Load (HIGGS) dataset and conduct partitioning
-partitioner = IidPartitioner(num_partitions=2)
+# We use a small subset (num_partitions=30) of the dataset to speed up data loading for this demonstration.
+partitioner = IidPartitioner(num_partitions=30)
 fds = FederatedDataset(dataset="jxie/higgs", partitioners={"train": partitioner})

 # Load the partition for this `node_id`
+log(INFO, "Loading partition...")
 partition = fds.load_partition(idx=args.node_id, split="train")
 partition.set_format("numpy")
@@ -70,6 +72,7 @@ def transform_dataset_to_dmatrix(data: Union[Dataset, DatasetDict]) -> xgb.core.
 )

 # Reformat data to DMatrix for xgboost
+log(INFO, "Reformatting data...")
 train_dmatrix = transform_dataset_to_dmatrix(train_data)
 valid_dmatrix = transform_dataset_to_dmatrix(valid_data)
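For reference, the `transform_dataset_to_dmatrix` helper whose signature appears in the hunk headers above packs a partition into the `DMatrix` format that XGBoost consumes. A minimal sketch, assuming the HIGGS partitions expose `inputs` (features) and `label` (binary target) columns as NumPy arrays after `partition.set_format("numpy")`:

.. code-block:: python

    from typing import Union

    import xgboost as xgb
    from datasets import Dataset, DatasetDict


    def transform_dataset_to_dmatrix(data: Union[Dataset, DatasetDict]) -> xgb.core.DMatrix:
        """Reformat a (NumPy-formatted) dataset into an xgboost DMatrix."""
        x = data["inputs"]  # feature matrix; column name is an assumption
        y = data["label"]  # binary target; column name is an assumption
        return xgb.DMatrix(x, label=y)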