5 changes: 5 additions & 0 deletions python/examples/flow/fedavg/README.md
@@ -0,0 +1,5 @@
```
sh test_run_server.sh
sh test_run_client.sh 1
sh test_run_client.sh 2
```
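Note: the numeric argument to `test_run_client.sh` is the client rank; as the run scripts later in this diff show, both scripts also accept an optional run id (the server script takes it as its first argument, the client script as its second).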
56 changes: 56 additions & 0 deletions python/examples/flow/fedavg/fedml_config.yaml
@@ -0,0 +1,56 @@
common_args:
  training_type: "cross_silo"
  scenario: "horizontal"
  using_mlops: false
  random_seed: 0

environment_args:
  bootstrap: config/bootstrap.sh

data_args:
  dataset: "mnist"
  data_cache_dir: ~/fedml_data
  partition_method: "hetero"
  partition_alpha: 0.5

model_args:
  model: "lr"
  model_file_cache_folder: "./model_file_cache" # will be filled by the server automatically
  global_model_file_path: "./model_file_cache/global_model.pt"

train_args:
  federated_optimizer: "FedAvg"
  client_id_list:
  client_num_in_total: 1000
  client_num_per_round: 2
  comm_round: 5
  epochs: 1
  batch_size: 10
  client_optimizer: sgd
  learning_rate: 0.03
  weight_decay: 0.001

validation_args:
  frequency_of_the_test: 5

device_args:
  worker_num: 2
  using_gpu: false
  gpu_mapping_file: config/gpu_mapping.yaml
  gpu_mapping_key: mapping_default

comm_args:
  backend: "MQTT_S3"
  mqtt_config_path: config/mqtt_config.yaml
  s3_config_path: config/s3_config.yaml

tracking_args:
  log_file_dir: ./log
  enable_wandb: false
  wandb_key: ee0b5f53d949c84cee7decbe7a629e63fb2f8408
  wandb_project: fedml
  wandb_name: fedml_torch_fedavg_mnist_lr

#lsa_args:
# prime_number: 2 ** 15 - 19
# precision_parameter: 10
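As a side note, a minimal sketch of inspecting a few of these `train_args` with PyYAML, assuming the file is saved as `fedml_config.yaml` and PyYAML is installed. This is not the FedML loader; `fedml.init()` parses the file passed via `--cf` itself, so this is only for illustration.

```
# Minimal sketch (assumption: PyYAML installed); fedml.init() does its own parsing of --cf.
import yaml

with open("fedml_config.yaml") as f:
    cfg = yaml.safe_load(f)

train = cfg["train_args"]
print(train["federated_optimizer"])   # "FedAvg"
print(train["client_num_per_round"])  # 2
print(train["comm_round"])            # 5
```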
9 changes: 9 additions & 0 deletions python/examples/flow/fedavg/fedml_flow_constants.py
@@ -0,0 +1,9 @@
MSG_TYPE_CONNECTION_IS_READY = 0
MSG_TYPE_NEIGHBOR_CHECK_NODE_STATUS = "MSG_TYPE_NEIGHBOR_CHECK_NODE_STATUS"
MSG_TYPE_NEIGHBOR_REPORT_NODE_STATUS = "MSG_TYPE_NEIGHBOR_REPORT_NODE_STATUS"
MSG_TYPE_FLOW_FINISH = "MSG_TYPE_FLOW_FINISH"

MSG_ARG_KEY_TYPE = "msg_type"

PARAMS_KEY_SENDER_ID = "sender_id"
PARAMS_KEY_RECEIVER_ID = "receiver_id"
112 changes: 112 additions & 0 deletions python/examples/flow/fedavg/test_fedml_flow.py
@@ -0,0 +1,112 @@
import logging

import fedml
from fedml import FedMLRunner
from fedml.core import FedMLExecutor, Params, FedMLAlgorithmFlow


class Client(FedMLExecutor):
    def __init__(self, args):
        self.args = args
        id = args.rank
        neighbor_id_list = [0]
        super().__init__(id, neighbor_id_list)

        self.device = None
        self.dataset = None
        self.model = None

    def init(self, device, dataset, model):
        self.device = device
        self.dataset = dataset
        self.model = model

    def local_training(self):
        logging.info("local_training start")
        params = self.get_params()
        model_params = params.get(Params.KEY_MODEL_PARAMS)
        return params

    def handle_init_global_model(self):
        received_params = self.get_params()
        model_params = received_params.get(Params.KEY_MODEL_PARAMS)

        params = Params()
        params.add(Params.KEY_MODEL_PARAMS, model_params)
        return params


class Server(FedMLExecutor):
    def __init__(self, args):
        self.args = args
        id = args.rank
        neighbor_id_list = [1, 2]
        super().__init__(id, neighbor_id_list)

        self.device = None
        self.dataset = None
        self.model = None

        self.round_idx = 0

        self.client_count = 0
        self.client_num = 2

    def init(self, device, dataset, model):
        self.device = device
        self.dataset = dataset
        self.model = model

    def init_global_model(self):
        logging.info("init_global_model")
        params = Params()
        params.add(Params.KEY_MODEL_PARAMS, self.model.state_dict())
        return params

    def server_aggregate(self):
        logging.info("server_aggregate")
        params = self.get_params()
        model_params = params.get(Params.KEY_MODEL_PARAMS)
        # logging.info("value1 = {}".format(value1))
        self.round_idx += 1
        self.client_count += 1
        if self.client_count == self.client_num:
            self.client_count = 0
        params = Params()
        params.add(Params.KEY_MODEL_PARAMS, model_params)
        return params

    def final_eval(self):
        logging.info("final_eval")


if __name__ == "__main__":
    args = fedml.init()

    # init device
    device = fedml.device.get_device(args)

    # load data
    dataset, output_dim = fedml.data.load(args)

    # load model
    model = fedml.model.create(args, output_dim)

    if args.rank == 0:
        executor = Server(args)
        executor.init(device, dataset, model)
    else:
        executor = Client(args)
        executor.init(device, dataset, model)

    fedml_alg_flow = FedMLAlgorithmFlow(args, executor)
    fedml_alg_flow.add_flow("init_global_model", Server.init_global_model)
    fedml_alg_flow.add_flow("handle_init", Client.handle_init_global_model)
    for round_idx in range(args.comm_round):
        fedml_alg_flow.add_flow("local_training", Client.local_training)
        fedml_alg_flow.add_flow("server_aggregate", Server.server_aggregate)
    fedml_alg_flow.add_flow("final_eval", Server.final_eval)
    fedml_alg_flow.build()

    fedml_runner = FedMLRunner(args, device, dataset, model, algorithm_flow=fedml_alg_flow)
    fedml_runner.run()
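As wired above, the flow runs `Server.init_global_model` once, the clients respond via `Client.handle_init_global_model`, then each of the `comm_round` rounds chains `Client.local_training` into `Server.server_aggregate`, and `Server.final_eval` closes the flow on the server.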
4 changes: 4 additions & 0 deletions python/examples/flow/fedavg/test_run_client.sh
@@ -0,0 +1,4 @@

RANK=$1
RUN_ID=$2
python test_fedml_flow.py --cf fedml_config.yaml --rank $RANK --role client --run_id $RUN_ID
2 changes: 2 additions & 0 deletions python/examples/flow/fedavg/test_run_server.sh
@@ -0,0 +1,2 @@
RUN_ID=$1
python test_fedml_flow.py --cf fedml_config.yaml --rank 0 --role server --run_id $RUN_ID
32 changes: 28 additions & 4 deletions python/examples/simulation/mpi_torch_fedavg/batch_schedule.sh
@@ -1,9 +1,9 @@


mpirun -np 9 \
-host "localhost:9" \
/home/chaoyanghe/anaconda3/envs/fedml/bin/python main.py --cf config/schedule_femnist_2.yaml \
--override_cmd_args
# mpirun -np 9 \
# -host "localhost:9" \
# /home/chaoyanghe/anaconda3/envs/fedml/bin/python main.py --cf config/schedule_femnist_2.yaml \
# --override_cmd_args



@@ -14,3 +14,27 @@




# mpirun -np 9 \
# -host "localhost:9" \
# /home/chaoyanghe/anaconda3/envs/fedml/bin/python main.py --cf config/schedule_stackoverflow.yaml \
# --override_cmd_args


# mpirun -np 9 \
# -host "localhost:9" \
# /home/chaoyanghe/anaconda3/envs/fedml/bin/python main.py --cf config/schedule_stackoverflow_2.yaml \
# --override_cmd_args


# mpirun -np 5 \
# -host "localhost:5" \
# /home/chaoyanghe/anaconda3/envs/fedml/bin/python main.py --cf config/schedule_reddit.yaml \
# --override_cmd_args


# mpirun -np 5 \
# -host "localhost:5" \
# /home/chaoyanghe/anaconda3/envs/fedml/bin/python main.py --cf config/schedule_reddit_2.yaml \
# --override_cmd_args

@@ -17,8 +17,8 @@ train_args:
  federated_optimizer: "FedAvg_seq"
  client_id_list: "[]"
  client_num_in_total: 3400
  client_num_per_round: 1000
  comm_round: 500
  client_num_per_round: 100
  comm_round: 1000
  epochs: 10
  batch_size: 20
  client_optimizer: sgd
@@ -53,8 +53,12 @@ tracking_args:
  run_name: fedml_schedule_bench
  wandb_only_server: True
  using_mlops: False
  # simulation_schedule: "LinearFit-DP"
  simulation_schedule: "LinearFit-DP"
  # runtime_est_mode: "time_window" # EMA
  simulation_gpu_hetero: "ratio"
  gpu_hetero_ratio: 1.0
  # simulation_environment_hetero: "cos"
  # environment_hetero_ratio: 1.0

attack_args:
  enable_attack: false
@@ -14,13 +14,12 @@ model_args:


train_args:
  # federated_optimizer: "FedAvg_seq"
  federated_optimizer: "FedOpt_seq"
  federated_optimizer: "FedAvg_seq"
  client_id_list: "[]"
  client_num_in_total: 3400
  client_num_per_round: 10
  comm_round: 500
  epochs: 1
  client_num_per_round: 100
  comm_round: 1000
  epochs: 10
  batch_size: 20
  client_optimizer: sgd
  learning_rate: 0.05
@@ -54,8 +53,12 @@ tracking_args:
  run_name: fedml_schedule_bench
  wandb_only_server: True
  using_mlops: False
  simulation_schedule: "LinearFit-DP"
  runtime_est_mode: "time_window" # EMA
  # simulation_schedule: "LinearFit-DP"
  # runtime_est_mode: "time_window" # EMA
  # simulation_gpu_hetero: "ratio"
  # gpu_hetero_ratio: 1.0
  # simulation_environment_hetero: "cos"
  # environment_hetero_ratio: 1.0

attack_args:
  enable_attack: false
@@ -0,0 +1,81 @@
common_args:
  training_type: "simulation"
  random_seed: 0

data_args:
  dataset: "reddit"
  data_cache_dir: "/home/chaoyanghe/FedScale/benchmark/dataset/data/reddit"
  data_map_file: "/home/chaoyanghe/FedScale//benchmark/dataset/data/reddit/client_data_mapping/train.csv"
  partition_method: "hetero"
  partition_alpha: 0.5
  filter_less: 21
  num_loaders: 0
  task: "nlp"
  block_size: 64
  mlm_probability: 0.15
  overwrite_cache: False
  num_class: 10000

model_args:
  # model: "cnn"
  # model: "rnn" # resnet18
  model: "albert-base-v2"

train_args:
  federated_optimizer: "FedAvg_seq"
  client_id_list: "[]"
  client_num_in_total: 3400
  client_num_per_round: 100
  comm_round: 405
  epochs: 5
  batch_size: 20
  client_optimizer: sgd
  learning_rate: 0.0005
  weight_decay: 0.0005
  lr_schedule: None


validation_args:
  frequency_of_the_test: 400

device_args:
  worker_num: 4
  using_gpu: false
  gpu_mapping_file: config/gpu_mapping.yaml
  gpu_mapping_key: mapping_default
  # gpu_util_parse: "localhost:2,1,1,1,1,1,1,1"
  # gpu_util_parse: "localhost:2,1,1,1,0,0,0,0"
  gpu_util_parse: "localhost:2,1,0,1,1,0,0,0"

comm_args:
  backend: "MPI"
  is_mobile: 0


tracking_args:
  log_file_dir: ./log
  enable_wandb: True
  wandb_entity: automl
  wandb_key: ee0b5f53d949c84cee7decbe7a629e63fb2f8408
  wandb_project: bench_optim
  wandb_name: fedml_optim_bench
  run_name: fedml_schedule_bench
  wandb_only_server: True
  using_mlops: False
  # simulation_schedule: "LinearFit-DP"
  # runtime_est_mode: "time_window" # EMA
  # simulation_gpu_hetero: "ratio"
  # gpu_hetero_ratio: 1.0

attack_args:
  enable_attack: false
  attack_type: None

defense_args:
  enable_defense: False
  defense_type: norm_diff_clipping
  norm_bound: 5.0



