5 changes: 5 additions & 0 deletions python/examples/flow/fedavg/README.md
@@ -0,0 +1,5 @@
```
sh test_run_server.sh
sh test_run_client.sh 1
sh test_run_client.sh 2
```
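Note: the numeric argument to `test_run_client.sh` is the client rank; as the run scripts later in this diff show, both scripts also accept an optional run id (the server script takes it as its first argument, the client script as its second).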
56 changes: 56 additions & 0 deletions python/examples/flow/fedavg/fedml_config.yaml
@@ -0,0 +1,56 @@
common_args:
  training_type: "cross_silo"
  scenario: "horizontal"
  using_mlops: false
  random_seed: 0

environment_args:
  bootstrap: config/bootstrap.sh

data_args:
  dataset: "mnist"
  data_cache_dir: ~/fedml_data
  partition_method: "hetero"
  partition_alpha: 0.5

model_args:
  model: "lr"
  model_file_cache_folder: "./model_file_cache" # will be filled by the server automatically
  global_model_file_path: "./model_file_cache/global_model.pt"

train_args:
  federated_optimizer: "FedAvg"
  client_id_list:
  client_num_in_total: 1000
  client_num_per_round: 2
  comm_round: 5
  epochs: 1
  batch_size: 10
  client_optimizer: sgd
  learning_rate: 0.03
  weight_decay: 0.001

validation_args:
  frequency_of_the_test: 5

device_args:
  worker_num: 2
  using_gpu: false
  gpu_mapping_file: config/gpu_mapping.yaml
  gpu_mapping_key: mapping_default

comm_args:
  backend: "MQTT_S3"
  mqtt_config_path: config/mqtt_config.yaml
  s3_config_path: config/s3_config.yaml

tracking_args:
  log_file_dir: ./log
  enable_wandb: false
  wandb_key: ee0b5f53d949c84cee7decbe7a629e63fb2f8408
  wandb_project: fedml
  wandb_name: fedml_torch_fedavg_mnist_lr

#lsa_args:
# prime_number: 2 ** 15 - 19
# precision_parameter: 10
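As a side note, a minimal sketch of inspecting a few of these `train_args` with PyYAML, assuming the file is saved as `fedml_config.yaml` and PyYAML is installed. This is not the FedML loader; `fedml.init()` parses the file passed via `--cf` itself, so this is only for illustration.

```
# Minimal sketch (assumption: PyYAML installed); fedml.init() does its own parsing of --cf.
import yaml

with open("fedml_config.yaml") as f:
    cfg = yaml.safe_load(f)

train = cfg["train_args"]
print(train["federated_optimizer"])   # "FedAvg"
print(train["client_num_per_round"])  # 2
print(train["comm_round"])            # 5
```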
9 changes: 9 additions & 0 deletions python/examples/flow/fedavg/fedml_flow_constants.py
@@ -0,0 +1,9 @@
MSG_TYPE_CONNECTION_IS_READY = 0
MSG_TYPE_NEIGHBOR_CHECK_NODE_STATUS = "MSG_TYPE_NEIGHBOR_CHECK_NODE_STATUS"
MSG_TYPE_NEIGHBOR_REPORT_NODE_STATUS = "MSG_TYPE_NEIGHBOR_REPORT_NODE_STATUS"
MSG_TYPE_FLOW_FINISH = "MSG_TYPE_FLOW_FINISH"

MSG_ARG_KEY_TYPE = "msg_type"

PARAMS_KEY_SENDER_ID = "sender_id"
PARAMS_KEY_RECEIVER_ID = "receiver_id"
112 changes: 112 additions & 0 deletions python/examples/flow/fedavg/test_fedml_flow.py
@@ -0,0 +1,112 @@
import logging

import fedml
from fedml import FedMLRunner
from fedml.core import FedMLExecutor, Params, FedMLAlgorithmFlow


class Client(FedMLExecutor):
    def __init__(self, args):
        self.args = args
        id = args.rank
        neighbor_id_list = [0]
        super().__init__(id, neighbor_id_list)

        self.device = None
        self.dataset = None
        self.model = None

    def init(self, device, dataset, model):
        self.device = device
        self.dataset = dataset
        self.model = model

    def local_training(self):
        logging.info("local_training start")
        params = self.get_params()
        model_params = params.get(Params.KEY_MODEL_PARAMS)
        return params

    def handle_init_global_model(self):
        received_params = self.get_params()
        model_params = received_params.get(Params.KEY_MODEL_PARAMS)

        params = Params()
        params.add(Params.KEY_MODEL_PARAMS, model_params)
        return params


class Server(FedMLExecutor):
    def __init__(self, args):
        self.args = args
        id = args.rank
        neighbor_id_list = [1, 2]
        super().__init__(id, neighbor_id_list)

        self.device = None
        self.dataset = None
        self.model = None

        self.round_idx = 0

        self.client_count = 0
        self.client_num = 2

    def init(self, device, dataset, model):
        self.device = device
        self.dataset = dataset
        self.model = model

    def init_global_model(self):
        logging.info("init_global_model")
        params = Params()
        params.add(Params.KEY_MODEL_PARAMS, self.model.state_dict())
        return params

    def server_aggregate(self):
        logging.info("server_aggregate")
        params = self.get_params()
        model_params = params.get(Params.KEY_MODEL_PARAMS)
        # logging.info("value1 = {}".format(value1))
        self.round_idx += 1
        self.client_count += 1
        if self.client_count == self.client_num:
            self.client_count = 0
        params = Params()
        params.add(Params.KEY_MODEL_PARAMS, model_params)
        return params

    def final_eval(self):
        logging.info("final_eval")


if __name__ == "__main__":
    args = fedml.init()

    # init device
    device = fedml.device.get_device(args)

    # load data
    dataset, output_dim = fedml.data.load(args)

    # load model
    model = fedml.model.create(args, output_dim)

    if args.rank == 0:
        executor = Server(args)
        executor.init(device, dataset, model)
    else:
        executor = Client(args)
        executor.init(device, dataset, model)

    fedml_alg_flow = FedMLAlgorithmFlow(args, executor)
    fedml_alg_flow.add_flow("init_global_model", Server.init_global_model)
    fedml_alg_flow.add_flow("handle_init", Client.handle_init_global_model)
    for round_idx in range(args.comm_round):
        fedml_alg_flow.add_flow("local_training", Client.local_training)
        fedml_alg_flow.add_flow("server_aggregate", Server.server_aggregate)
    fedml_alg_flow.add_flow("final_eval", Server.final_eval)
    fedml_alg_flow.build()

    fedml_runner = FedMLRunner(args, device, dataset, model, algorithm_flow=fedml_alg_flow)
    fedml_runner.run()
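As wired above, the flow runs `Server.init_global_model` once, the clients respond via `Client.handle_init_global_model`, then each of the `comm_round` rounds chains `Client.local_training` into `Server.server_aggregate`, and `Server.final_eval` closes the flow on the server.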
4 changes: 4 additions & 0 deletions python/examples/flow/fedavg/test_run_client.sh
@@ -0,0 +1,4 @@

RANK=$1
RUN_ID=$2
python test_fedml_flow.py --cf fedml_config.yaml --rank $RANK --role client --run_id $RUN_ID
2 changes: 2 additions & 0 deletions python/examples/flow/fedavg/test_run_server.sh
@@ -0,0 +1,2 @@
RUN_ID=$1
python test_fedml_flow.py --cf fedml_config.yaml --rank 0 --role server --run_id $RUN_ID
32 changes: 28 additions & 4 deletions python/examples/simulation/mpi_torch_fedavg/batch_schedule.sh
@@ -1,9 +1,9 @@


mpirun -np 9 \
-host "localhost:9" \
/home/chaoyanghe/anaconda3/envs/fedml/bin/python main.py --cf config/schedule_femnist_2.yaml \
--override_cmd_args
# mpirun -np 9 \
# -host "localhost:9" \
# /home/chaoyanghe/anaconda3/envs/fedml/bin/python main.py --cf config/schedule_femnist_2.yaml \
# --override_cmd_args



@@ -14,3 +14,27 @@




# mpirun -np 9 \
# -host "localhost:9" \
# /home/chaoyanghe/anaconda3/envs/fedml/bin/python main.py --cf config/schedule_stackoverflow.yaml \
# --override_cmd_args


# mpirun -np 9 \
# -host "localhost:9" \
# /home/chaoyanghe/anaconda3/envs/fedml/bin/python main.py --cf config/schedule_stackoverflow_2.yaml \
# --override_cmd_args


# mpirun -np 5 \
# -host "localhost:5" \
# /home/chaoyanghe/anaconda3/envs/fedml/bin/python main.py --cf config/schedule_reddit.yaml \
# --override_cmd_args


# mpirun -np 5 \
# -host "localhost:5" \
# /home/chaoyanghe/anaconda3/envs/fedml/bin/python main.py --cf config/schedule_reddit_2.yaml \
# --override_cmd_args

@@ -17,8 +17,8 @@ train_args:
  federated_optimizer: "FedAvg_seq"
  client_id_list: "[]"
  client_num_in_total: 3400
  client_num_per_round: 1000
  comm_round: 500
  client_num_per_round: 100
  comm_round: 1000
  epochs: 10
  batch_size: 20
  client_optimizer: sgd
@@ -53,8 +53,12 @@ tracking_args:
  run_name: fedml_schedule_bench
  wandb_only_server: True
  using_mlops: False
  # simulation_schedule: "LinearFit-DP"
  simulation_schedule: "LinearFit-DP"
  # runtime_est_mode: "time_window" # EMA
  simulation_gpu_hetero: "ratio"
  gpu_hetero_ratio: 1.0
  # simulation_environment_hetero: "cos"
  # environment_hetero_ratio: 1.0

attack_args:
  enable_attack: false
@@ -14,13 +14,12 @@ model_args:


train_args:
  # federated_optimizer: "FedAvg_seq"
  federated_optimizer: "FedOpt_seq"
  federated_optimizer: "FedAvg_seq"
  client_id_list: "[]"
  client_num_in_total: 3400
  client_num_per_round: 10
  comm_round: 500
  epochs: 1
  client_num_per_round: 100
  comm_round: 1000
  epochs: 10
  batch_size: 20
  client_optimizer: sgd
  learning_rate: 0.05
@@ -54,8 +53,12 @@ tracking_args:
  run_name: fedml_schedule_bench
  wandb_only_server: True
  using_mlops: False
  simulation_schedule: "LinearFit-DP"
  runtime_est_mode: "time_window" # EMA
  # simulation_schedule: "LinearFit-DP"
  # runtime_est_mode: "time_window" # EMA
  # simulation_gpu_hetero: "ratio"
  # gpu_hetero_ratio: 1.0
  # simulation_environment_hetero: "cos"
  # environment_hetero_ratio: 1.0

attack_args:
  enable_attack: false
@@ -0,0 +1,81 @@
common_args:
  training_type: "simulation"
  random_seed: 0

data_args:
  dataset: "reddit"
  data_cache_dir: "/home/chaoyanghe/FedScale/benchmark/dataset/data/reddit"
  data_map_file: "/home/chaoyanghe/FedScale//benchmark/dataset/data/reddit/client_data_mapping/train.csv"
  partition_method: "hetero"
  partition_alpha: 0.5
  filter_less: 21
  num_loaders: 0
  task: "nlp"
  block_size: 64
  mlm_probability: 0.15
  overwrite_cache: False
  num_class: 10000

model_args:
  # model: "cnn"
  # model: "rnn" # resnet18
  model: "albert-base-v2"

train_args:
  federated_optimizer: "FedAvg_seq"
  client_id_list: "[]"
  client_num_in_total: 3400
  client_num_per_round: 100
  comm_round: 405
  epochs: 5
  batch_size: 20
  client_optimizer: sgd
  learning_rate: 0.0005
  weight_decay: 0.0005
  lr_schedule: None


validation_args:
  frequency_of_the_test: 400

device_args:
  worker_num: 4
  using_gpu: false
  gpu_mapping_file: config/gpu_mapping.yaml
  gpu_mapping_key: mapping_default
  # gpu_util_parse: "localhost:2,1,1,1,1,1,1,1"
  # gpu_util_parse: "localhost:2,1,1,1,0,0,0,0"
  gpu_util_parse: "localhost:2,1,0,1,1,0,0,0"

comm_args:
  backend: "MPI"
  is_mobile: 0


tracking_args:
  log_file_dir: ./log
  enable_wandb: True
  wandb_entity: automl
  wandb_key: ee0b5f53d949c84cee7decbe7a629e63fb2f8408
  wandb_project: bench_optim
  wandb_name: fedml_optim_bench
  run_name: fedml_schedule_bench
  wandb_only_server: True
  using_mlops: False
  # simulation_schedule: "LinearFit-DP"
  # runtime_est_mode: "time_window" # EMA
  # simulation_gpu_hetero: "ratio"
  # gpu_hetero_ratio: 1.0

attack_args:
  enable_attack: false
  attack_type: None

defense_args:
  enable_defense: False
  defense_type: norm_diff_clipping
  norm_bound: 5.0



