FedML-AI · difan0224 · Dec 21, 2020 · Dec 29, 2020 · Dec 30, 2020 · Dec 30, 2020
diff --git a/.gitignore b/.gitignore
@@ -4,8 +4,8 @@
 .vscode/*
 
 *.pyc
+*.conf
 
 wandb
 
 *.zip
-*.log
diff --git a/FedML b/FedML
diff --git a/data/cifar100/download_cifar100.sh b/data/cifar100/download_cifar100.sh
@@ -0,0 +1 @@
+wget https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz
diff --git a/data_preprocessing/ImageNet/data_loader.py b/data_preprocessing/ImageNet/data_loader.py
diff --git a/data_preprocessing/ImageNet/datasets.py b/data_preprocessing/ImageNet/datasets.py
@@ -1,8 +1,10 @@
 import os
 import os.path
+import logging
 
-import torch.utils.data as data
 from PIL import Image
+import torch.utils.data as data
+from timm.data import Dataset, create_loader, resolve_data_config, Mixup, FastCollateMixup, AugMixDataset
 
 
 def has_file_allowed_extension(filename, extensions):
@@ -25,18 +27,21 @@ def find_classes(dir):
     return classes, class_to_idx
 
 
-def make_dataset(dir, class_to_idx, extensions):
+def make_dataset(dir, class_to_idx, extensions, num_classes=1000):
     images = []
 
     data_local_num_dict = dict()
     net_dataidx_map = dict()
     sum_temp = 0
     dir = os.path.expanduser(dir)
+
+    i_target = 0 
     for target in sorted(os.listdir(dir)):
+        if not (i_target < num_classes):
+            break
         d = os.path.join(dir, target)
         if not os.path.isdir(d):
             continue
-
         target_num = 0
         for root, _, fnames in sorted(os.walk(d)):
             for fname in sorted(fnames):
@@ -49,6 +54,7 @@ def make_dataset(dir, class_to_idx, extensions):
         net_dataidx_map[class_to_idx[target]] = (sum_temp, sum_temp + target_num)
         data_local_num_dict[class_to_idx[target]] = target_num
         sum_temp += target_num
+        i_target += 1
 
     assert len(images) == sum_temp
     return images, data_local_num_dict, net_dataidx_map
@@ -126,7 +132,83 @@ def __getdatasets__(self):
         if len(all_data) == 0:
             raise (RuntimeError("Found 0 files in subfolders of: " + self.data_dir + "\n"
                                                                                      "Supported extensions are: " + ",".join(
-                extensions)))
+                IMG_EXTENSIONS)))
+        return all_data, data_local_num_dict, net_dataidx_map
+
+    def __getitem__(self, index):
+        """Load the sample stored at ``index`` of ``self.local_data``.
+
+        Args:
+            index (int): Index
+
+        Returns:
+            tuple: (image, target) where target is index of the target class.
+        """
+        sample_path, label = self.local_data[index]
+        image = self.loader(sample_path)
+
+        # Both transforms are optional; ``None`` leaves the value untouched.
+        if self.transform is not None:
+            image = self.transform(image)
+        if self.target_transform is not None:
+            label = self.target_transform(label)
+
+        return image, label
+
+    def __len__(self):
+        return len(self.local_data)  # number of entries in self.local_data, the list __getitem__ indexes
+
+
+class ImageNet100(data.Dataset):
+
+    def __init__(self, data_dir, dataidxs=None, train=True, transform=None, target_transform=None, download=False):
+        """
+            Generating this class too many times will be time-consuming.
+            So it will be better calling this once and put it into ImageNet_truncated.
+        """
+        self.dataidxs = dataidxs
+        self.train = train
+        self.transform = transform
+        self.target_transform = target_transform
+        self.download = download
+        self.loader = default_loader
+        if self.train:
+            self.data_dir = os.path.join(data_dir, 'train')
+        else:
+            self.data_dir = os.path.join(data_dir, 'val')
+
+        self.all_data, self.data_local_num_dict, self.net_dataidx_map = self.__getdatasets__()
+        if dataidxs == None:
+            self.local_data = self.all_data
+        elif type(dataidxs) == int:
+            (begin, end) = self.net_dataidx_map[dataidxs]
+            self.local_data = self.all_data[begin: end]
+        else:
+            self.local_data = []
+            for idxs in dataidxs:
+                (begin, end) = self.net_dataidx_map[idxs]
+                self.local_data += self.all_data[begin: end]
+
+    def get_local_data(self):
+        return self.local_data
+
+    def get_net_dataidx_map(self):
+        return self.net_dataidx_map
+
+    def get_data_local_num_dict(self):
+        return self.data_local_num_dict
+
+    def __getdatasets__(self):
+        # all_data = datasets.ImageFolder(data_dir, self.transform, self.target_transform)
+
+        classes, class_to_idx = find_classes(self.data_dir)
+        IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif']
+        all_data, data_local_num_dict, net_dataidx_map = make_dataset(
+            self.data_dir, class_to_idx, IMG_EXTENSIONS, num_classes=100)
+        if len(all_data) == 0:
+            raise (RuntimeError("Found 0 files in subfolders of: " + self.data_dir + "\n"
+                                                                                     "Supported extensions are: " + ",".join(
+                IMG_EXTENSIONS)))
         return all_data, data_local_num_dict, net_dataidx_map
 
     def __getitem__(self, index):

diff --git a/data_preprocessing/ImageNet/datasets_hdf5.py b/data_preprocessing/ImageNet/datasets_hdf5.py
@@ -0,0 +1,191 @@
+# -*- coding: utf-8 -*-
+from __future__ import print_function
+
+import os
+import os.path
+
+import torch.utils.data as data
+import torchvision.transforms as transforms
+import h5py
+import numpy as np
+
+class DatasetHDF5(data.Dataset):
+    """Read-only view over the ``<t>_img`` / ``<t>_labels`` arrays of an HDF5 file."""
+
+    def __init__(self, hdf5fn, t, transform=None, target_transform=None):
+        """
+        hdf5fn: path of the HDF5 file, opened read-only with swmr=True.
+        t: 'train' or 'val'
+        transform / target_transform: accepted but not applied by this class.
+        """
+        super(DatasetHDF5, self).__init__()
+        self.hf = h5py.File(hdf5fn, 'r', libver='latest', swmr=True)
+        self.t = t
+        # The image dataset is kept as a file-backed handle and indexed per
+        # item; the labels are read out completely with ``[...]``.
+        self.d = self.hf['%s_img'%self.t]
+        self.dlabel = self.hf['%s_labels'%self.t][...]
+        self.n_images = self.d.shape[0]
+
+    def _get_dataset_x_and_target(self, index):
+        """Return (image, label) at ``index``; the label is cast to np.int64."""
+        return self.d[index, ...], np.int64(self.dlabel[index])
+
+    def __getitem__(self, index):
+        """Return the untransformed (image, label) pair at ``index``."""
+        return self._get_dataset_x_and_target(index)
+
+    def __len__(self):
+        """Number of images, i.e. the first dimension of the image dataset."""
+        return self.n_images
+
+
+class ImageNet_hdf5(data.Dataset):
+    """ImageNet samples stored in one HDF5 file, optionally restricted to some classes."""
+
+    def __init__(self, data_dir, dataidxs=None, train=True, transform=None, target_transform=None, download=False):
+        """
+            Generating this class too many times will be time-consuming.
+            So it will be better calling this once and put it into ImageNet_truncated.
+
+            data_dir: path of the HDF5 file (used as the file name, not as a directory).
+            dataidxs: None for every sample, one class label (int), or an
+                iterable of class labels whose samples are concatenated.
+            train: read the 'train' arrays when true, the 'val' arrays otherwise.
+            transform / target_transform: optional callables applied in __getitem__.
+            download: stored on the instance only.
+        """
+        self.dataidxs = dataidxs
+        self.train = train
+        self.transform = transform
+        self.target_transform = target_transform
+        self.download = download
+        self.hdf5fn = os.path.join(data_dir)
+
+        self.all_data_hdf5 = DatasetHDF5(self.hdf5fn, 'train' if self.train else 'val',
+            transform=self.transform, target_transform=self.target_transform)
+
+        self.data_local_num_dict, self.net_dataidx_map = \
+                                    self._get_net_dataidx_map()
+
+        # self.local_data_idx holds the sample indexes served by this instance.
+        self.all_data_idx = range(len(self.all_data_hdf5))
+        # ``is None`` instead of ``== None``: comparing an index array with ``==``
+        # is element-wise and cannot be used as an ``if`` condition.
+        if dataidxs is None:
+            self.local_data_idx = self.all_data_idx
+        elif isinstance(dataidxs, (int, np.integer)):
+            self.local_data_idx = self.net_dataidx_map[dataidxs]
+        else:
+            self.local_data_idx = []
+            for idxs in dataidxs:
+                self.local_data_idx += self.net_dataidx_map[idxs]
+
+    def _get_net_dataidx_map(self):
+        """
+        Group sample indexes by class label.
+
+        Returns:
+            tuple: (data_local_num_dict, net_dataidx_map) where the first maps
+            label -> sample count and the second maps label -> list of indexes.
+        """
+        net_dataidx_map = dict()
+        for i, label in enumerate(self.all_data_hdf5.dlabel):
+            # Always key on the np.int64 form of the label, for both lookup and insert.
+            net_dataidx_map.setdefault(np.int64(label), []).append(i)
+
+        data_local_num_dict = {key: len(value) for key, value in net_dataidx_map.items()}
+        return data_local_num_dict, net_dataidx_map
+
+    def get_net_dataidx_map(self):
+        """Return the label -> sample-index-list mapping."""
+        return self.net_dataidx_map
+
+    def get_data_local_num_dict(self):
+        """Return the label -> sample-count mapping."""
+        return self.data_local_num_dict
+
+    def __getitem__(self, index):
+        """
+        Args:
+            index (int): Index
+
+        Returns:
+            tuple: (image, target) where target is index of the target class.
+        """
+        img, target = self.all_data_hdf5[self.local_data_idx[index]]
+        # The stored array is converted to a PIL image before ``transform`` runs.
+        img = transforms.ToPILImage()(img)
+        if self.transform is not None:
+            img = self.transform(img)
+        if self.target_transform is not None:
+            target = self.target_transform(target)
+        return img, target
+
+    def __len__(self):
+        return len(self.local_data_idx)
+
+
+
+class ImageNet_truncated_hdf5(data.Dataset):
+    """Class-restricted view over an already constructed ImageNet_hdf5 dataset."""
+
+    def __init__(self, imagenet_dataset: ImageNet_hdf5, dataidxs, net_dataidx_map, train=True, transform=None,
+                 target_transform=None, download=False):
+        """
+            imagenet_dataset: dataset that is indexed to fetch every sample.
+            dataidxs: None for every sample, one class label (int), or an
+                iterable of class labels whose samples are concatenated.
+            net_dataidx_map: accepted for interface compatibility but not read;
+                the mapping of ``imagenet_dataset`` is used instead.
+            train, download: stored on the instance only.
+            transform, target_transform: ignored here; see __getitem__.
+        """
+        self.dataidxs = dataidxs
+        self.train = train
+        self.download = download
+
+        self.all_data_hdf5 = imagenet_dataset
+        self.data_local_num_dict = imagenet_dataset.data_local_num_dict
+        self.net_dataidx_map = imagenet_dataset.net_dataidx_map
+
+        # self.local_data_idx holds the sample indexes served by this instance.
+        self.all_data_idx = range(len(self.all_data_hdf5))
+        # ``is None`` instead of ``== None``: comparing an index array with ``==``
+        # is element-wise and cannot be used as an ``if`` condition.
+        if dataidxs is None:
+            self.local_data_idx = self.all_data_idx
+        elif isinstance(dataidxs, (int, np.integer)):
+            self.local_data_idx = self.net_dataidx_map[dataidxs]
+        else:
+            self.local_data_idx = []
+            for idxs in dataidxs:
+                self.local_data_idx += self.net_dataidx_map[idxs]
+
+    def get_net_dataidx_map(self):
+        """Return the index mapping taken from the wrapped dataset."""
+        return self.net_dataidx_map
+
+    def get_data_local_num_dict(self):
+        """Return the per-key sample counts taken from the wrapped dataset."""
+        return self.data_local_num_dict
+
+    def __getitem__(self, index):
+        """Return (image, target) from the wrapped dataset at the mapped index."""
+        # Transform operation has been conducted in all_data_hdf5
+        img, target = self.all_data_hdf5[self.local_data_idx[index]]
+        return img, target
+
+    def __len__(self):
+        """Number of sample indexes selected through ``dataidxs``."""
+        return len(self.local_data_idx)
+
+
+
+
+
+
+
+
+
+
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,8 +4,8 @@ @@
     .vscode/*
     *.pyc
+    *.conf
     wandb
     *.zip
-    *.log
+674 −201		LICENSE
+1 −4		README.md
+6 −6		fedml_api/data_preprocessing/FederatedEMNIST/data_loader.py
+4 −4		fedml_api/data_preprocessing/fed_cifar100/data_loader.py
+4 −4		fedml_api/data_preprocessing/fed_shakespeare/data_loader.py
+1 −1		fedml_api/data_preprocessing/stackoverflow_lr/data_loader.py
+3 −3		fedml_api/distributed/fedavg/FedAVGTrainer.py
+6 −12		fedml_api/distributed/fedavg/FedAvgAPI.py
+1 −2		fedml_api/distributed/fedavg/FedAvgClientManager.py
+10 −16		fedml_api/distributed/fedavg/FedAvgServerManager.py
+0 −12		fedml_api/distributed/fedavg/utils.py
+4 −10		fedml_api/distributed/fedopt/FedOptAPI.py
+2 −2		fedml_api/distributed/fedopt/FedOptAggregator.py
+1 −2		fedml_api/distributed/fedopt/FedOptClientManager.py
+9 −17		fedml_api/distributed/fedopt/FedOptServerManager.py
+2 −2		fedml_api/distributed/fedopt/FedOptTrainer.py
+0 −11		fedml_api/distributed/fedopt/utils.py
+0 −77		fedml_api/distributed/fedprox/FedProxAPI.py
+0 −163		fedml_api/distributed/fedprox/FedProxAggregator.py
+0 −77		fedml_api/distributed/fedprox/FedProxClientManager.py
+0 −96		fedml_api/distributed/fedprox/FedProxServerManager.py
+0 −56		fedml_api/distributed/fedprox/FedProxTrainer.py
+0 −91		fedml_api/distributed/fedprox/MyModelTrainer.py
+0 −2		fedml_api/distributed/fedprox/README.md
+0 −0		fedml_api/distributed/fedprox/__init__.py
+0 −32		fedml_api/distributed/fedprox/message_define.py
+0 −26		fedml_api/distributed/fedprox/utils.py
+26 −30		fedml_api/distributed/fedseg/FedSegAggregator.py
+1 −1		fedml_api/distributed/fedseg/utils.py
+0 −14		fedml_api/distributed/utils/ip_config_utils.py
+0 −463		fedml_api/model/cv/batchnorm_utils.py
+0 −2		fedml_api/model/cv/cnn.py
+5 −4		fedml_api/standalone/fedavg/fedavg_api.py
+6 −6		fedml_api/standalone/fedavg/my_model_trainer_classification.py
+2 −11		fedml_core/distributed/client/client_manager.py
+0 −0		fedml_core/distributed/communication/gRPC/__init__.py
+0 −4		fedml_core/distributed/communication/gRPC/grpc_client_test.sh
+0 −106		fedml_core/distributed/communication/gRPC/grpc_comm_manager.py
+0 −160		fedml_core/distributed/communication/gRPC/grpc_comm_manager_pb2.py
+0 −99		fedml_core/distributed/communication/gRPC/grpc_comm_manager_pb2_grpc.py
+0 −40		fedml_core/distributed/communication/gRPC/grpc_server.py
+0 −0		fedml_core/distributed/communication/gRPC/proto/__init__.py
+0 −1		fedml_core/distributed/communication/gRPC/proto/generate_grpc.sh
+0 −17		fedml_core/distributed/communication/gRPC/proto/grpc_comm_manager.proto
+1 −1		fedml_core/distributed/communication/mpi/mpi_send_thread.py
+4 −4		fedml_core/distributed/communication/mqtt/mqtt_comm_manager.py
+1 −12		fedml_core/distributed/server/server_manager.py
+1 −8		fedml_experiments/distributed/fedall/main_fedavg.py
+2 −35		fedml_experiments/distributed/fedavg/README.md
+0 −11		fedml_experiments/distributed/fedavg/gpu_mapping.yaml
+0 −6		fedml_experiments/distributed/fedavg/grpc_ipconfig.csv
+10 −22		fedml_experiments/distributed/fedavg/main_fedavg.py
+0 −0		fedml_experiments/distributed/fedavg/run_fedavg_distributed_pytorch.sh
+0 −1		fedml_experiments/standalone/fedavg/main_fedavg.py
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		wget https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz