From 08cce3c0f7935d62b3e305d25d7583f11219f2a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20Pineda?= Date: Thu, 1 Dec 2022 21:20:49 +0100 Subject: [PATCH] Add detection_linking_hela dataset --- deeptrack/datasets/__init__.py | 1 + .../detection_linking_hela/__init__.py | 3 + .../detection_linking_hela/checksums.tsv | 3 + .../detection_linking_hela.py | 121 ++++++++++++++++++ .../TODO-add_fake_data_in_this_directory.txt | 0 5 files changed, 128 insertions(+) create mode 100644 deeptrack/datasets/detection_linking_hela/__init__.py create mode 100644 deeptrack/datasets/detection_linking_hela/checksums.tsv create mode 100644 deeptrack/datasets/detection_linking_hela/detection_linking_hela.py create mode 100644 deeptrack/datasets/detection_linking_hela/dummy_data/TODO-add_fake_data_in_this_directory.txt diff --git a/deeptrack/datasets/__init__.py b/deeptrack/datasets/__init__.py index 264091f9e..c2f15216a 100644 --- a/deeptrack/datasets/__init__.py +++ b/deeptrack/datasets/__init__.py @@ -4,5 +4,6 @@ regression_holography_nanoparticles, segmentation_fluorescence_u2os, detection_holography_nanoparticles, + detection_linking_hela, dmdataset, ) \ No newline at end of file diff --git a/deeptrack/datasets/detection_linking_hela/__init__.py b/deeptrack/datasets/detection_linking_hela/__init__.py new file mode 100644 index 000000000..fda9a004a --- /dev/null +++ b/deeptrack/datasets/detection_linking_hela/__init__.py @@ -0,0 +1,3 @@ +"""detection_linking_hela dataset.""" + +from .detection_linking_hela import DetectionLinkingHela diff --git a/deeptrack/datasets/detection_linking_hela/checksums.tsv b/deeptrack/datasets/detection_linking_hela/checksums.tsv new file mode 100644 index 000000000..bd2ff74b8 --- /dev/null +++ b/deeptrack/datasets/detection_linking_hela/checksums.tsv @@ -0,0 +1,3 @@ +# TODO(detection_linking_hela): If your dataset downloads files, then the checksums +# will be automatically added here when running +# `tfds build --register_checksums`. 
diff --git a/deeptrack/datasets/detection_linking_hela/detection_linking_hela.py b/deeptrack/datasets/detection_linking_hela/detection_linking_hela.py
new file mode 100644
index 000000000..4f3ff8896
--- /dev/null
+++ b/deeptrack/datasets/detection_linking_hela/detection_linking_hela.py
@@ -0,0 +1,134 @@
+import tensorflow as tf
+import tensorflow_datasets as tfds
+
+import numpy as np
+import pandas as pd
+
+_DESCRIPTION = """
+This dataset includes tracking data from DIC-C2DH-HELA (provided by the sixth edition of the Cell Tracking Challenge).
+It consists of two dataframes: ``nodes`` and ``parenthood``. ``nodes`` contains information about the individual
+cells, while "parenthood" includes information on the lineage of the cells.
+"""
+
+# Raw string: the BibTeX accent macros ({\'u}, {\'e}) must keep their backslash.
+_CITATION = r"""
+@article{pineda2022geometric,
+  title={Geometric deep learning reveals the spatiotemporal fingerprint of microscopic motion},
+  author={Pineda, Jes{\'u}s and Midtvedt, Benjamin and Bachimanchi, Harshith and No{\'e}, Sergio and Midtvedt, Daniel and Volpe, Giovanni and Manzo, Carlo},
+  journal={arXiv preprint arXiv:2202.06355},
+  year={2022}
+}
+"""
+
+
+class DetectionLinkingHela(tfds.core.GeneratorBasedBuilder):
+    """DatasetBuilder for the detection_linking_hela dataset.
+
+    Each split holds a single example that bundles the per-cell node table,
+    the parent/child lineage table, and the image and mask stacks.
+    """
+
+    VERSION = tfds.core.Version("1.0.0")
+    RELEASE_NOTES = {
+        "1.0.0": "Initial release.",
+    }
+
+    def _info(self) -> tfds.core.DatasetInfo:
+        """Returns the dataset metadata."""
+        node_features = self.get_node_features()
+        return tfds.core.DatasetInfo(
+            builder=self,
+            description=_DESCRIPTION,
+            features=tfds.features.FeaturesDict(
+                {
+                    # One variable-length 1-D tensor per node column.
+                    "nodes": tfds.features.FeaturesDict(
+                        {
+                            key: tfds.features.Tensor(
+                                shape=(None,), dtype=dtype
+                            )
+                            for key, dtype in node_features.items()
+                        }
+                    ),
+                    "parenthood": tfds.features.FeaturesDict(
+                        {
+                            "child": tfds.features.Tensor(
+                                shape=(None,), dtype=tf.int32
+                            ),
+                            "parent": tfds.features.Tensor(
+                                shape=(None,), dtype=tf.int32
+                            ),
+                        }
+                    ),
+                    "images": tfds.features.Tensor(
+                        shape=(84, 512, 512, 1), dtype=tf.float64
+                    ),
+                    "masks": tfds.features.Tensor(
+                        shape=(84, 512, 512, 1), dtype=tf.float64
+                    ),
+                }
+            ),
+            supervised_keys=None,
+            homepage="http://celltrackingchallenge.net/",
+            citation=_CITATION,
+        )
+
+    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
+        """Returns SplitGenerators."""
+        # Downloads the data and defines the splits
+        path = dl_manager.download_and_extract(
+            "https://drive.google.com/u/1/uc?id=1itHz4KmrUqDCKpGNyHUiHE4AFhwiJ5XR&export=download"
+        )
+        # Both splits are read from the same top-level folder of the archive.
+        data_dir = path / "detection_linking_hela"
+
+        # Returns the Dict[split names, Iterator[Key, Example]]
+        return {
+            split: self._generate_examples(data_dir, split)
+            for split in ("train", "test")
+        }
+
+    def _generate_examples(self, path, split):
+        """Yields the single example stored for ``split``.
+
+        Args:
+            path: Directory containing one sub-directory per split.
+            split: Name of the split sub-directory to read.
+
+        Yields:
+            A ``(key, example)`` pair whose example carries the ``nodes``,
+            ``parenthood``, ``images`` and ``masks`` entries.
+        """
+        split_dir = path / split
+
+        # Load data
+        nodes = pd.read_csv(split_dir / "nodesdf.csv")
+        parenthood = pd.read_csv(split_dir / "parenthood.csv")
+        images = np.load(split_dir / "images.npy")
+        masks = np.load(split_dir / "masks.npy")
+
+        yield "_", {
+            "nodes": nodes.to_dict("list"),
+            "parenthood": parenthood.to_dict("list"),
+            # Cast explicitly so the arrays always match the declared
+            # tf.float64 features, whatever dtype is stored on disk.
+            "images": images.astype(np.float64),
+            "masks": masks.astype(np.float64),
+        }
+
+    def get_node_features(self):
+        """Returns a dict mapping each ``nodes`` column name to its dtype."""
+        return {
+            "frame": tf.int32,
+            "label": tf.int32,
+            "centroid-0": tf.float32,
+            "centroid-1": tf.float32,
+            "area": tf.float32,
+            "mean_intensity": tf.float32,
+            "perimeter": tf.float32,
+            "eccentricity": tf.float32,
+            "solidity": tf.float32,
+            "set": tf.float32,
+            "parent": tf.int32,
+            "solution": tf.float32,
+        }
diff --git a/deeptrack/datasets/detection_linking_hela/dummy_data/TODO-add_fake_data_in_this_directory.txt b/deeptrack/datasets/detection_linking_hela/dummy_data/TODO-add_fake_data_in_this_directory.txt
new file mode 100644
index 000000000..e69de29bb