CirroBio · sminot · Mar 6, 2025 · Mar 6, 2025 · Mar 7, 2025 · Mar 11, 2025
diff --git a/community/midas/1.3/README.md b/community/midas/1.3/README.md
@@ -0,0 +1 @@
+## MIDAS3: Metagenomic Intra-species Diversity Analysis System
diff --git a/community/midas/1.3/preprocess.py b/community/midas/1.3/preprocess.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+"""Build the samplesheet consumed by the MIDAS workflow.
+
+Takes the file table of the running dataset, drops rows whose
+`readType` is not 'R', pivots the table so that each
+dataset/sampleIndex/sample/lane combination is one row, and writes the
+columns sample, fastq_1, fastq_2 to samplesheet.csv.
+"""
+
+from cirro.helpers.preprocess_dataset import PreprocessDataset
+
+ds = PreprocessDataset.from_running()
+
+ds.logger.info("Files annotated in the dataset:")
+ds.logger.info(ds.files.to_csv(index=None))
+
+# Filter out any index files that may have been uploaded.
+# If the table has no 'readType' column, every row is kept.
+ds.files = ds.files.loc[
+    ds.files.apply(
+        lambda r: r.get('readType', 'R') == 'R',
+        axis=1
+    )
+]
+
+# Make a wide samplesheet with the columns
+# sample, fastq_1, fastq_2
+samplesheet = (
+    ds.files
+    # Restrict to the columns used by the pivot
+    .reindex(columns=["dataset", "sampleIndex", "sample", "lane", "read", "file"])
+    # One row per dataset/sampleIndex/sample/lane, one column per read
+    .pivot(
+        index=["dataset", "sampleIndex", "sample", "lane"],
+        columns="read",
+        values="file"
+    )
+    # Column labels taken from `read` become fastq_<n>
+    .rename(columns=lambda i: f"fastq_{int(i)}")
+    .reset_index()
+    # Always emit both FASTQ columns, in a fixed order
+    .reindex(columns=["sample", "fastq_1", "fastq_2"])
+)
+
+ds.logger.info("Formatted samplesheet:")
+ds.logger.info(samplesheet.to_csv(index=None))
+
+# Raise explicitly rather than `assert`: assertions are stripped when
+# Python runs with -O, which would let an empty samplesheet through.
+if samplesheet.shape[0] == 0:
+    raise ValueError(
+        "No files detected -- there may be an error with data ingest"
+    )
+
+# Write out to a file
+samplesheet.to_csv("samplesheet.csv", index=None)
+
+# Log the parameters supplied for this run
+ds.logger.info(ds.params)
diff --git a/community/midas/1.3/process-compute.config b/community/midas/1.3/process-compute.config
@@ -0,0 +1,8 @@
+process {
+    // 'retry' is required for maxRetries and the task.attempt scaling
+    // below to take effect; 'terminate' stops the run on the first failure.
+    errorStrategy = 'retry'
+    maxRetries = 3
+    cpus   = { 4 * task.attempt }
+    memory = { 128.GB * task.attempt }
+}
diff --git a/community/midas/1.3/process-dynamo.json b/community/midas/1.3/process-dynamo.json
@@ -0,0 +1,39 @@
+{
+  "id": "process-midas-1-3",
+  "parentProcessIds": [
+    "process-nf-core-fetchngs",
+    "paired_dnaseq",
+    "paired_rnaseq",
+    "process-kneaddata-1-0"
+  ],
+  "childProcessIds": [],
+  "dataType": "MIDAS3 Analysis Output",
+  "name": "MIDAS3: Metagenomic Intra-species Diversity Analysis System",
+  "desc": "Identify SNVs and gene copy number variants in microbial populations",
+  "executor": "NEXTFLOW",
+  "category": "Microbial Analysis",
+  "documentationUrl": "https://<DOCS_SITE>/pipelines/catalog-microbial-analysis#midas3-metagenomic-intra-species-diversity-analysis-system",
+  "code": {
+    "repository": "GITHUBPUBLIC",
+    "script": "main.nf",
+    "uri": "FredHutch/nf-midas",
+    "version": "main"
+  },
+  "allowMultipleSources": true,
+  "paramDefaults": [],
+  "computeDefaults": [
+    {
+      "executor": "NEXTFLOW",
+      "json": "s3://<RESOURCES_BUCKET>/<PROCESS_DIRECTORY>/process-compute.config",
+      "name": "Default"
+    }
+  ],
+  "paramMapJson": "s3://<RESOURCES_BUCKET>/<PROCESS_DIRECTORY>/process-input.json",
+  "formJson": "s3://<RESOURCES_BUCKET>/<PROCESS_DIRECTORY>/process-form.json",
+  "preProcessScript": "s3://<RESOURCES_BUCKET>/<PROCESS_DIRECTORY>/preprocess.py",
+  "fileJson": "",
+  "componentJson": "",
+  "infoJson": "",
+  "webOptimizationJson": "s3://<RESOURCES_BUCKET>/<PROCESS_DIRECTORY>/process-output.json",
+  "fileMappingRules": []
+}
diff --git a/community/midas/1.3/process-form.json b/community/midas/1.3/process-form.json
@@ -0,0 +1,198 @@
+{
+  "form": {
+    "type": "object",
+    "name": "MIDAS3: Identify SNVs and Gene Copy Number Variants from Metagenomes",
+    "properties": {
+      "db": {
+        "type": "object",
+        "title": "Reference Genomes",
+        "properties": {
+          "db": {
+            "type": "string",
+            "title": "Pre-Built Database",
+            "default": "s3://pubweb-references/midas/2024-08-08-midas-uhgg-refdb.tar.gz",
+            "enum": ["s3://pubweb-references/midas/2024-08-08-midas-uhgg-refdb.tar.gz"],
+            "enumNames": ["Unified Human Gastrointestinal Genome (UHGG)"]
+          }
+        }
+      },
+      "analysis": {
+        "type": "object",
+        "title": "Analysis Options",
+        "properties": {
+          "analysis_mode": {
+            "type": "string",
+            "title": "Analysis Mode",
+            "default": "species",
+            "enum": [
+              "species",
+              "genes",
+              "snps"
+            ],
+            "enumNames": [
+              "Species",
+              "Gene Copy Number",
+              "SNPs / SNVs"
+            ]
+          }
+        },
+        "dependencies": {
+          "analysis_mode": {
+            "oneOf": [
+              {
+                "properties": {
+                  "analysis_mode": {"enum": ["species"]},
+                  "word_size": {
+                    "type": "integer",
+                    "title": "Word Size",
+                    "description": "Word size for BLAST search. Use word sizes > 16 for greatest efficiency.",
+                    "default": 28
+                  },
+                  "aln_cov": {
+                    "type": "number",
+                    "title": "Alignment Coverage",
+                    "description": "Discard reads with alignment coverage < ALN_COV. Values between 0-1 accepted.",
+                    "default": 0.75
+                  }
+                }
+              },
+              {
+                "properties": {
+                  "analysis_mode": {"enum": ["genes"]},
+                  "aln_speed": {
+                    "type": "string",
+                    "title": "Alignment speed/sensitivity",
+                    "description": "Option passed to bowtie2. ",
+                    "default": "very-sensitive",
+                    "enum": [
+                      "very-sensitive",
+                      "very-fast",
+                      "fast",
+                      "sensitive"
+                    ]
+                  },
+                  "aln_mode": {
+                    "type": "string",
+                    "title": "Alignment mode",
+                    "description": "Option passed to bowtie2. ",
+                    "default": "local",
+                    "enum": [
+                      "local",
+                      "global"
+                    ]
+                  },
+                  "aln_mapid": {
+                    "type": "number",
+                    "title": "Alignment Map ID",
+                    "description": "Discard reads with alignment map ID < ALN_MAPID. Values between 0-100 accepted.",
+                    "default": 94
+                  },
+                  "aln_mapq": {
+                    "type": "number",
+                    "title": "Alignment Map Quality",
+                    "description": "Discard reads with alignment map quality < ALN_MAPQ.",
+                    "default": 0
+                  },
+                  "aln_readq": {
+                    "type": "number",
+                    "title": "Alignment Read Quality",
+                    "description": "Discard reads with alignment read quality < ALN_READQ.",
+                    "default": 20
+                  },
+                  "aln_cov": {
+                    "type": "number",
+                    "title": "Alignment Coverage",
+                    "description": "Discard reads with alignment coverage < ALN_COV. Values between 0-1 accepted.",
+                    "default": 0.75
+                  },
+                  "total_depth": {
+                    "type": "number",
+                    "title": "Total Depth",
+                    "description": "Discard genes with total depth < TOTAL_DEPTH.",
+                    "default": 1
+                  }
+                }
+              },
+              {
+                "properties": {
+                  "analysis_mode": {"enum": ["snps"]},
+                  "aln_speed": {
+                    "type": "string",
+                    "title": "Alignment speed/sensitivity",
+                    "description": "Option passed to bowtie2. ",
+                    "default": "very-sensitive",
+                    "enum": [
+                      "very-sensitive",
+                      "very-fast",
+                      "fast",
+                      "sensitive"
+                    ]
+                  },
+                  "aln_mode": {
+                    "type": "string",
+                    "title": "Alignment mode",
+                    "description": "Option passed to bowtie2. ",
+                    "default": "local",
+                    "enum": [
+                      "local",
+                      "global"
+                    ]
+                  },
+                  "aln_mapid": {
+                    "type": "number",
+                    "title": "Alignment Map ID",
+                    "description": "Discard reads with alignment map ID < ALN_MAPID. Values between 0-100 accepted.",
+                    "default": 94
+                  },
+                  "aln_mapq": {
+                    "type": "number",
+                    "title": "Alignment Map Quality",
+                    "description": "Discard reads with alignment map quality < ALN_MAPQ.",
+                    "default": 0
+                  },
+                  "aln_readq": {
+                    "type": "number",
+                    "title": "Alignment Read Quality",
+                    "description": "Discard reads with alignment read quality < ALN_READQ.",
+                    "default": 20
+                  },
+                  "aln_cov": {
+                    "type": "number",
+                    "title": "Alignment Coverage",
+                    "description": "Discard reads with alignment coverage < ALN_COV. Values between 0-1 accepted.",
+                    "default": 0.75
+                  },
+                  "aln_baseq": {
+                    "type": "number",
+                    "title": "Alignment Base Quality",
+                    "description": "Discard reads with alignment base quality < ALN_BASEQ.",
+                    "default": 30
+                  },
+                  "aln_trim": {
+                    "type": "number",
+                    "title": "Alignment Trim",
+                    "description": "Trim ALN_TRIM base-pairs from the 3' (right) end of each read.",
+                    "default": 0
+                  },
+                  "site_depth": {
+                    "type": "number",
+                    "title": "Site Depth",
+                    "description": "Only perform pileup for genomic sites covered by at least SITE_DEPTH post-filtered reads.",
+                    "default": 2
+                  },
+                  "snp_maf": {
+                    "type": "number",
+                    "title": "SNP Minor Allele Frequency",
+                    "description": "Discard SNPs with minor allele frequency < SNP_MAF.",
+                    "default": 0.1
+                  }
+                }
+              }
+            ]
+          }
+        }
+      }
+    }
+  },
+  "ui": {}
+}
diff --git a/community/midas/1.3/process-input.json b/community/midas/1.3/process-input.json
@@ -0,0 +1,19 @@
+{
+    "samplesheet": "samplesheet.csv",
+    "db": "$.dataset.params.db.db",
+    "analysis_mode": "$.dataset.params.analysis.analysis_mode",
+    "word_size": "$.dataset.params.analysis.word_size",
+    "aln_cov": "$.dataset.params.analysis.aln_cov",
+    "aln_speed": "$.dataset.params.analysis.aln_speed",
+    "aln_mode": "$.dataset.params.analysis.aln_mode",
+    "aln_mapid": "$.dataset.params.analysis.aln_mapid",
+    "aln_mapq": "$.dataset.params.analysis.aln_mapq",
+    "aln_readq": "$.dataset.params.analysis.aln_readq",
+    "total_depth": "$.dataset.params.analysis.total_depth",
+    "fragment_length": "$.dataset.params.analysis.fragment_length",
+    "aln_baseq": "$.dataset.params.analysis.aln_baseq",
+    "aln_trim": "$.dataset.params.analysis.aln_trim",
+    "site_depth": "$.dataset.params.analysis.site_depth",
+    "snp_maf": "$.dataset.params.analysis.snp_maf",
+    "outdir": "$.dataset.dataPath"
+}
diff --git a/community/midas/1.3/process-output.json b/community/midas/1.3/process-output.json
@@ -0,0 +1,3 @@
+{
+  "commands": []
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		## MIDAS3: Metagenomic Intra-species Diversity Analysis System