Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions community/midas/1.3/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
## MIDAS3: Metagenomic Intra-species Diversity Analysis System
41 changes: 41 additions & 0 deletions community/midas/1.3/preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env python3

from cirro.helpers.preprocess_dataset import PreprocessDataset

# Load the dataset description (file table + parameters) for the running job
ds = PreprocessDataset.from_running()

ds.logger.info("Files annotated in the dataset:")
ds.logger.info(ds.files.to_csv(index=None))

# Filter out any index files that may have been uploaded.
# Only rows whose 'readType' is 'R' are kept; when the file table has no
# 'readType' column at all, the default of 'R' keeps every row.
is_read = ds.files.apply(
    lambda r: r.get('readType', 'R') == 'R',
    axis=1
)
ds.files = ds.files.loc[is_read]

# Make a wide samplesheet with the columns
# sample, fastq_1, fastq_2
# (one row per dataset / sampleIndex / sample / lane combination)
samplesheet = (
    ds.files
    .reindex(columns=["dataset", "sampleIndex", "sample", "lane", "read", "file"])
    .pivot(
        index=["dataset", "sampleIndex", "sample", "lane"],
        columns="read",
        values="file"
    )
    # The values of the 'read' column become the fastq_<N> column names
    .rename(columns=lambda i: f"fastq_{int(i)}")
    .reset_index()
    # Keep only the columns written to the samplesheet
    .reindex(columns=["sample", "fastq_1", "fastq_2"])
)

ds.logger.info("Formatted samplesheet:")
ds.logger.info(samplesheet.to_csv(index=None))

# Validate explicitly instead of using `assert`, which is skipped entirely
# when Python runs with optimizations enabled (-O)
if samplesheet.shape[0] == 0:
    raise ValueError("No files detected -- there may be an error with data ingest")

# Write out to a file
samplesheet.to_csv("samplesheet.csv", index=None)

# Log the parameters attached to the dataset
ds.logger.info(ds.params)
6 changes: 6 additions & 0 deletions community/midas/1.3/process-compute.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
process {
    // Retry only when the task exits with a code in the 137-140 range
    // (typically signal kills such as running out of memory); every other
    // failure still terminates the run. A plain 'terminate' strategy would
    // make maxRetries and the task.attempt scaling below have no effect.
    errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
    // Maximum number of re-submissions; only applies to the 'retry' strategy
    maxRetries = 3
    // Resources grow with each attempt so a retried task gets more headroom
    cpus = { 4 * task.attempt }
    memory = { 128.GB * task.attempt }
}
39 changes: 39 additions & 0 deletions community/midas/1.3/process-dynamo.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
"id": "process-midas-1-3",
"parentProcessIds": [
"process-nf-core-fetchngs",
"paired_dnaseq",
"paired_rnaseq",
"process-kneaddata-1-0"
],
"childProcessIds": [],
"dataType": "MIDAS3 Analysis Output",
"name": "MIDAS3: Metagenomic Intra-species Diversity Analysis System",
"desc": "Identify SNVs and gene copy number variants in microbial populations",
"executor": "NEXTFLOW",
"category": "Microbial Analysis",
"documentationUrl": "https://<DOCS_SITE>/pipelines/catalog-microbial-analysis#midas3-metagenomic-intra-species-diversity-analysis-system",
"code": {
"repository": "GITHUBPUBLIC",
"script": "main.nf",
"uri": "FredHutch/nf-midas",
"version": "main"
},
"allowMultipleSources": true,
"paramDefaults": [],
"computeDefaults": [
{
"executor": "NEXTFLOW",
"json": "s3://<RESOURCES_BUCKET>/<PROCESS_DIRECTORY>/process-compute.config",
"name": "Default"
}
],
"paramMapJson": "s3://<RESOURCES_BUCKET>/<PROCESS_DIRECTORY>/process-input.json",
"formJson": "s3://<RESOURCES_BUCKET>/<PROCESS_DIRECTORY>/process-form.json",
"preProcessScript": "s3://<RESOURCES_BUCKET>/<PROCESS_DIRECTORY>/preprocess.py",
"fileJson": "",
"componentJson": "",
"infoJson": "",
"webOptimizationJson": "s3://<RESOURCES_BUCKET>/<PROCESS_DIRECTORY>/process-output.json",
"fileMappingRules": []
}
198 changes: 198 additions & 0 deletions community/midas/1.3/process-form.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
{
"form": {
"type": "object",
"name": "MIDAS3: Identify SNVs and Gene Copy Number Variants from Metagenomes",
"properties": {
"db": {
"type": "object",
"title": "Reference Genomes",
"properties": {
"db": {
"type": "string",
"title": "Pre-Built Database",
"default": "s3://pubweb-references/midas/2024-08-08-midas-uhgg-refdb.tar.gz",
"enum": ["s3://pubweb-references/midas/2024-08-08-midas-uhgg-refdb.tar.gz"],
"enumNames": ["Unified Human Gastrointestinal Genome (UHGG)"]
}
}
},
"analysis": {
"type": "object",
"title": "Analysis Options",
"properties": {
"analysis_mode": {
"type": "string",
"title": "Analysis Mode",
"default": "species",
"enum": [
"species",
"genes",
"snps"
],
"enumNames": [
"Species",
"Gene Copy Number",
"SNPs / SNVs"
]
}
},
"dependencies": {
"analysis_mode": {
"oneOf": [
{
"properties": {
"analysis_mode": {"enum": ["species"]},
"word_size": {
"type": "integer",
"title": "Word Size",
"description": "Word size for BLAST search. Use word sizes > 16 for greatest efficiency.",
"default": 28
},
"aln_cov": {
"type": "number",
"title": "Alignment Coverage",
"description": "Discard reads with alignment coverage < ALN_COV. Values between 0-1 accepted.",
"default": 0.75
}
}
},
{
"properties": {
"analysis_mode": {"enum": ["genes"]},
"aln_speed": {
"type": "string",
"title": "Alignment speed/sensitivity",
"description": "Option passed to bowtie2. ",
"default": "very-sensitive",
"enum": [
"very-sensitive",
"very-fast",
"fast",
"sensitive"
]
},
"aln_mode": {
"type": "string",
"title": "Alignment mode",
"description": "Option passed to bowtie2. ",
"default": "local",
"enum": [
"local",
"global"
]
},
"aln_mapid": {
"type": "number",
"title": "Alignment Map ID",
"description": "Discard reads with alignment map ID < ALN_MAPID. Values between 0-100 accepted.",
"default": 94
},
"aln_mapq": {
"type": "number",
"title": "Alignment Map Quality",
"description": "Discard reads with alignment map quality < ALN_MAPQ.",
"default": 0
},
"aln_readq": {
"type": "number",
"title": "Alignment Read Quality",
"description": "Discard reads with alignment read quality < ALN_READQ.",
"default": 20
},
"aln_cov": {
"type": "number",
"title": "Alignment Coverage",
"description": "Discard reads with alignment coverage < ALN_COV. Values between 0-1 accepted.",
"default": 0.75
},
"total_depth": {
"type": "number",
"title": "Total Depth",
"description": "Discard genes with total depth < TOTAL_DEPTH.",
"default": 1
}
}
},
{
"properties": {
"analysis_mode": {"enum": ["snps"]},
"aln_speed": {
"type": "string",
"title": "Alignment speed/sensitivity",
"description": "Option passed to bowtie2. ",
"default": "very-sensitive",
"enum": [
"very-sensitive",
"very-fast",
"fast",
"sensitive"
]
},
"aln_mode": {
"type": "string",
"title": "Alignment mode",
"description": "Option passed to bowtie2. ",
"default": "local",
"enum": [
"local",
"global"
]
},
"aln_mapid": {
"type": "number",
"title": "Alignment Map ID",
"description": "Discard reads with alignment map ID < ALN_MAPID. Values between 0-100 accepted.",
"default": 94
},
"aln_mapq": {
"type": "number",
"title": "Alignment Map Quality",
"description": "Discard reads with alignment map quality < ALN_MAPQ.",
"default": 0
},
"aln_readq": {
"type": "number",
"title": "Alignment Read Quality",
"description": "Discard reads with alignment read quality < ALN_READQ.",
"default": 20
},
"aln_cov": {
"type": "number",
"title": "Alignment Coverage",
"description": "Discard reads with alignment coverage < ALN_COV. Values between 0-1 accepted.",
"default": 0.75
},
"aln_baseq": {
"type": "number",
"title": "Alignment Base Quality",
"description": "Discard reads with alignment base quality < ALN_BASEQ.",
"default": 30
},
"aln_trim": {
"type": "number",
"title": "Alignment Trim",
"description": "Trim ALN_TRIM base-pairs from the 3' (right) end of each read.",
"default": 0
},
"site_depth": {
"type": "number",
"title": "Site Depth",
"description": "Only perform pileup for genomic sites covered by at least SITE_DEPTH post-filtered reads.",
"default": 2
},
"snp_maf": {
"type": "number",
"title": "SNP Minor Allele Frequency",
"description": "Discard SNPs with minor allele frequency < SNP_MAF.",
"default": 0.1
}
}
}
]
}
}
}
}
},
"ui": {}
}
19 changes: 19 additions & 0 deletions community/midas/1.3/process-input.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"samplesheet": "samplesheet.csv",
"db": "$.dataset.params.db.db",
"analysis_mode": "$.dataset.params.analysis.analysis_mode",
"word_size": "$.dataset.params.analysis.word_size",
"aln_cov": "$.dataset.params.analysis.aln_cov",
"aln_speed": "$.dataset.params.analysis.aln_speed",
"aln_mode": "$.dataset.params.analysis.aln_mode",
"aln_mapid": "$.dataset.params.analysis.aln_mapid",
"aln_mapq": "$.dataset.params.analysis.aln_mapq",
"aln_readq": "$.dataset.params.analysis.aln_readq",
"total_depth": "$.dataset.params.analysis.total_depth",
"fragment_length": "$.dataset.params.analysis.fragment_length",
"aln_baseq": "$.dataset.params.analysis.aln_baseq",
"aln_trim": "$.dataset.params.analysis.aln_trim",
"site_depth": "$.dataset.params.analysis.site_depth",
"snp_maf": "$.dataset.params.analysis.snp_maf",
"outdir": "$.dataset.dataPath"
}
3 changes: 3 additions & 0 deletions community/midas/1.3/process-output.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"commands": []
}