Skip to content

Commit 03f75a2

Browse files
committed
code cleanup + enhancements for custom easyblock for CRISPR-DAV
1 parent 0e7be32 commit 03f75a2

File tree

1 file changed

+95
-102
lines changed

1 file changed

+95
-102
lines changed

easybuild/easyblocks/c/crispr_dav.py

Lines changed: 95 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -26,93 +26,104 @@
2626
EasyBuild support for building and installing CRISPR-DAV, implemented as an easyblock.
2727
2828
@author: Denis Kristak (INUITS)
29+
@author: Kenneth Hoste (HPC-UGent)
2930
"""
31+
import glob
3032
import os
31-
from easybuild.tools.filetools import write_file
3233
from easybuild.easyblocks.generic.binary import Binary
33-
from easybuild.tools.modules import get_software_root
3434
from easybuild.tools.build_log import EasyBuildError
35-
from easybuild.tools.run import run_cmd
36-
37-
38-
# this command is improvement (no hardcoded ways) of
39-
# https://github.com/pinetree1/crispr-dav/blob/master/Examples/example1/run.sh
40-
CUSTOM_SANITY_CHECK_COMMAND = r"""
41-
{crisprdav_installdir}/crispr.pl --conf {crisprdav_installdir}/Examples/example1/conf.txt \
42-
--region {crisprdav_installdir}/Examples/example1/amplicon.bed \
43-
--crispr {crisprdav_installdir}/Examples/example1/site.bed \
44-
--sitemap {crisprdav_installdir}/Examples/example1/sample.site \
45-
--fastqmap {crisprdav_installdir}/Examples/example1/fastq.list \
46-
--genome genomex 2>&1 | grep 'Generated HTML report for GENEX_CR1'
47-
"""
35+
from easybuild.tools.filetools import apply_regex_substitutions, back_up_file, write_file
36+
from easybuild.tools.modules import get_software_root
4837

4938

5039
class EB_CRISPR_minus_DAV(Binary):
5140
"""
52-
Support for building/installing crispr-dav.
41+
Support for building/installing CRISPR-DAV.
5342
"""
54-
extract_sources = True
5543

5644
def __init__(self, *args, **kwargs):
    """Initialise the CRISPR-DAV easyblock and enable extraction of the sources."""
    super(EB_CRISPR_minus_DAV, self).__init__(*args, **kwargs)

    # set after the parent constructor has run, so this value overrides whatever was configured before
    self.cfg['extract_sources'] = True
5948

6049
def post_install_step(self):
    """
    Update configuration files with correct paths to dependencies and files in installation.

    The top-level conf.txt and the conf.txt of every Examples/example* directory are backed up
    and patched via modify_conf_files; afterwards the fastq.list files of the bundled examples
    are regenerated so that they refer to files in the installation directory.

    :raise EasyBuildError: if the installation root of one of the required dependencies is not found
    """
    # parent directory of all examples; kept under its own name so that the per-example
    # directory computed in the loop at the end of this method never overwrites it
    examples_dir = os.path.join(self.installdir, 'Examples')
    config_file = os.path.join(self.installdir, 'conf.txt')

    # collect installation root of each tool that is referenced in conf.txt
    dep_roots = {}
    for dep in ('ABRA2', 'BEDTools', 'FLASH', 'Java', 'PRINSEQ', 'pysamstats', 'R', 'SAMtools'):
        root = get_software_root(dep)
        if not root:
            raise EasyBuildError("Failed to find root directory for %s. Is it included as dependency?", dep)
        dep_roots[dep] = root

    # we will be changing both Examples/... conf files (for sanity checks)
    # as well as the one in the root of the installation (for proper functioning)
    cfg_files = [config_file] + glob.glob(os.path.join(examples_dir, 'example*', 'conf.txt'))

    # create a backup of each original conf.txt file before touching it
    for filename in cfg_files:
        back_up_file(filename)

    # according to docs, we have to setup conf.txt so that it contains correct paths to dependencies
    # https://github.com/pinetree1/crispr-dav/blob/master/Install-and-Run.md
    # User then has to change conf.txt to include paths to genomes;
    # the example conf.txt files as well as the main one are changed to make it easier for the user.
    self.modify_conf_files(dep_roots, cfg_files)

    # generate fastq.list files with correct paths (used for sanity checks)
    for example in ('example1', 'example2'):
        example_dir = os.path.join(examples_dir, example)
        rawfastq_dir = os.path.join(example_dir, 'rawfastq')

        # one line per sample; fields must be separated by a tab (\t) or it won't work!
        fastq_list = [
            '\t'.join([
                'sample%s' % x,
                os.path.join(rawfastq_dir, 'sample%s_R1.fastq.gz' % x),
                os.path.join(rawfastq_dir, 'sample%s_R2.fastq.gz' % x),
            ])
            for x in range(1, 5)
        ]

        # last line should not end with newline (\n)!
        write_file(os.path.join(example_dir, 'fastq.list'), '\n'.join(fastq_list))
10297

10398
def sanity_check_step(self):
    """
    Custom sanity check for CRISPR-DAV.

    Checks that crispr.pl and the Modules, Examples and Rscripts directories are installed,
    that 'crispr.pl --help' prints a usage message, and that running the bundled example1
    reports that the HTML report for GENEX_CR1 was generated.
    """
    custom_paths = {
        'files': ['crispr.pl'],
        'dirs': ['Modules', 'Examples', 'Rscripts'],
    }

    # example command from docs - https://github.com/pinetree1/crispr-dav/blob/master/Install-and-Run.md
    # this command is an improvement (no hardcoded stuff) of
    # https://github.com/pinetree1/crispr-dav/blob/master/Examples/example1/run.sh
    example_dir = os.path.join(self.installdir, 'Examples', 'example1')

    # output of the example run is captured in the build directory, and searched with grep afterwards
    outfile = os.path.join(self.builddir, 'test.out')

    example_cmd = ' '.join([
        os.path.join(self.installdir, 'crispr.pl'),
        # each option is passed exactly once
        "--conf %s" % os.path.join(example_dir, 'conf.txt'),
        "--region %s" % os.path.join(example_dir, 'amplicon.bed'),
        "--crispr %s" % os.path.join(example_dir, 'site.bed'),
        "--sitemap %s" % os.path.join(example_dir, 'sample.site'),
        "--fastqmap %s" % os.path.join(example_dir, 'fastq.list'),
        "--genome genomex",
        "2>&1 | tee %s" % outfile,
        " && grep 'Generated HTML report for GENEX_CR1' %s" % outfile,
    ])

    custom_commands = [
        "crispr.pl --help 2>&1 | grep 'Usage: '",
        example_cmd,
    ]

    super(EB_CRISPR_minus_DAV, self).sanity_check_step(custom_paths=custom_paths, custom_commands=custom_commands)
118129

@@ -121,55 +132,37 @@ def make_module_extra(self):
121132
txt += self.module_generator.prepend_paths('PATH', [''])
122133
return txt
123134

124-
# replacing hardcoded paths in conf.txt files
125-
def modify_conf_files(self, crisprdav_ex, abra2_dir, prinseq_dir, flash_dir, config_files_to_change):
126-
# read input file
127-
for curr_config_file in config_files_to_change:
128-
fin = open(curr_config_file, "rt")
129-
# read file contents to string
130-
data = fin.read()
131-
# creating pairs (tuples) with original and new string
132-
ref_fasta_o = 'ref_fasta = genome/genomex.fa'
133-
ref_fasta_r = 'ref_fasta = {crisprdav_ex}/genome/genomex.fa'.format(crisprdav_ex=crisprdav_ex)
134-
bwa_idx_o = 'bwa_idx = genome/genomex.fa'
135-
bwa_idx_r = 'bwa_idx = {crisprdav_ex}/genome/genomex.fa'.format(crisprdav_ex=crisprdav_ex)
136-
refGene_o = 'refGene = genome/refgenex.txt'
137-
refGene_r = 'refGene = {crisprdav_ex}/genome/refgenex.txt'.format(crisprdav_ex=crisprdav_ex)
138-
abra_o = 'abra = /bfx/app/bin/abra-0.97-SNAPSHOT-jar-with-dependencies.jar'
139-
abra_r = 'abra = {abra2_dir}/abra2-2.23.jar'.format(abra2_dir=abra2_dir)
140-
prinseq_o = 'prinseq = /bfx/app/bin/prinseq-lite.pl'
141-
prinseq_r = 'prinseq = {prinseq_dir}/prinseq-lite.pl'.format(prinseq_dir=prinseq_dir)
142-
samtools_o = 'samtools = /bfx/app/bin/samtools'
143-
samtools_r = ''
144-
flash_o = 'flash = /bfx/app/bin/flash2'
145-
flash_r = 'flash = {flash_dir}/bin/flash2'.format(flash_dir=flash_dir)
146-
bedtools_o = 'bedtools = /bfx/app/bin/bedtools'
147-
bedtools_r = ''
148-
java_o = 'java = /usr/bin/java'
149-
java_r = ''
150-
pysamstats_o = 'pysamstats = /bfx/app/bin/pysamstats'
151-
pysamstats_r = ''
152-
rscript_o = 'rscript = /bfx/app/bin/Rscript'
153-
rscript_r = ''
154-
# saving in list of tuples for easy iteration
155-
text_to_replace = [
156-
(ref_fasta_o, ref_fasta_r),
157-
(bwa_idx_o, bwa_idx_r),
158-
(refGene_o, refGene_r),
159-
(abra_o, abra_r),
160-
(prinseq_o, prinseq_r),
161-
(samtools_o, samtools_r),
162-
(flash_o, flash_r),
163-
(bedtools_o, bedtools_r),
164-
(java_o, java_r),
165-
(pysamstats_o, pysamstats_r),
166-
(rscript_o, rscript_r)]
167-
168-
# iterating through all tuples & replacing in string stored in memory
169-
for searched_str, replace_str in text_to_replace:
170-
data = data.replace(searched_str, replace_str)
171-
fin.close()
172-
# override the original conf.txt file with modified conf.txt string stored in memory
173-
fin = open(curr_config_file, "wt")
174-
fin.write(data)
175-
fin.close()
135+
def modify_conf_files(self, dep_roots, cfg_files):
    """
    Replace hardcoded paths in config files.

    :param dep_roots: dict mapping dependency name to its installation root; entries for ABRA2,
                      BEDTools, FLASH, Java, PRINSEQ, pysamstats, R and SAMtools are required
    :param cfg_files: list of paths to the conf.txt files that should be patched in place
    :raise EasyBuildError: if the ABRA2 version can not be determined from the environment
    """
    # name of the ABRA2 jar file includes the version, which is picked up from the environment
    abra2_version = os.getenv('EBVERSIONABRA2')
    if not abra2_version:
        raise EasyBuildError("Failed to determine ABRA2 version: $EBVERSIONABRA2 is not defined")

    abra2_jar = os.path.join(dep_roots['ABRA2'], 'abra2-%s.jar' % abra2_version)
    pysamstats_bin = os.path.join(dep_roots['pysamstats'], 'bin', 'pysamstats')

    # substitutions for paths to tools, these are the same for every config file
    tool_regex_subs = [
        (r'^abra\s*=.*/abra.*\.jar', 'abra = ' + abra2_jar),
        (r'^bedtools\s*=.*/bin/bedtools', 'bedtools = ' + os.path.join(dep_roots['BEDTools'], 'bin', 'bedtools')),
        (r'^flash\s*=.*/bin/flash2', 'flash = ' + os.path.join(dep_roots['FLASH'], 'bin', 'flash2')),
        (r'^java\s*=.*/bin/java', 'java = ' + os.path.join(dep_roots['Java'], 'bin', 'java')),
        (r'^prinseq\s*=.*/prinseq-lite\.pl', 'prinseq = ' + os.path.join(dep_roots['PRINSEQ'], 'prinseq-lite.pl')),
        (r'^pysamstats\s*=.*/bin/pysamstats', 'pysamstats = ' + pysamstats_bin),
        (r'^rscript\s*=.*/bin/Rscript', 'rscript = ' + os.path.join(dep_roots['R'], 'bin', 'Rscript')),
        (r'^samtools\s*=.*/bin/samtools', 'samtools = ' + os.path.join(dep_roots['SAMtools'], 'bin', 'samtools')),
    ]

    for cfg_file in cfg_files:
        # config file of an example refers to the genome files of that example,
        # any other config file (the top-level conf.txt) is pointed to the genome files of example1
        dirname = os.path.dirname(cfg_file)
        if os.path.basename(dirname).startswith('example'):
            example_dir = dirname
        else:
            example_dir = os.path.join(self.installdir, 'Examples', 'example1')

        genome_dir = os.path.join(example_dir, 'genome')
        genomex_fa = os.path.join(genome_dir, 'genomex.fa')

        # built from scratch for every config file, so substitutions
        # that were composed for one file are never applied to another
        genome_regex_subs = [
            (r'^ref_fasta\s*=.*genome/genomex\.fa', 'ref_fasta = ' + genomex_fa),
            (r'^bwa_idx\s*=.*genome/genomex\.fa', 'bwa_idx = ' + genomex_fa),
            (r'^refGene\s*=.*genome/refgenex\.txt', 'refGene = ' + os.path.join(genome_dir, 'refgenex.txt')),
        ]

        apply_regex_substitutions(cfg_file, tool_regex_subs + genome_regex_subs, on_missing_match='error')

0 commit comments

Comments
 (0)