2626EasyBuild support for building and installing CRISPR-DAV, implemented as an easyblock.
2727
2828@author: Denis Kristak (INUITS)
29+ @author: Kenneth Hoste (HPC-UGent)
2930"""
31+ import glob
3032import os
31- from easybuild .tools .filetools import write_file
3233from easybuild .easyblocks .generic .binary import Binary
33- from easybuild .tools .modules import get_software_root
3434from easybuild .tools .build_log import EasyBuildError
35- from easybuild .tools .run import run_cmd
36-
37-
38- # this command is improvement (no hardcoded ways) of
39- # https://github.com/pinetree1/crispr-dav/blob/master/Examples/example1/run.sh
40- CUSTOM_SANITY_CHECK_COMMAND = r"""
41- {crisprdav_installdir}/crispr.pl --conf {crisprdav_installdir}/Examples/example1/conf.txt \
42- --region {crisprdav_installdir}/Examples/example1/amplicon.bed \
43- --crispr {crisprdav_installdir}/Examples/example1/site.bed \
44- --sitemap {crisprdav_installdir}/Examples/example1/sample.site \
45- --fastqmap {crisprdav_installdir}/Examples/example1/fastq.list \
46- --genome genomex 2>&1 | grep 'Generated HTML report for GENEX_CR1'
47- """
35+ from easybuild .tools .filetools import apply_regex_substitutions , back_up_file , write_file
36+ from easybuild .tools .modules import get_software_root
4837
4938
5039class EB_CRISPR_minus_DAV (Binary ):
5140 """
52- Support for building/installing crispr-dav .
41+ Support for building/installing CRISPR-DAV .
5342 """
54- extract_sources = True
5543
def __init__(self, *args, **kwargs):
    """Initialise the CRISPR-DAV easyblock and force extraction of the sources."""
    super(EB_CRISPR_minus_DAV, self).__init__(*args, **kwargs)
    # always set the 'extract_sources' easyconfig parameter, regardless of what the easyconfig specifies
    self.cfg['extract_sources'] = True
5948
def post_install_step(self):
    """
    Update configuration files with correct paths to dependencies and files in installation.

    The top-level conf.txt and every Examples/example*/conf.txt are backed up and then patched
    via modify_conf_files; afterwards fastq.list is regenerated for example1 and example2
    so that it refers to the raw FASTQ files by absolute path.

    :raises EasyBuildError: if the installation root of a required dependency can not be determined
    """
    examples_dir = os.path.join(self.installdir, 'Examples')
    config_file = os.path.join(self.installdir, 'conf.txt')

    # every tool that gets a path in conf.txt must be available as a dependency
    dep_roots = {}
    for dep in ('ABRA2', 'BEDTools', 'FLASH', 'Java', 'PRINSEQ', 'pysamstats', 'R', 'SAMtools'):
        root = get_software_root(dep)
        if not root:
            raise EasyBuildError("Failed to find root directory for %s. Is it included as dependency?", dep)
        dep_roots[dep] = root

    # we change both the conf.txt of each example (for sanity checks)
    # as well as the one in the root of the installation (for proper functioning);
    # sorted to get a deterministic processing order
    cfg_files = [config_file] + sorted(glob.glob(os.path.join(examples_dir, 'example*', 'conf.txt')))

    # create a backup of each configuration file before touching it
    for filename in cfg_files:
        back_up_file(filename)

    # according to docs, we have to setup conf.txt so that it contains correct paths to dependencies
    # https://github.com/pinetree1/crispr-dav/blob/master/Install-and-Run.md
    # User then has to change conf.txt to include paths to genomes
    self.modify_conf_files(dep_roots, cfg_files)

    # generating fastq.list files with correct paths (used for sanity checks)
    for example in ('example1', 'example2'):
        # always start from the Examples directory itself: re-using a single variable here
        # would make the second iteration point to Examples/example1/example2
        example_dir = os.path.join(examples_dir, example)
        fastq_list_file = os.path.join(example_dir, 'fastq.list')
        rawfastq_dir = os.path.join(example_dir, 'rawfastq')

        fastq_list = []
        for x in range(1, 5):
            # fields must be separated by a tab (\t) or it won't work!
            line = '\t'.join([
                'sample%s' % x,
                os.path.join(rawfastq_dir, 'sample%s_R1.fastq.gz' % x),
                os.path.join(rawfastq_dir, 'sample%s_R2.fastq.gz' % x),
            ])
            fastq_list.append(line)

        # last line should not end with newline (\n)!
        write_file(fastq_list_file, '\n'.join(fastq_list))
10297
def sanity_check_step(self):
    """
    Custom sanity check for CRISPR-DAV.

    Checks that crispr.pl and the Modules, Examples and Rscripts directories are present,
    that 'crispr.pl --help' prints a usage message, and that running the bundled example1
    analysis reports a generated HTML report.
    """
    custom_paths = {
        'files': ['crispr.pl'],
        'dirs': ['Modules', 'Examples', 'Rscripts'],
    }

    # example command from docs - https://github.com/pinetree1/crispr-dav/blob/master/Install-and-Run.md
    # this command is an improvement (no hardcoded stuff) of
    # https://github.com/pinetree1/crispr-dav/blob/master/Examples/example1/run.sh
    example_dir = os.path.join(self.installdir, 'Examples', 'example1')
    outfile = os.path.join(self.builddir, 'test.out')
    example_cmd = ' '.join([
        os.path.join(self.installdir, 'crispr.pl'),
        # each option is passed exactly once
        "--conf %s" % os.path.join(example_dir, 'conf.txt'),
        "--region %s" % os.path.join(example_dir, 'amplicon.bed'),
        "--crispr %s" % os.path.join(example_dir, 'site.bed'),
        "--sitemap %s" % os.path.join(example_dir, 'sample.site'),
        "--fastqmap %s" % os.path.join(example_dir, 'fastq.list'),
        "--genome genomex",
        # keep the full output in a file in the build directory, then look for the success message in it
        "2>&1 | tee %s" % outfile,
        "&& grep 'Generated HTML report for GENEX_CR1' %s" % outfile,
    ])

    custom_commands = [
        "crispr.pl --help 2>&1 | grep 'Usage: '",
        example_cmd,
    ]

    super(EB_CRISPR_minus_DAV, self).sanity_check_step(custom_paths=custom_paths, custom_commands=custom_commands)
118129
@@ -121,55 +132,37 @@ def make_module_extra(self):
121132 txt += self .module_generator .prepend_paths ('PATH' , ['' ])
122133 return txt
123134
def modify_conf_files(self, dep_roots, cfg_files):
    """
    Replace hardcoded paths in config files.

    :param dep_roots: dict mapping dependency name to its installation root; the keys 'ABRA2', 'BEDTools',
                      'FLASH', 'Java', 'PRINSEQ', 'pysamstats', 'R' and 'SAMtools' are required
    :param cfg_files: list of paths to the conf.txt files that should be patched in place
    :raises EasyBuildError: if the ABRA2 version can not be determined from the environment
    """
    # the name of the ABRA2 jar file includes the version, which is picked up from $EBVERSIONABRA2
    abra2_version = os.getenv('EBVERSIONABRA2')
    if not abra2_version:
        raise EasyBuildError("Failed to determine ABRA2 version: $EBVERSIONABRA2 is not defined")
    abra2_jar = os.path.join(dep_roots['ABRA2'], 'abra2-%s.jar' % abra2_version)

    def bin_path(dep, name):
        """Return path to specified executable in bin subdirectory of given dependency."""
        return os.path.join(dep_roots[dep], 'bin', name)

    # substitutions for tool paths, identical for all configuration files
    tool_subs = [
        (r'^abra\s*=.*/abra.*\.jar', 'abra = ' + abra2_jar),
        (r'^bedtools\s*=.*/bin/bedtools', 'bedtools = ' + bin_path('BEDTools', 'bedtools')),
        (r'^flash\s*=.*/bin/flash2', 'flash = ' + bin_path('FLASH', 'flash2')),
        (r'^java\s*=.*/bin/java', 'java = ' + bin_path('Java', 'java')),
        (r'^prinseq\s*=.*/prinseq-lite\.pl', 'prinseq = ' + os.path.join(dep_roots['PRINSEQ'], 'prinseq-lite.pl')),
        (r'^pysamstats\s*=.*/bin/pysamstats', 'pysamstats = ' + bin_path('pysamstats', 'pysamstats')),
        (r'^rscript\s*=.*/bin/Rscript', 'rscript = ' + bin_path('R', 'Rscript')),
        (r'^samtools\s*=.*/bin/samtools', 'samtools = ' + bin_path('SAMtools', 'samtools')),
    ]

    for cfg_file in cfg_files:
        # a conf.txt located in an example directory refers to the genome files of that example,
        # any other conf.txt (the top-level one) is pointed at the genome files of example1
        dirname = os.path.dirname(cfg_file)
        if os.path.basename(dirname).startswith('example'):
            example_dir = dirname
        else:
            example_dir = os.path.join(self.installdir, 'Examples', 'example1')

        genome_dir = os.path.join(example_dir, 'genome')
        genomex_fa = os.path.join(genome_dir, 'genomex.fa')

        # built from scratch for each file, so the shared list of tool substitutions is never modified
        # and substitutions meant for one configuration file are not applied to the next one
        genome_subs = [
            (r'^ref_fasta\s*=.*genome/genomex\.fa', 'ref_fasta = ' + genomex_fa),
            (r'^bwa_idx\s*=.*genome/genomex\.fa', 'bwa_idx = ' + genomex_fa),
            (r'^refGene\s*=.*genome/refgenex\.txt', 'refGene = ' + os.path.join(genome_dir, 'refgenex.txt')),
        ]

        apply_regex_substitutions(cfg_file, tool_subs + genome_subs, on_missing_match='error')
0 commit comments