Skip to content

Commit f91c156

Browse files
committed
[amd] regenerate all processes, including OPTFLAGS=-O2 for hipcc instead of -O3 (workaround for gq_ttq crash madgraph5#806)
1 parent 3c2792a commit f91c156

File tree

46 files changed

+188
-166
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+188
-166
lines changed

epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ generate e+ e- > mu+ mu-
5757
No model currently active, so we import the Standard Model
5858
INFO: load particles
5959
INFO: load vertices
60-
DEBUG: model prefixing takes 0.005692958831787109 
60+
DEBUG: model prefixing takes 0.0057468414306640625 
6161
INFO: Restrict model sm with file models/sm/restrict_default.dat .
6262
DEBUG: Simplifying conditional expressions 
6363
DEBUG: remove interactions: u s w+ at order: QED=1 
@@ -149,7 +149,7 @@ INFO: Checking for minimal orders which gives processes.
149149
INFO: Please specify coupling orders to bypass this step.
150150
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1
151151
INFO: Process has 2 diagrams
152-
1 processes with 2 diagrams generated in 0.005 s
152+
1 processes with 2 diagrams generated in 0.004 s
153153
Total: 1 processes with 2 diagrams
154154
output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32
155155
Load PLUGIN.CUDACPP_OUTPUT
@@ -182,19 +182,19 @@ INFO: Finding symmetric diagrams for subprocess group epem_mupmum
182182
DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1547] 
183183
DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1548] 
184184
Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s
185-
Wrote files for 8 helas calls in 0.072 s
185+
Wrote files for 8 helas calls in 0.071 s
186186
DEBUG: self.vector_size =  32 [export_v4.py at line 7023] 
187187
ALOHA: aloha starts to compute helicity amplitudes
188188
ALOHA: aloha creates FFV1 routines
189189
ALOHA: aloha creates FFV2 routines
190190
ALOHA: aloha creates FFV4 routines
191-
ALOHA: aloha creates 3 routines in 0.205 s
191+
ALOHA: aloha creates 3 routines in 0.204 s
192192
ALOHA: aloha starts to compute helicity amplitudes
193193
ALOHA: aloha creates FFV1 routines
194194
ALOHA: aloha creates FFV2 routines
195195
ALOHA: aloha creates FFV4 routines
196196
ALOHA: aloha creates FFV2_4 routines
197-
ALOHA: aloha creates 7 routines in 0.260 s
197+
ALOHA: aloha creates 7 routines in 0.261 s
198198
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
199199
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
200200
<class 'aloha.create_aloha.AbstractRoutine'> FFV2
@@ -234,10 +234,10 @@ Type "launch" to generate events from this process, or see
234234
Run "open index.html" to see more information about this process.
235235
quit
236236

237-
real 0m3.845s
238-
user 0m1.829s
239-
sys 0m0.251s
240-
Code generation completed in 4 seconds
237+
real 0m2.993s
238+
user 0m1.803s
239+
sys 0m0.263s
240+
Code generation completed in 3 seconds
241241
************************************************************
242242
* *
243243
* W E L C O M E to *

epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ else ifeq ($(BACKEND),hip)
236236
GPUSUFFIX = hip
237237

238238
# Optimization flags
239+
override OPTFLAGS = -O2 # work around "Memory access fault" in gq_ttq for HIP #806: disable hipcc -O3 optimizations
239240
GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt))
240241

241242
# DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland)

epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ generate e+ e- > mu+ mu-
5757
No model currently active, so we import the Standard Model
5858
INFO: load particles
5959
INFO: load vertices
60-
DEBUG: model prefixing takes 0.005699634552001953 
60+
DEBUG: model prefixing takes 0.005699872970581055 
6161
INFO: Restrict model sm with file models/sm/restrict_default.dat .
6262
DEBUG: Simplifying conditional expressions 
6363
DEBUG: remove interactions: u s w+ at order: QED=1 
@@ -149,7 +149,7 @@ INFO: Checking for minimal orders which gives processes.
149149
INFO: Please specify coupling orders to bypass this step.
150150
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1
151151
INFO: Process has 2 diagrams
152-
1 processes with 2 diagrams generated in 0.004 s
152+
1 processes with 2 diagrams generated in 0.005 s
153153
Total: 1 processes with 2 diagrams
154154
output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu
155155
Load PLUGIN.CUDACPP_OUTPUT
@@ -177,7 +177,7 @@ ALOHA: aloha creates FFV1 routines
177177
ALOHA: aloha creates FFV2 routines
178178
ALOHA: aloha creates FFV4 routines
179179
ALOHA: aloha creates FFV2_4 routines
180-
ALOHA: aloha creates 4 routines in 0.276 s
180+
ALOHA: aloha creates 4 routines in 0.273 s
181181
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
182182
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
183183
<class 'aloha.create_aloha.AbstractRoutine'> FFV2
@@ -196,7 +196,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
196196
INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/.
197197
quit
198198

199-
real 0m0.775s
200-
user 0m0.619s
201-
sys 0m0.043s
199+
real 0m0.661s
200+
user 0m0.607s
201+
sys 0m0.048s
202202
Code generation completed in 1 seconds

epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ else ifeq ($(BACKEND),hip)
236236
GPUSUFFIX = hip
237237

238238
# Optimization flags
239+
override OPTFLAGS = -O2 # work around "Memory access fault" in gq_ttq for HIP #806: disable hipcc -O3 optimizations
239240
GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt))
240241

241242
# DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland)

epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ generate g g > t t~
5757
No model currently active, so we import the Standard Model
5858
INFO: load particles
5959
INFO: load vertices
60-
DEBUG: model prefixing takes 0.0057220458984375 
60+
DEBUG: model prefixing takes 0.005692720413208008 
6161
INFO: Restrict model sm with file models/sm/restrict_default.dat .
6262
DEBUG: Simplifying conditional expressions 
6363
DEBUG: remove interactions: u s w+ at order: QED=1 
@@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step.
150150
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED
151151
INFO: Trying process: g g > t t~ WEIGHTED<=2 @1
152152
INFO: Process has 3 diagrams
153-
1 processes with 3 diagrams generated in 0.008 s
153+
1 processes with 3 diagrams generated in 0.009 s
154154
Total: 1 processes with 3 diagrams
155155
output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32
156156
Load PLUGIN.CUDACPP_OUTPUT
@@ -183,16 +183,16 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx
183183
DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547] 
184184
DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548] 
185185
Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
186-
Wrote files for 10 helas calls in 0.074 s
186+
Wrote files for 10 helas calls in 0.073 s
187187
DEBUG: self.vector_size =  32 [export_v4.py at line 7023] 
188188
ALOHA: aloha starts to compute helicity amplitudes
189189
ALOHA: aloha creates VVV1 set of routines with options: P0
190190
ALOHA: aloha creates FFV1 routines
191-
ALOHA: aloha creates 2 routines in 0.149 s
191+
ALOHA: aloha creates 2 routines in 0.147 s
192192
ALOHA: aloha starts to compute helicity amplitudes
193193
ALOHA: aloha creates VVV1 set of routines with options: P0
194194
ALOHA: aloha creates FFV1 routines
195-
ALOHA: aloha creates 4 routines in 0.137 s
195+
ALOHA: aloha creates 4 routines in 0.135 s
196196
<class 'aloha.create_aloha.AbstractRoutine'> VVV1
197197
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
198198
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -228,10 +228,10 @@ Type "launch" to generate events from this process, or see
228228
Run "open index.html" to see more information about this process.
229229
quit
230230

231-
real 0m2.049s
232-
user 0m1.643s
233-
sys 0m0.271s
234-
Code generation completed in 2 seconds
231+
real 0m3.852s
232+
user 0m1.633s
233+
sys 0m0.277s
234+
Code generation completed in 4 seconds
235235
************************************************************
236236
* *
237237
* W E L C O M E to *

epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ else ifeq ($(BACKEND),hip)
236236
GPUSUFFIX = hip
237237

238238
# Optimization flags
239+
override OPTFLAGS = -O2 # work around "Memory access fault" in gq_ttq for HIP #806: disable hipcc -O3 optimizations
239240
GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt))
240241

241242
# DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland)

epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ generate g g > t t~
5757
No model currently active, so we import the Standard Model
5858
INFO: load particles
5959
INFO: load vertices
60-
DEBUG: model prefixing takes 0.005597114562988281 
60+
DEBUG: model prefixing takes 0.005693674087524414 
6161
INFO: Restrict model sm with file models/sm/restrict_default.dat .
6262
DEBUG: Simplifying conditional expressions 
6363
DEBUG: remove interactions: u s w+ at order: QED=1 
@@ -176,7 +176,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
176176
ALOHA: aloha starts to compute helicity amplitudes
177177
ALOHA: aloha creates VVV1 set of routines with options: P0
178178
ALOHA: aloha creates FFV1 routines
179-
ALOHA: aloha creates 2 routines in 0.146 s
179+
ALOHA: aloha creates 2 routines in 0.147 s
180180
<class 'aloha.create_aloha.AbstractRoutine'> VVV1
181181
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
182182
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -191,7 +191,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
191191
INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/.
192192
quit
193193

194-
real 0m0.897s
195-
user 0m0.476s
196-
sys 0m0.056s
197-
Code generation completed in 1 seconds
194+
real 0m1.981s
195+
user 0m0.486s
196+
sys 0m0.050s
197+
Code generation completed in 2 seconds

epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ else ifeq ($(BACKEND),hip)
236236
GPUSUFFIX = hip
237237

238238
# Optimization flags
239+
override OPTFLAGS = -O2 # work around "Memory access fault" in gq_ttq for HIP #806: disable hipcc -O3 optimizations
239240
GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt))
240241

241242
# DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland)

epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ generate g g > t t~
5757
No model currently active, so we import the Standard Model
5858
INFO: load particles
5959
INFO: load vertices
60-
DEBUG: model prefixing takes 0.005770444869995117 
60+
DEBUG: model prefixing takes 0.005487680435180664 
6161
INFO: Restrict model sm with file models/sm/restrict_default.dat .
6262
DEBUG: Simplifying conditional expressions 
6363
DEBUG: remove interactions: u s w+ at order: QED=1 
@@ -203,23 +203,23 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx
203203
DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1523] 
204204
DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547] 
205205
DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548] 
206-
Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s
207-
Wrote files for 46 helas calls in 0.194 s
206+
Generated helas calls for 2 subprocesses (19 diagrams) in 0.043 s
207+
Wrote files for 46 helas calls in 0.190 s
208208
DEBUG: self.vector_size =  32 [export_v4.py at line 7023] 
209209
ALOHA: aloha starts to compute helicity amplitudes
210210
ALOHA: aloha creates VVV1 routines
211211
ALOHA: aloha creates FFV1 routines
212212
ALOHA: aloha creates VVVV1 set of routines with options: P0
213213
ALOHA: aloha creates VVVV3 set of routines with options: P0
214214
ALOHA: aloha creates VVVV4 set of routines with options: P0
215-
ALOHA: aloha creates 5 routines in 0.332 s
215+
ALOHA: aloha creates 5 routines in 0.334 s
216216
ALOHA: aloha starts to compute helicity amplitudes
217217
ALOHA: aloha creates VVV1 routines
218218
ALOHA: aloha creates FFV1 routines
219219
ALOHA: aloha creates VVVV1 set of routines with options: P0
220220
ALOHA: aloha creates VVVV3 set of routines with options: P0
221221
ALOHA: aloha creates VVVV4 set of routines with options: P0
222-
ALOHA: aloha creates 10 routines in 0.319 s
222+
ALOHA: aloha creates 10 routines in 0.316 s
223223
<class 'aloha.create_aloha.AbstractRoutine'> VVV1
224224
<class 'aloha.create_aloha.AbstractRoutine'> VVV1
225225
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
@@ -267,10 +267,10 @@ Type "launch" to generate events from this process, or see
267267
Run "open index.html" to see more information about this process.
268268
quit
269269

270-
real 0m2.887s
271-
user 0m2.344s
272-
sys 0m0.296s
273-
Code generation completed in 3 seconds
270+
real 0m2.703s
271+
user 0m2.310s
272+
sys 0m0.313s
273+
Code generation completed in 2 seconds
274274
************************************************************
275275
* *
276276
* W E L C O M E to *

epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ else ifeq ($(BACKEND),hip)
236236
GPUSUFFIX = hip
237237

238238
# Optimization flags
239+
override OPTFLAGS = -O2 # work around "Memory access fault" in gq_ttq for HIP #806: disable hipcc -O3 optimizations
239240
GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt))
240241

241242
# DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland)

0 commit comments

Comments
 (0)