Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,12 @@ index 348c283be..74db44d84 100644
+CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)")
+###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV))
+###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))"))
+CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//)
+ifeq ($(CUDACPP_BUILDDIR),)
+$(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!)
+else
+CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>/dev/null | awk '/Building/{print $$3}' | sed s/BUILDDIR=//)
+#ifeq ($(CUDACPP_BUILDDIR),)
+#$(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!)
+#else
+$(info CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)')
+endif
+#endif
+CUDACPP_COMMONLIB=mg5amc_common
+CUDACPP_CXXLIB=mg5amc_$(processid_short)_cpp
+CUDACPP_CULIB=mg5amc_$(processid_short)_cuda
Expand All @@ -110,7 +110,7 @@ index 348c283be..74db44d84 100644

-$(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX)
- $(FC) -o $(PROG) $(PROCESS) $(MATRIX) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp
+LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458
+#LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (flag not universal -> skip?)

-$(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL)
- $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp
Expand Down
38 changes: 38 additions & 0 deletions epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@

import madgraph.various.misc as misc
import madgraph.interface.extended_cmd as extended_cmd
import logging

logger = logging.getLogger('cmdprint') # for stdout

try:
import madgraph
except:
import internal.madevent_interface as madevent_interface
else:
import madgraph.interface.madevent_interface as madevent_interface

class CPPMEInterface(madevent_interface.MadEventCmdShell):
    """MadEvent command shell that selects the madevent link target from the run card.

    The only override is :meth:`compile`: a request to build ``madevent`` is
    rewritten to one of the backend-specific link targets according to the
    ``cudacpp_backend`` entry of the run card.
    """

    # Maps each supported run_card['cudacpp_backend'] value to the build target
    # that replaces the generic 'madevent' target.
    _BACKEND_TARGETS = {
        'FORTRAN': 'madevent_fortran_link',
        'CPP': 'madevent_cpp_link',
        'CUDA': 'madevent_cuda_link',
    }

    def compile(self, *args, **opts):
        """Forward a build request to ``misc.compile``, retargeting ``madevent`` builds.

        If ``self.options['nb_core']`` is unset (falsy or the string ``'None'``)
        it is first set to the number of CPUs reported by ``multiprocessing``.

        When the first positional argument is a non-empty sequence whose first
        element is ``'madevent'`` and a run card is attached to this interface,
        that element is replaced IN PLACE by the link target matching
        ``run_card['cudacpp_backend']`` (the default value is defined in
        banner.py). Every other request is forwarded unchanged.

        Args:
            *args: positional arguments forwarded to ``misc.compile``; the first
                one is presumably the list of build targets (verify against
                ``misc.compile``).
            **opts: keyword arguments forwarded to ``misc.compile``.

        Returns:
            Whatever ``misc.compile`` returns.

        Raises:
            Exception: if ``cudacpp_backend`` is not one of 'FORTRAN', 'CPP', 'CUDA'.
        """
        import multiprocessing

        nb_core = self.options['nb_core']
        if not nb_core or nb_core == 'None':
            self.options['nb_core'] = multiprocessing.cpu_count()

        # Guard on args[0] being non-empty so that an empty target list is
        # forwarded as-is instead of raising IndexError here.
        if args and args[0] and args[0][0] == 'madevent' and hasattr(self, 'run_card'):
            cudacpp_backend = self.run_card['cudacpp_backend']  # the default value is defined in banner.py
            logger.info("Building madevent in madevent_interface.py with '%s' matrix elements", cudacpp_backend)
            try:
                target = self._BACKEND_TARGETS[cudacpp_backend]
            except (KeyError, TypeError):
                # The message is formatted with the offending value (the
                # previous code left a literal '%s' in the error text).
                raise Exception("Invalid cudacpp_backend='%s': only 'FORTRAN', 'CPP', 'CUDA' are supported" % (cudacpp_backend,))
            args[0][0] = target  # in-place edit of the caller's target list

        return misc.compile(nb_core=self.options['nb_core'], *args, **opts)

# Module-level alias for the interface class defined above.
# NOTE(review): presumably the name the plugin loader looks up to find the
# madevent interface to use - confirm against the MG5aMC plugin mechanism.
MEINTERFACE = CPPMEInterface
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,17 @@ namespace mg5amcCpu
// See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
bool ok = true; // this is just an assumption!
const std::string tag = "arm neon (128bit as in SSE4.2)";
#else
#elif defined(__x86_64__) || defined(__i386__)
bool known = true;
bool ok = __builtin_cpu_supports( "sse4.2" );
const std::string tag = "nehalem (SSE4.2)";
#else
bool known = false; // __builtin_cpu_supports is not supported
// See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html
// See https://stackoverflow.com/q/62783908
// See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
bool ok = true; // this is just an assumption!
const std::string tag = "arm neon (128bit as in SSE4.2)";
#endif
#else
bool known = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ UNAME_S := $(shell uname -s)
UNAME_P := $(shell uname -p)
###$(info UNAME_P='$(UNAME_P)')

include ../../Source/make_opts
#-------------------------------------------------------------------------------

#=== Configure common compiler flags for C++ and CUDA
Expand Down Expand Up @@ -220,7 +221,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),)
override OMPFLAGS = -fopenmp
###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578)
else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),)
override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578)
override OMPFLAGS = -fopenmp # disable OpenMP MT on Apple clang (builds fail in the CI #578)
else
override OMPFLAGS = -fopenmp
###override OMPFLAGS = # disable OpenMP MT (default before #575)
Expand Down Expand Up @@ -554,7 +555,7 @@ endif
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib)
$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp

ifneq ($(NVCC),)
$(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -405,8 +405,7 @@ namespace mg5amcCpu
{
// nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396)
constexpr int nprocesses = %(nproc)i;
static_assert( nprocesses == 1, "Assume nprocesses == 1" );
// process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2)
static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" );
Copy link
Member

@valassi valassi Jul 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is probably related to #872.

Note that nprocesses=2 was introduced back into cudacpp by PR #754.

I will propose to undo both these changes (from 754 and this 764) to fix #872 in PR #935

constexpr int process_id = %(proc_id)i; // code generation source: %(proc_id_source)s
static_assert( process_id == 1, "Assume process_id == 1" );
}
Expand Down
82 changes: 43 additions & 39 deletions epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,6 @@ def write_hardcoded_parameters(self, params):
for par in pars.split(','):
###print(len(pardef_lines), par) # for debugging
pardef_lines[par] = ( 'constexpr ' + type + ' ' + par )
misc.sprint( 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) )
###print( pardef_lines ) # for debugging
###for line in pardef_lines: misc.sprint(line) # for debugging
parset_pars = []
Expand Down Expand Up @@ -1036,14 +1035,14 @@ class PLUGIN_OneProcessExporter(PLUGIN_export_cpp.OneProcessExporterGPU):

# AV - overload export_cpp.OneProcessExporterGPU constructor (rename gCPPProcess to CPPProcess, set include_multi_channel)
def __init__(self, *args, **kwargs):
misc.sprint('Entering PLUGIN_OneProcessExporter.__init__')
# misc.sprint('Entering PLUGIN_OneProcessExporter.__init__')
for kwarg in kwargs: misc.sprint( 'kwargs[%s] = %s' %( kwarg, kwargs[kwarg] ) )
super().__init__(*args, **kwargs)
self.process_class = 'CPPProcess'
###if self.in_madevent_mode: proc_id = kwargs['prefix']+1 # madevent+cudacpp (NB: HERE SELF.IN_MADEVENT_MODE DOES NOT WORK!)
if 'prefix' in kwargs: proc_id = kwargs['prefix']+1 # madevent+cudacpp (ime+1 from ProcessExporterFortranMEGroup.generate_subprocess_directory)
else: proc_id = 0 # standalone_cudacpp
misc.sprint(proc_id)
# misc.sprint(proc_id)
self.proc_id = proc_id

# AV - overload export_cpp.OneProcessExporterGPU method (indent comments in process_lines)
Expand Down Expand Up @@ -1141,12 +1140,19 @@ def get_process_function_definitions(self, write=True):

# AV - modify export_cpp.OneProcessExporterGPU method (add debug printouts for multichannel #342)
def get_sigmaKin_lines(self, color_amplitudes, write=True):
misc.sprint('Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines')
misc.sprint(self.include_multi_channel)
misc.sprint(self.support_multichannel)
# misc.sprint('Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines')
# misc.sprint(self.include_multi_channel)
# misc.sprint(self.support_multichannel)
replace_dict = super().get_sigmaKin_lines(color_amplitudes, write=False)
replace_dict['proc_id'] = self.proc_id if self.proc_id>0 else 1
replace_dict['proc_id_source'] = 'madevent + cudacpp exporter' if self.proc_id>0 else 'standalone_cudacpp' # FIXME? use self.in_madevent_mode instead?

# Extract denominator (avoid to extend size for mirroring)
den_factors = [str(me.get_denominator_factor()) for me in \
self.matrix_elements]
replace_dict['den_factors'] = ",".join(den_factors)
misc.sprint(replace_dict['den_factors'])

if write:
file = self.read_template_file(self.process_sigmaKin_function_template) % replace_dict
file = '\n'.join( file.split('\n')[8:] ) # skip first 8 lines in process_sigmaKin_function.inc (copyright)
Expand All @@ -1160,14 +1166,14 @@ def get_all_sigmaKin_lines(self, color_amplitudes, class_name):
ret_lines = []
if self.single_helicities:
###assert self.include_multi_channel # remove this assert: must handle both cases and produce two different code bases (#473)
misc.sprint(type(self.helas_call_writer))
misc.sprint(self.support_multichannel, self.include_multi_channel)
# misc.sprint(type(self.helas_call_writer))
# misc.sprint(self.support_multichannel, self.include_multi_channel)
multi_channel = None
if self.include_multi_channel:
if not self.support_multichannel:
raise Exception("link with madevent not supported")
multi_channel = self.get_multi_channel_dictionary(self.matrix_elements[0].get('diagrams'), self.include_multi_channel)
misc.sprint(multi_channel)
# misc.sprint(multi_channel)
###misc.sprint( 'before get_matrix_element_calls', self.matrix_elements[0].get_number_of_wavefunctions() ) # WRONG value of nwf, eg 7 for gg_tt
helas_calls = self.helas_call_writer.get_matrix_element_calls(\
self.matrix_elements[0],
Expand Down Expand Up @@ -1295,14 +1301,12 @@ def get_process_info_lines(self, matrix_element):
# AV - replace the export_cpp.OneProcessExporterGPU method (invert .cc/.cu, add debug printouts)
def generate_process_files(self):
"""Generate mgOnGpuConfig.h, CPPProcess.cc, CPPProcess.h, check_sa.cc, gXXX.cu links"""
misc.sprint('Entering PLUGIN_OneProcessExporter.generate_process_files')
if self.include_multi_channel:
misc.sprint('self.include_multi_channel is already defined: this is madevent+second_exporter mode') # FIXME? use self.in_madevent_mode instead?
else:
misc.sprint('self.include_multi_channel is not yet defined: this is standalone_cudacpp mode') # see issue #473
if self.matrix_elements[0].get('has_mirror_process'):
self.matrix_elements[0].set('has_mirror_process', False)
self.nprocesses/=2
# misc.sprint('Entering PLUGIN_OneProcessExporter.generate_process_files')
if not self.include_multi_channel:
# this condition is likely wrong and needs to be removed
if self.matrix_elements[0].get('has_mirror_process'):
self.matrix_elements[0].set('has_mirror_process', False)
self.nprocesses/=2
super(PLUGIN_export_cpp.OneProcessExporterGPU, self).generate_process_files()
self.edit_CMakeLists()
self.edit_check_sa()
Expand Down Expand Up @@ -1333,15 +1337,15 @@ def generate_process_files(self):
###template_ref = 'dump_CPUTest.'+self.process_name+'.txt'
template_ref = self.template_path + '/../../../test/ref/' + 'dump_CPUTest.' + self.process_name + '.txt'
if os.path.exists( template_ref ):
misc.sprint( 'Copying test reference file: ', template_ref )
# misc.sprint( 'Copying test reference file: ', template_ref )
PLUGIN_export_cpp.cp( template_ref, self.path + '/../../test/ref' )
else:
misc.sprint( 'Test reference file does not exist and will not be copied: ', template_ref )
# else:
# misc.sprint( 'Test reference file does not exist and will not be copied: ', template_ref )

# SR - generate CMakeLists.txt file inside the P* directory
def edit_CMakeLists(self):
"""Generate CMakeLists.txt"""
misc.sprint('Entering PLUGIN_OneProcessExporter.edit_CMakeLists')
# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_CMakeLists')
template = open(pjoin(self.template_path,'CMake/SubProcesses/CMakeLists_P.txt'),'r').read()
ff = open(pjoin(self.path, 'CMakeLists.txt'),'w')
ff.write(template)
Expand All @@ -1350,7 +1354,7 @@ def edit_CMakeLists(self):
# AV - replace the export_cpp.OneProcessExporterGPU method (invert .cc/.cu, add debug printouts)
def edit_check_sa(self):
"""Generate check_sa.cc and fcheck_sa.f"""
misc.sprint('Entering PLUGIN_OneProcessExporter.edit_check_sa')
# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_check_sa')
ff = open(pjoin(self.path, 'check_sa.cc'),'w')
template = open(pjoin(self.template_path,'gpu','check_sa.cc'),'r').read()
ff.write(template) # nothing to replace in check_sa.cc
Expand All @@ -1367,7 +1371,7 @@ def edit_check_sa(self):
# AV - replace the export_cpp.OneProcessExporterGPU method (add debug printouts and multichannel handling #473)
def edit_mgonGPU(self):
"""Generate mgOnGpuConfig.h"""
misc.sprint('Entering PLUGIN_OneProcessExporter.edit_mgonGPU')
# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_mgonGPU')
template = open(pjoin(self.template_path,'gpu','mgOnGpuConfig.h'),'r').read()
replace_dict = {}
nexternal, nincoming = self.matrix_elements[0].get_nexternal_ninitial()
Expand All @@ -1387,7 +1391,7 @@ def edit_mgonGPU(self):
# AV - new method
def edit_processidfile(self):
"""Generate epoch_process_id.h"""
misc.sprint('Entering PLUGIN_OneProcessExporter.edit_processidfile')
# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_processidfile')
template = open(pjoin(self.template_path,'gpu','epoch_process_id.h'),'r').read()
replace_dict = {}
replace_dict['processid'] = self.get_process_name()
Expand All @@ -1399,7 +1403,7 @@ def edit_processidfile(self):
# AV - new method
def edit_coloramps(self):
"""Generate coloramps.h"""
misc.sprint('Entering PLUGIN_OneProcessExporter.edit_coloramps')
# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_coloramps')
template = open(pjoin(self.template_path,'gpu','coloramps.h'),'r').read()
ff = open(pjoin(self.path, 'coloramps.h'),'w')
# The following five lines from OneProcessExporterCPP.get_sigmaKin_lines (using OneProcessExporterCPP.get_icolamp_lines)
Expand All @@ -1417,7 +1421,7 @@ def edit_coloramps(self):
# AV - new method
def edit_testxxx(self):
"""Generate testxxx.cc"""
misc.sprint('Entering PLUGIN_OneProcessExporter.edit_testxxx')
# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_testxxx')
template = open(pjoin(self.template_path,'gpu','testxxx.cc'),'r').read()
replace_dict = {}
replace_dict['model_name'] = self.model_name
Expand All @@ -1428,7 +1432,7 @@ def edit_testxxx(self):
# AV - new method
def edit_memorybuffers(self):
"""Generate MemoryBuffers.h"""
misc.sprint('Entering PLUGIN_OneProcessExporter.edit_memorybuffers')
# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_memorybuffers')
template = open(pjoin(self.template_path,'gpu','MemoryBuffers.h'),'r').read()
replace_dict = {}
replace_dict['model_name'] = self.model_name
Expand All @@ -1439,7 +1443,7 @@ def edit_memorybuffers(self):
# AV - new method
def edit_memoryaccesscouplings(self):
"""Generate MemoryAccessCouplings.h"""
misc.sprint('Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings')
# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings')
template = open(pjoin(self.template_path,'gpu','MemoryAccessCouplings.h'),'r').read()
replace_dict = {}
replace_dict['model_name'] = self.model_name
Expand All @@ -1451,7 +1455,7 @@ def edit_memoryaccesscouplings(self):
# [*NB export_cpp.UFOModelConverterGPU.write_process_h_file is not called!*]
def write_process_h_file(self, writer):
"""Generate final gCPPProcess.h"""
misc.sprint('Entering PLUGIN_OneProcessExporter.write_process_h_file')
# misc.sprint('Entering PLUGIN_OneProcessExporter.write_process_h_file')
out = super().write_process_h_file(writer)
writer.seek(-1, os.SEEK_CUR)
writer.truncate()
Expand All @@ -1473,7 +1477,7 @@ def super_write_process_cc_file(self, writer):
# AV - overload the export_cpp.OneProcessExporterGPU method (add debug printout and truncate last \n)
def write_process_cc_file(self, writer):
"""Generate CPPProcess.cc"""
misc.sprint('Entering PLUGIN_OneProcessExporter.write_process_cc_file')
# misc.sprint('Entering PLUGIN_OneProcessExporter.write_process_cc_file')
###out = super().write_process_cc_file(writer)
out = self.super_write_process_cc_file(writer)
writer.seek(-1, os.SEEK_CUR)
Expand Down Expand Up @@ -1652,7 +1656,7 @@ def super_get_matrix_element_calls(self, matrix_element, color_amplitudes, multi
color[namp][njamp] = coeff
me = matrix_element.get('diagrams')
matrix_element.reuse_outdated_wavefunctions(me)
misc.sprint(multi_channel_map)
# misc.sprint(multi_channel_map)
res = []
###res.append('for(int i=0;i<%s;i++){jamp[i] = cxtype(0.,0.);}' % len(color_amplitudes))
res.append("""constexpr size_t nxcoup = ndcoup + nicoup; // both dependent and independent couplings
Expand Down Expand Up @@ -1707,7 +1711,7 @@ def super_get_matrix_element_calls(self, matrix_element, color_amplitudes, multi
sum([diagrams[idiag].get('amplitudes') for \
idiag in multi_channel_map[config]], [])]
diag_to_config[amp[0]] = config
misc.sprint(diag_to_config)
# misc.sprint(diag_to_config)
id_amp = 0
for diagram in matrix_element.get('diagrams'):
###print('DIAGRAM %3d: #wavefunctions=%3d, #diagrams=%3d' %
Expand Down Expand Up @@ -1819,13 +1823,13 @@ def get_external_line(self, wf, argument):
wf.get('me_id')-1,
wf.get('number_external')-1)
elif argument.is_boson():
misc.sprint(call)
misc.sprint( (wf.get('mass'),
wf.get('number_external')-1,
# For boson, need initial/final here
(-1) ** (wf.get('state') == 'initial'),
wf.get('me_id')-1,
wf.get('number_external')-1))
# misc.sprint(call)
# misc.sprint( (wf.get('mass'),
# wf.get('number_external')-1,
# # For boson, need initial/final here
# (-1) ** (wf.get('state') == 'initial'),
# wf.get('me_id')-1,
# wf.get('number_external')-1))
return self.format_coupling(call % \
(wf.get('mass'),
wf.get('number_external')-1,
Expand Down
Loading