@@ -1080,7 +1080,7 @@ def get_process_function_definitions(self, write=True):
10801080 % (len (coupling_indep ), ' ), cxmake( m_pars->' .join (coupling_indep )) # AV only indep!
10811081 replace_dict ['cipcdevice' ] = '__device__ __constant__ fptype cIPC[%i];' % (2 * len (coupling_indep ))
10821082 replace_dict ['cipcstatic' ] = 'static fptype cIPC[%i];' % (2 * len (coupling_indep ))
1083- replace_dict ['cipc2tipcSym' ] = 'checkCuda( cudaMemcpyToSymbol( cIPC, tIPC, %i * sizeof( cxtype ) ) );' % len (coupling_indep )
1083+ replace_dict ['cipc2tipcSym' ] = 'gpuMemcpyToSymbol( cIPC, tIPC, %i * sizeof( cxtype ) );' % len (coupling_indep )
10841084 replace_dict ['cipc2tipc' ] = 'memcpy( cIPC, tIPC, %i * sizeof( cxtype ) );' % len (coupling_indep )
10851085 replace_dict ['cipcdump' ] = '\n //for ( i=0; i<%i; i++ ) std::cout << std::setprecision(17) << "tIPC[i] = " << tIPC[i] << std::endl;' % len (coupling_indep )
10861086 coup_str_hrd = '__device__ const fptype cIPC[%s] = { ' % (len (coupling_indep )* 2 )
@@ -1091,7 +1091,7 @@ def get_process_function_definitions(self, write=True):
10911091 replace_dict ['cipcassign' ] = '//const cxtype tIPC[0] = { ... }; // nicoup=0'
10921092 replace_dict ['cipcdevice' ] = '__device__ __constant__ fptype* cIPC = nullptr; // unused as nicoup=0'
10931093 replace_dict ['cipcstatic' ] = 'static fptype* cIPC = nullptr; // unused as nicoup=0'
1094- replace_dict ['cipc2tipcSym' ] = '//checkCuda( cudaMemcpyToSymbol( cIPC, tIPC, %i * sizeof( cxtype ) ) ); // nicoup=0' % len (coupling_indep )
1094+ replace_dict ['cipc2tipcSym' ] = '//gpuMemcpyToSymbol( cIPC, tIPC, %i * sizeof( cxtype ) ); // nicoup=0' % len (coupling_indep )
10951095 replace_dict ['cipc2tipc' ] = '//memcpy( cIPC, tIPC, %i * sizeof( cxtype ) ); // nicoup=0' % len (coupling_indep )
10961096 replace_dict ['cipcdump' ] = ''
10971097 replace_dict ['cipchrdcod' ] = '__device__ const fptype* cIPC = nullptr; // unused as nicoup=0'
@@ -1100,7 +1100,7 @@ def get_process_function_definitions(self, write=True):
11001100 % (len (params ), ', (fptype)m_pars->' .join (params ))
11011101 replace_dict ['cipddevice' ] = '__device__ __constant__ fptype cIPD[%i];' % (len (params ))
11021102 replace_dict ['cipdstatic' ] = 'static fptype cIPD[%i];' % (len (params ))
1103- replace_dict ['cipd2tipdSym' ] = 'checkCuda( cudaMemcpyToSymbol( cIPD, tIPD, %i * sizeof( fptype ) ) );' % len (params )
1103+ replace_dict ['cipd2tipdSym' ] = 'gpuMemcpyToSymbol( cIPD, tIPD, %i * sizeof( fptype ) );' % len (params )
11041104 replace_dict ['cipd2tipd' ] = 'memcpy( cIPD, tIPD, %i * sizeof( fptype ) );' % len (params )
11051105 replace_dict ['cipddump' ] = '\n //for ( i=0; i<%i; i++ ) std::cout << std::setprecision(17) << "tIPD[i] = " << tIPD[i] << std::endl;' % len (params )
11061106 param_str_hrd = '__device__ const fptype cIPD[%s] = { ' % len (params )
@@ -1111,7 +1111,7 @@ def get_process_function_definitions(self, write=True):
11111111 replace_dict ['cipdassign' ] = '//const fptype tIPD[0] = { ... }; // nparam=0'
11121112 replace_dict ['cipddevice' ] = '//__device__ __constant__ fptype* cIPD = nullptr; // unused as nparam=0'
11131113 replace_dict ['cipdstatic' ] = '//static fptype* cIPD = nullptr; // unused as nparam=0'
1114- replace_dict ['cipd2tipdSym' ] = '//checkCuda( cudaMemcpyToSymbol( cIPD, tIPD, %i * sizeof( fptype ) ) ); // nparam=0' % len (params )
1114+ replace_dict ['cipd2tipdSym' ] = '//gpuMemcpyToSymbol( cIPD, tIPD, %i * sizeof( fptype ) ); // nparam=0' % len (params )
11151115 replace_dict ['cipd2tipd' ] = '//memcpy( cIPD, tIPD, %i * sizeof( fptype ) ); // nparam=0' % len (params )
11161116 replace_dict ['cipddump' ] = ''
11171117 replace_dict ['cipdhrdcod' ] = '//__device__ const fptype* cIPD = nullptr; // unused as nparam=0'
@@ -1183,13 +1183,13 @@ def get_all_sigmaKin_lines(self, color_amplitudes, class_name):
11831183 fptype* allDenominators, // output: multichannel denominators[nevt], running_sum_over_helicities
11841184#endif
11851185 fptype_sv* jamp2_sv // output: jamp2[nParity][ncolor][neppV] for color choice (nullptr if disabled)
1186- #ifndef __CUDACC__
1186+ #ifndef MGONGPUCPP_GPUIMPL
11871187 , const int ievt00 // input: first event number in current C++ event page (for CUDA, ievt depends on threadid)
11881188#endif
11891189 )
11901190 //ALWAYS_INLINE // attributes are not permitted in a function definition
11911191 {
1192- #ifdef __CUDACC__
1192+ #ifdef MGONGPUCPP_GPUIMPL
11931193 using namespace mg5amcGpu;
11941194 using M_ACCESS = DeviceAccessMomenta; // non-trivial access: buffer includes all events
11951195 using E_ACCESS = DeviceAccessMatrixElements; // non-trivial access: buffer includes all events
@@ -1216,7 +1216,7 @@ def get_all_sigmaKin_lines(self, color_amplitudes, class_name):
12161216#endif /* clang-format on */
12171217 mgDebug( 0, __FUNCTION__ );
12181218 //printf( \" calculate_wavefunctions: ihel=%2d\\ n\" , ihel );
1219- #ifndef __CUDACC__
1219+ #ifndef MGONGPUCPP_GPUIMPL
12201220 //printf( \" calculate_wavefunctions: ievt00=%d\\ n\" , ievt00 );
12211221#endif""" )
12221222 nwavefuncs = self .matrix_elements [0 ].get_number_of_wavefunctions ()
@@ -1253,7 +1253,7 @@ def get_all_sigmaKin_lines(self, color_amplitudes, class_name):
12531253#endif
12541254 for( int iParity = 0; iParity < nParity; ++iParity )
12551255 { // START LOOP ON IPARITY
1256- #ifndef __CUDACC__
1256+ #ifndef MGONGPUCPP_GPUIMPL
12571257 const int ievt0 = ievt00 + iParity * neppV;
12581258#endif""" )
12591259 ret_lines += helas_calls
@@ -1653,8 +1653,10 @@ def super_get_matrix_element_calls(self, matrix_element, color_amplitudes, multi
16531653 allCOUPs[idcoup] = CD_ACCESS::idcoupAccessBufferConst( allcouplings, idcoup ); // dependent couplings, vary event-by-event
16541654 for( size_t iicoup = 0; iicoup < nicoup; iicoup++ )
16551655 allCOUPs[ndcoup + iicoup] = CI_ACCESS::iicoupAccessBufferConst( cIPC, iicoup ); // independent couplings, fixed for all events
1656+ #ifdef MGONGPUCPP_GPUIMPL
16561657#ifdef __CUDACC__
16571658#pragma nv_diagnostic pop
1659+ #endif
16581660 // CUDA kernels take input/output buffers with momenta/MEs for all events
16591661 const fptype* momenta = allmomenta;
16601662 const fptype* COUPs[nxcoup];
@@ -1770,7 +1772,7 @@ def get_external(self, wf, argument):
17701772 split_line2 = [ str .lstrip (' ' ).rstrip (' ' ) for str in split_line2 ] # AV
17711773 split_line2 .insert (2 , '0' ) # add parameter fmass=0
17721774 line2 = ', ' .join (split_line2 )
1773- text = '#if not( defined __CUDACC__ and defined MGONGPU_TEST_DIVERGENCE )\n %s\n #else\n if( ( blockDim.x * blockIdx.x + threadIdx.x ) %% 2 == 0 )\n %s\n else\n %s\n #endif\n ' # AV
1775+ text = '#if not( defined MGONGPUCPP_GPUIMPL and defined MGONGPU_TEST_DIVERGENCE )\n %s\n #else\n if( ( blockDim.x * blockIdx.x + threadIdx.x ) %% 2 == 0 )\n %s\n else\n %s\n #endif\n ' # AV
17741776 return text % (line , line , line2 )
17751777 text = '%s\n ' # AV
17761778 return text % line
0 commit comments