Skip to content

Commit 982f0e9

Browse files
committed
[hack_ihel] in gg_tt.sa (MemoryAccessMatrixElements.h, MemoryBuffers.h, check_sa.cc), WIP towards fixing cuda builds
These changes essentially change the ME buffer from a simple array to an AOSOA (from 1 to ncomb+1 elements per event). However, this now breaks C++ builds too: "CUDA_HOME=none make -j" now fails.
1 parent cabcb0d commit 982f0e9

File tree

3 files changed

+53
-30
lines changed

3 files changed

+53
-30
lines changed

epochX/cudacpp/gg_tt.sa/SubProcesses/MemoryAccessMatrixElements.h

Lines changed: 49 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,32 @@ namespace mg5amcCpu
2222
//----------------------------------------------------------------------------
2323

2424
// A class describing the internal layout of memory buffers for matrix elements
25-
// This implementation uses a plain ARRAY[nevt]
26-
// [If many implementations are used, a suffix _ARRAYv1 should be appended to the class name]
27-
class MemoryAccessMatrixElementsBase //_ARRAYv1
25+
// This implementation uses an AOSOA[npagME][ncomb+1][neppME] where nevt=npagME*neppME
26+
// [If many implementations are used, a suffix _AOSOAv1 should be appended to the class name]
27+
class MemoryAccessMatrixElementsBase //_AOSOAv1
2828
{
29+
public:
30+
31+
// Number of Events Per Page in the matrix element AOSOA memory buffer layout
32+
#ifdef MGONGPUCPP_GPUIMPL
33+
static constexpr int neppME = 32/sizeof(fptype); // (DEFAULT) 32-byte GPU cache line (256 bits): 4 (DOUBLE) or 8 (FLOAT)
34+
#else
35+
#ifdef MGONGPU_CPPSIMD
36+
static constexpr int neppME = MGONGPU_CPPSIMD; // (DEFAULT) neppME=neppV for optimal performance
37+
#else
38+
static constexpr int neppME = 1; // (DEFAULT) neppME=neppV for optimal performance (NB: this is equivalent to AOS)
39+
#endif
40+
#endif
41+
2942
private:
3043

3144
friend class MemoryAccessHelper<MemoryAccessMatrixElementsBase>;
3245
friend class KernelAccessHelper<MemoryAccessMatrixElementsBase, true>;
3346
friend class KernelAccessHelper<MemoryAccessMatrixElementsBase, false>;
3447

48+
// The number of (good and bad) helicity combinations
49+
static constexpr int ncomb = CPPProcess::ncomb;
50+
3551
//--------------------------------------------------------------------------
3652
// NB all KernelLaunchers assume that memory access can be decomposed as "accessField = decodeRecord( accessRecord )"
3753
// (in other words: first locate the event record for a given event, then locate an element in that record)
@@ -43,19 +59,24 @@ namespace mg5amcCpu
4359
ieventAccessRecord( fptype* buffer,
4460
const int ievt )
4561
{
46-
return &( buffer[ievt] ); // ARRAY[nevt]
62+
const int ipagME = ievt / neppME; // #event "ME-page"
63+
const int ieppME = ievt % neppME; // #event in the current event ME-page
64+
constexpr int icomb = 0;
65+
return &( buffer[ipagME * ( ncomb + 1 ) * neppME + icomb * neppME + ieppME] ); // AOSOA[ipagME][icomb][ieppME]
4766
}
4867

4968
//--------------------------------------------------------------------------
5069

5170
// Locate a field (output) of an event record (input) from the given field indexes (input)
5271
// [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer, Ts... args ) <===]
53-
// [NB: expand variadic template "Ts... args" to empty and rename "Field" as empty]
72+
// [NB: expand variadic template "Ts... args" to "const int icomb" and rename "Field" as "Icomb"]
5473
static __host__ __device__ inline fptype&
55-
decodeRecord( fptype* buffer )
74+
decodeRecord( fptype* buffer,
75+
const int icomb )
5676
{
57-
constexpr int ievt = 0;
58-
return buffer[ievt]; // ARRAY[nevt]
77+
constexpr int ipagME = 0;
78+
constexpr int ieppME = 0;
79+
return buffer[ipagME * ( ncomb + 1 ) * neppME + icomb * neppME + ieppME]; // AOSOA[ipagME][icomb][ieppME]
5980
}
6081
};
6182

@@ -76,23 +97,23 @@ namespace mg5amcCpu
7697
static constexpr auto ieventAccessRecordConst = MemoryAccessHelper<MemoryAccessMatrixElementsBase>::ieventAccessRecordConst;
7798

7899
// Locate a field (output) of an event record (input) from the given field indexes (input)
79-
// [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer ) <===]
80-
static constexpr auto decodeRecord = MemoryAccessHelper<MemoryAccessMatrixElementsBase>::decodeRecord;
100+
// [Signature (non-const) ===> fptype& decodeRecordIcomb( fptype* buffer, const int icomb ) <===]
101+
static constexpr auto decodeRecordIcomb = MemoryAccessHelper<MemoryAccessMatrixElementsBase>::decodeRecord;
81102

82103
// Locate a field (output) of an event record (input) from the given field indexes (input)
83-
// [Signature (const) ===> const fptype& decodeRecordConst( const fptype* buffer ) <===]
84-
static constexpr auto decodeRecordConst =
85-
MemoryAccessHelper<MemoryAccessMatrixElementsBase>::template decodeRecordConst<>;
104+
// [Signature (const) ===> const fptype& decodeRecordIcombConst( const fptype* buffer, const int icomb ) <===]
105+
static constexpr auto decodeRecordIcombConst =
106+
MemoryAccessHelper<MemoryAccessMatrixElementsBase>::template decodeRecordConst<int>;
86107

87108
// Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input)
88-
// [Signature (non-const) ===> fptype& ieventAccess( fptype* buffer, const ievt ) <===]
89-
static constexpr auto ieventAccess =
90-
MemoryAccessHelper<MemoryAccessMatrixElementsBase>::template ieventAccessField<>;
109+
// [Signature (non-const) ===> fptype& ieventAccessIcomb( fptype* buffer, const int ievt, const int icomb ) <===]
110+
static constexpr auto ieventAccessIcomb =
111+
MemoryAccessHelper<MemoryAccessMatrixElementsBase>::template ieventAccessField<int>;
91112

92113
// Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input)
93-
// [Signature (const) ===> const fptype& ieventAccessConst( const fptype* buffer, const ievt ) <===]
94-
static constexpr auto ieventAccessConst =
95-
MemoryAccessHelper<MemoryAccessMatrixElementsBase>::template ieventAccessFieldConst<>;
114+
// [Signature (const) ===> const fptype& ieventAccessIcombConst( const fptype* buffer, const int ievt, const int icomb ) <===]
115+
static constexpr auto ieventAccessIcombConst =
116+
MemoryAccessHelper<MemoryAccessMatrixElementsBase>::template ieventAccessFieldConst<int>;
96117
};
97118

98119
//----------------------------------------------------------------------------
@@ -108,16 +129,16 @@ namespace mg5amcCpu
108129
static constexpr auto ieventAccessRecord = MemoryAccessMatrixElements::ieventAccessRecord;
109130

110131
// Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input)
111-
// [Signature (non-const, SCALAR) ===> fptype& kernelAccess_s( fptype* buffer ) <===]
112-
static constexpr auto kernelAccess_s =
113-
KernelAccessHelper<MemoryAccessMatrixElementsBase, onDevice>::template kernelAccessField<>; // requires cuda 11.4
132+
// [Signature (non-const, SCALAR) ===> fptype& kernelAccessIcomb_s( fptype* buffer, const int icomb ) <===]
133+
static constexpr auto kernelAccessIcomb_s =
134+
KernelAccessHelper<MemoryAccessMatrixElementsBase, onDevice>::template kernelAccessField<int>; // requires cuda 11.4
114135

115136
// Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal)
116-
// [Signature (non const, SCALAR OR VECTOR) ===> fptype_sv& kernelAccess( const fptype* buffer ) <===]
137+
// [Signature (non const, SCALAR OR VECTOR) ===> fptype_sv& kernelAccess( const fptype* buffer, const int icomb ) <===]
117138
static __host__ __device__ inline fptype_sv&
118-
kernelAccess( fptype* buffer )
139+
kernelAccessIcomb( fptype* buffer )
119140
{
120-
fptype& out = kernelAccess_s( buffer );
141+
fptype& out = kernelAccessIcomb_s( buffer );
121142
#ifndef MGONGPU_CPPSIMD
122143
return out;
123144
#else
@@ -129,9 +150,9 @@ namespace mg5amcCpu
129150
}
130151

131152
// Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input)
132-
// [Signature (const) ===> const fptype& kernelAccessConst( const fptype* buffer ) <===]
133-
static constexpr auto kernelAccessConst =
134-
KernelAccessHelper<MemoryAccessMatrixElementsBase, onDevice>::template kernelAccessFieldConst<>; // requires cuda 11.4
153+
// [Signature (const) ===> const fptype& kernelAccessIcombConst( const fptype* buffer, const int icomb ) <===]
154+
static constexpr auto kernelAccessIcombConst =
155+
KernelAccessHelper<MemoryAccessMatrixElementsBase, onDevice>::template kernelAccessFieldConst<int>; // requires cuda 11.4
135156
};
136157

137158
//----------------------------------------------------------------------------

epochX/cudacpp/gg_tt.sa/SubProcesses/MemoryBuffers.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ namespace mg5amcCpu
3434
static constexpr size_t nparf = CPPProcess::nparf;
3535
static constexpr size_t npar = CPPProcess::npar;
3636
static constexpr size_t ndcoup = Parameters_sm_dependentCouplings::ndcoup;
37+
static constexpr size_t ncomb = CPPProcess::ncomb;
3738
}
3839

3940
//--------------------------------------------------------------------------
@@ -368,7 +369,7 @@ namespace mg5amcCpu
368369
typedef BufferBase<fptype> BufferMatrixElements;
369370

370371
// The size (number of elements) per event in a memory buffer for matrix elements
371-
constexpr size_t sizePerEventMatrixElements = 1;
372+
constexpr size_t sizePerEventMatrixElements = MemoryBuffers::ncomb + 1; // one per (good or bad) helicity, plus total
372373

373374
#ifndef MGONGPUCPP_GPUIMPL
374375
// A class encapsulating a C++ host buffer for matrix elements

epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/check_sa.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -745,7 +745,8 @@ main( int argc, char** argv )
745745
}
746746
std::cout << std::string( SEP79, '-' ) << std::endl;
747747
// Display matrix elements
748-
std::cout << " Matrix element = " << MemoryAccessMatrixElements::ieventAccessConst( hstMatrixElements.data(), ievt )
748+
std::cout << " Matrix element = "
749+
<< MemoryAccessMatrixElements::ieventAccessIcombConst( hstMatrixElements.data(), ievt, CPPProcess::ncomb )
749750
<< " GeV^" << meGeVexponent << std::endl;
750751
std::cout << std::string( SEP79, '-' ) << std::endl;
751752
}

0 commit comments

Comments
 (0)