@@ -22,16 +22,32 @@ namespace mg5amcCpu
2222 // ----------------------------------------------------------------------------
2323
2424 // A class describing the internal layout of memory buffers for matrix elements
25- // This implementation uses a plain ARRAY[nevt]
26- // [If many implementations are used, a suffix _ARRAYv1 should be appended to the class name]
27- class MemoryAccessMatrixElementsBase // _ARRAYv1
25+ // This implementation uses an AOSOA[npagME][ncomb+1][neppME] where nevt=npagME*neppME
26+ // [If many implementations are used, a suffix _AOSOAv1 should be appended to the class name]
27+ class MemoryAccessMatrixElementsBase // _AOSOAv1
2828 {
29+ public:
30+
31+ // Number of Events Per Page in the matrix element AOSOA memory buffer layout
32+ #ifdef MGONGPUCPP_GPUIMPL
33+ static constexpr int neppME = 32 /sizeof (fptype); // (DEFAULT) 32-byte GPU cache line (256 bits): 4 (DOUBLE) or 8 (FLOAT)
34+ #else
35+ #ifdef MGONGPU_CPPSIMD
36+ static constexpr int neppME = MGONGPU_CPPSIMD; // (DEFAULT) neppME=neppV for optimal performance
37+ #else
38+ static constexpr int neppME = 1 ; // (DEFAULT) neppME=neppV for optimal performance (NB: this is equivalent to AOS)
39+ #endif
40+ #endif
41+
2942 private:
3043
3144 friend class MemoryAccessHelper <MemoryAccessMatrixElementsBase>;
3245 friend class KernelAccessHelper <MemoryAccessMatrixElementsBase, true >;
3346 friend class KernelAccessHelper <MemoryAccessMatrixElementsBase, false >;
3447
48+ // The number of (good and bad) helicity combinations
49+ static constexpr int ncomb = CPPProcess::ncomb;
50+
3551 // --------------------------------------------------------------------------
3652 // NB all KernelLaunchers assume that memory access can be decomposed as "accessField = decodeRecord( accessRecord )"
3753 // (in other words: first locate the event record for a given event, then locate an element in that record)
@@ -43,19 +59,24 @@ namespace mg5amcCpu
4359 ieventAccessRecord ( fptype* buffer,
4460 const int ievt )
4561 {
46- return &( buffer[ievt] ); // ARRAY[nevt]
62+ const int ipagME = ievt / neppME; // #event "ME-page"
63+ const int ieppME = ievt % neppME; // #event in the current event ME-page
64+ constexpr int icomb = 0 ;
65+ return &( buffer[ipagME * ( ncomb + 1 ) * neppME + icomb * neppME + ieppME] ); // AOSOA[ipagME][icomb][ieppME]
4766 }
4867
4968 // --------------------------------------------------------------------------
5069
5170 // Locate a field (output) of an event record (input) from the given field indexes (input)
5271 // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer, Ts... args ) <===]
53- // [NB: expand variadic template "Ts... args" to empty and rename "Field" as empty ]
72+ // [NB: expand variadic template "Ts... args" to "const int icomb" and rename "Field" as "Icomb" ]
5473 static __host__ __device__ inline fptype&
55- decodeRecord ( fptype* buffer )
74+ decodeRecord ( fptype* buffer,
75+ const int icomb )
5676 {
57- constexpr int ievt = 0 ;
58- return buffer[ievt]; // ARRAY[nevt]
77+ constexpr int ipagME = 0 ;
78+ constexpr int ieppME = 0 ;
79+ return buffer[ipagME * ( ncomb + 1 ) * neppME + icomb * neppME + ieppME]; // AOSOA[ipagME][icomb][ieppME]
5980 }
6081 };
6182
@@ -76,23 +97,23 @@ namespace mg5amcCpu
7697 static constexpr auto ieventAccessRecordConst = MemoryAccessHelper<MemoryAccessMatrixElementsBase>::ieventAccessRecordConst;
7798
7899 // Locate a field (output) of an event record (input) from the given field indexes (input)
79- // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer ) <===]
80- static constexpr auto decodeRecord = MemoryAccessHelper<MemoryAccessMatrixElementsBase>::decodeRecord;
100+ // [Signature (non-const) ===> fptype& decodeRecordIcomb( fptype* buffer, const int icomb ) <===]
101+ static constexpr auto decodeRecordIcomb = MemoryAccessHelper<MemoryAccessMatrixElementsBase>::decodeRecord;
81102
82103 // Locate a field (output) of an event record (input) from the given field indexes (input)
83- // [Signature (const) ===> const fptype& decodeRecordConst( const fptype* buffer ) <===]
84- static constexpr auto decodeRecordConst =
85- MemoryAccessHelper<MemoryAccessMatrixElementsBase>::template decodeRecordConst<>;
104+ // [Signature (const) ===> const fptype& decodeRecordIcombConst( const fptype* buffer, const int icomb ) <===]
105+ static constexpr auto decodeRecordIcombConst =
106+ MemoryAccessHelper<MemoryAccessMatrixElementsBase>::template decodeRecordConst<int >;
86107
87108 // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input)
88- // [Signature (non-const) ===> fptype& ieventAccess( fptype* buffer, const ievt ) <===]
89- static constexpr auto ieventAccess =
90- MemoryAccessHelper<MemoryAccessMatrixElementsBase>::template ieventAccessField<>;
109+ // [Signature (non-const) ===> fptype& ieventAccessIcomb( fptype* buffer, const int ievt, const int icomb ) <===]
110+ static constexpr auto ieventAccessIcomb =
111+ MemoryAccessHelper<MemoryAccessMatrixElementsBase>::template ieventAccessField<int >;
91112
92113 // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input)
93- // [Signature (const) ===> const fptype& ieventAccessConst( const fptype* buffer, const ievt ) <===]
94- static constexpr auto ieventAccessConst =
95- MemoryAccessHelper<MemoryAccessMatrixElementsBase>::template ieventAccessFieldConst<>;
114+ // [Signature (const) ===> const fptype& ieventAccessIcombConst( const fptype* buffer, const int ievt, const int icomb ) <===]
115+ static constexpr auto ieventAccessIcombConst =
116+ MemoryAccessHelper<MemoryAccessMatrixElementsBase>::template ieventAccessFieldConst<int >;
96117 };
97118
98119 // ----------------------------------------------------------------------------
@@ -108,16 +129,16 @@ namespace mg5amcCpu
108129 static constexpr auto ieventAccessRecord = MemoryAccessMatrixElements::ieventAccessRecord;
109130
110131 // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input)
111- // [Signature (non-const, SCALAR) ===> fptype& kernelAccess_s( fptype* buffer ) <===]
112- static constexpr auto kernelAccess_s =
113- KernelAccessHelper<MemoryAccessMatrixElementsBase, onDevice>::template kernelAccessField<>; // requires cuda 11.4
132+ // [Signature (non-const, SCALAR) ===> fptype& kernelAccessIcomb_s( fptype* buffer, const int icomb ) <===]
133+ static constexpr auto kernelAccessIcomb_s =
134+ KernelAccessHelper<MemoryAccessMatrixElementsBase, onDevice>::template kernelAccessField<int >; // requires cuda 11.4
114135
115136 // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal)
116- // [Signature (non const, SCALAR OR VECTOR) ===> fptype_sv& kernelAccess( const fptype* buffer ) <===]
137+ // [Signature (non const, SCALAR OR VECTOR) ===> fptype_sv& kernelAccessIcomb( fptype* buffer, const int icomb ) <===]
117138 static __host__ __device__ inline fptype_sv&
118- kernelAccess ( fptype* buffer )
139+ kernelAccessIcomb ( fptype* buffer )
119140 {
120- fptype& out = kernelAccess_s ( buffer );
141+ fptype& out = kernelAccessIcomb_s ( buffer );
121142#ifndef MGONGPU_CPPSIMD
122143 return out;
123144#else
@@ -129,9 +150,9 @@ namespace mg5amcCpu
129150 }
130151
131152 // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input)
132- // [Signature (const) ===> const fptype& kernelAccessConst( const fptype* buffer ) <===]
133- static constexpr auto kernelAccessConst =
134- KernelAccessHelper<MemoryAccessMatrixElementsBase, onDevice>::template kernelAccessFieldConst<>; // requires cuda 11.4
153+ // [Signature (const) ===> const fptype& kernelAccessIcombConst( const fptype* buffer, const int icomb ) <===]
154+ static constexpr auto kernelAccessIcombConst =
155+ KernelAccessHelper<MemoryAccessMatrixElementsBase, onDevice>::template kernelAccessFieldConst<int >; // requires cuda 11.4
135156 };
136157
137158 // ----------------------------------------------------------------------------
0 commit comments