Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
034987b
start adding navi21 GEMM
j4yan Apr 13, 2022
4f5817d
navi_gemm_km_kn_mn_fp32 compiles and passes one test.
j4yan Apr 14, 2022
0d46b40
rename variables and functions in gridwise_gemm_dlops_v1r3
j4yan Apr 14, 2022
27b1c45
add other 3 layouts; format instance
j4yan Apr 14, 2022
e10a262
adding more tuning parameters
j4yan Apr 15, 2022
8450124
add gemm_dlops_f16
j4yan Apr 19, 2022
6b2ef39
tmp
j4yan Apr 20, 2022
2957754
add dependence of DeviceGemm::IsSupportedArg() on arch
j4yan Apr 20, 2022
f70ad26
minor changes
j4yan Apr 20, 2022
6baedf3
minor changes
j4yan Apr 20, 2022
1bcb8cd
minor changes
j4yan Apr 20, 2022
62a792b
minor changes
j4yan Apr 20, 2022
999321d
minor changes
j4yan Apr 20, 2022
45e9862
minor changes
j4yan Apr 20, 2022
d3f3fac
minor changes
j4yan Apr 20, 2022
e5ea6c7
push gemm_dlops into profiler
j4yan Apr 21, 2022
c695dfa
minor changes
j4yan Apr 21, 2022
fc97e9d
if using xdl or dlops is moved into profiler_gemm_impl
j4yan Apr 21, 2022
cd2ce92
minor changes
j4yan Apr 21, 2022
bf8cea0
minor changes
j4yan Apr 22, 2022
2f70506
remove is_xdl from profile_gemm_impl
j4yan Apr 22, 2022
4ba880e
make IsSupportedArg dependent on arch for other device_gemm
j4yan Apr 22, 2022
5fd0997
minor changes
j4yan Apr 22, 2022
78ade2d
minor changes
j4yan Apr 22, 2022
1d58d7e
fix a bug in f_generate_tensor_value
j4yan Apr 22, 2022
f06ba36
add 64x64x64 for gemm_dlops_int8
j4yan Apr 22, 2022
0c3f0ba
add 64x64x64 for gemm_dlops_int8
j4yan Apr 22, 2022
578eec7
comment out 3 layouts in gemm_dlops_int8; add 32x32x32 for gemm_dlops…
j4yan Apr 25, 2022
aa0acfa
fix
Apr 30, 2022
2ca774b
start fixing tuning parameters
j4yan May 3, 2022
d9cd2e5
minor
j4yan May 5, 2022
f3bd93a
minor changes
j4yan May 5, 2022
9da908f
minor changes
j4yan May 5, 2022
1ea2ef5
minor changes
j4yan May 5, 2022
90438ea
Merge remote-tracking branch 'origin/develop' into navi21_gemm
May 8, 2022
3623f9c
fixing
May 11, 2022
e95e1bf
adding example
May 12, 2022
3a122cb
adding example
May 12, 2022
0eb6b99
adding example
May 12, 2022
217b836
add gemm fp32 example
May 12, 2022
55ff2c5
Merge remote-tracking branch 'origin/develop' into navi21_gemm_v2
May 12, 2022
9eb5eb2
Merge remote-tracking branch 'origin/develop' into navi21_gemm
May 17, 2022
162ac1d
clean up
May 17, 2022
f4f890a
use 128x128x16 as MNK tile in navi21 gemm example
shaojiewang May 18, 2022
9f602fa
bug fix
May 19, 2022
15c5b67
fix test
May 20, 2022
e79f340
Merge remote-tracking branch 'origin/develop' into navi21_gemm
May 20, 2022
39131c6
use new block c tile
May 20, 2022
a838cb9
clean
May 20, 2022
a295bbf
Merge remote-tracking branch 'origin/develop' into navi21_gemm
May 23, 2022
7c7904e
fix build
May 23, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions example/11_conv2d_bwd_weight/conv2d_bwd_weight_xdl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,13 @@ using DeviceConvBwdWeightInstance = ck::tensor_operation::device::
8>; // CBlockTransferScalarPerVector_NWaveNPerXdl
// clang-format on

using ReferenceConvBwdWeightInstance = ck::tensor_operation::host::
ReferenceConvBwdWeight<InDataType, WeiDataType, OutDataType, InElementOp, WeiElementOp, OutElementOp>;
// Host-side (CPU) reference implementation of conv2d backward-weight,
// instantiated with the same data types and element-wise operators as the
// device instance above — presumably used to verify the GPU result in this
// example's main(); confirm against the call site below.
using ReferenceConvBwdWeightInstance =
    ck::tensor_operation::host::ReferenceConvBwdWeight<InDataType,
                                                       WeiDataType,
                                                       OutDataType,
                                                       InElementOp,
                                                       WeiElementOp,
                                                       OutElementOp>;

int main(int argc, char* argv[])
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include "common_header.hpp"
#include "tensor_adaptor.hpp"
#include "threadwise_tensor_slice_transfer_v2.hpp"
#include "threadwise_tensor_slice_transfer_v4r1.hpp"
#include "threadwise_contraction_dlops.hpp"

namespace ck {
Expand Down Expand Up @@ -175,6 +175,7 @@ struct BlockwiseGemmDlops_A_BK0_BM_BK1_B_BK0_BN_BK1_C_BM0_BM1_BN0_BN1_pipeline_B
"wrong!");

// TODO: remove this restriction
static_assert(BM0 == 2, "wrong");
static_assert(BM0 == 2 && BN0 == 2, "wrong");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@ struct BlockwiseTensorSliceTransfer_v5r1
}
}

// Read phase of the blockwise slice transfer: delegates to the per-thread
// transfer object's RunRead for this thread's piece of the slice.
//
// Guard: a thread participates only if it has work mapped to it. When the
// block size equals the thread-cluster size every thread participates; when
// the block has more threads than the cluster (BlockSize >
// thread_cluster_desc_.GetElementSize()), only the first GetElementSize()
// threads (by flat thread id) perform the read and the rest fall through.
// NOTE(review): `or` is the standard alternative spelling of `||`; kept
// byte-identical here, though the rest of the file appears to use `||`.
template <typename SrcBuffer>
__device__ void RunRead(const SrcDesc& src_desc, const SrcBuffer& src_buf)
{
if(BlockSize == thread_cluster_desc_.GetElementSize() or
get_thread_local_1d_id() < thread_cluster_desc_.GetElementSize())
{
threadwise_transfer_.RunRead(src_desc, src_buf);
}
}

template <typename DstBuffer>
__device__ void RunWrite(const DstDesc& dst_desc, DstBuffer& dst_buf)
{
Expand Down
Loading