Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion example/19_binary_elementwise/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
add_example_executable(example_broadcast_add_2d broadcast_add_2d.cpp)
add_example_executable(example_broadcast_add_2d_amn_bn broadcast_add_2d_amn_bn.cpp)
add_example_executable(example_broadcast_add_3d_am_bmnk broadcast_add_3d_am_bmnk.cpp)
add_example_executable(example_elementwise_add_1d elementwise_add_1d.cpp)
add_example_executable(example_elementwise_add_4d elementwise_add_4d.cpp)
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,17 @@ using EltwiseComputeDataType = F32;

using Add = ck::tensor_operation::binary_element_wise::Add;

using DeviceElementwiseAddInstance = ck::tensor_operation::device::
DeviceBinaryElementwise<ABDataType, ABDataType, CDataType, EltwiseComputeDataType, Add, 2, 8>;
using DeviceElementwiseAddInstance =
ck::tensor_operation::device::DeviceBinaryElementwise<ABDataType,
ABDataType,
CDataType,
EltwiseComputeDataType,
Add,
2,
8,
8,
8,
8>;

template <typename HostTensorA,
typename HostTensorB,
Expand Down Expand Up @@ -100,7 +109,7 @@ int main()
if(!broadcastAdd.IsSupportedArgument(argument.get()))
{
throw std::runtime_error("The runtime parameters seems not supported by the "
"DeviceBinaryElementwise_2D instance, exiting!");
"DeviceBinaryElementwise instance, exiting!");
};

auto broadcastAdd_invoker_ptr = broadcastAdd.MakeInvokerPointer();
Expand All @@ -123,7 +132,7 @@ int main()
0>(host_c_m_n, a_m_n, b_n, M, N, Add{});

pass &= ck::utils::check_err(
c_m_n.mData, host_c_m_n.mData, "Error: Incorrect results d1", 1e-3, 1e-3);
c_m_n.mData, host_c_m_n.mData, "Error: Incorrect results c", 1e-3, 1e-3);
}

return pass ? 0 : 1;
Expand Down
123 changes: 123 additions & 0 deletions example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#include <iostream>
#include <cstdlib>
#include "check_err.hpp"
#include "config.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"

#include "device_tensor.hpp"
#include "binary_element_wise_operation.hpp"
#include "device_binary_elementwise.hpp"

using F16 = ck::half_t;
using F32 = float;

using ABDataType = F16;
using CDataType = F16;
using EltwiseComputeDataType = F32;

using Add = ck::tensor_operation::binary_element_wise::Add;

using DeviceElementwiseAddInstance =
ck::tensor_operation::device::DeviceBinaryElementwise<ABDataType,
ABDataType,
CDataType,
EltwiseComputeDataType,
Add,
3,
8,
1,
8,
8>;

template <typename HostTensorA,
typename HostTensorB,
typename HostTensorC,
typename ComputeDataType,
typename Functor>
void host_broadcast3D_am_bmnk(HostTensorC& C,
const HostTensorA& A,
const HostTensorB& B,
const std::vector<std::size_t>& shape,
Functor functor)
{
using ctype = ck::remove_reference_t<decltype(C(0, 0))>;

for(std::size_t m = 0; m < shape[0]; ++m)
for(std::size_t n = 0; n < shape[1]; ++n)
for(std::size_t k = 0; k < shape[2]; ++k)
{
ComputeDataType a_val = static_cast<ComputeDataType>(A(m));
ComputeDataType b_val = static_cast<ComputeDataType>(B(m, n, k));
ComputeDataType c_val = 0;
functor(c_val, a_val, b_val);
C(m, n, k) = static_cast<ctype>(c_val);
}
}

int main()
{
bool do_verification = true;
bool time_kernel = false;

std::vector<std::size_t> mnk = {4, 16, 32};
ck::index_t M = mnk[0];

Tensor<ABDataType> a_m({M});
Tensor<ABDataType> b_m_n_k(mnk);
Tensor<CDataType> c_m_n_k(mnk);

a_m.GenerateTensorValue(GeneratorTensor_3<ABDataType>{0.0, 1.0});
b_m_n_k.GenerateTensorValue(GeneratorTensor_3<ABDataType>{0.0, 1.0});

DeviceMem a_m_device_buf(sizeof(ABDataType) * a_m.mDesc.GetElementSpace());
DeviceMem b_m_n_k_device_buf(sizeof(ABDataType) * b_m_n_k.mDesc.GetElementSpace());
DeviceMem c_m_n_k_device_buf(sizeof(CDataType) * c_m_n_k.mDesc.GetElementSpace());

a_m_device_buf.ToDevice(a_m.mData.data());
b_m_n_k_device_buf.ToDevice(b_m_n_k.mData.data());

auto broadcastAdd = DeviceElementwiseAddInstance{};
auto argument = broadcastAdd.MakeArgumentPointer(
a_m_device_buf.GetDeviceBuffer(),
b_m_n_k_device_buf.GetDeviceBuffer(),
c_m_n_k_device_buf.GetDeviceBuffer(),
std::vector<ck::index_t>{mnk.begin(), mnk.end()},
{1, 0, 0}, // broadcast A on second and third dimension
std::vector<ck::index_t>{b_m_n_k.mDesc.GetStrides().begin(),
b_m_n_k.mDesc.GetStrides().end()},
std::vector<ck::index_t>{c_m_n_k.mDesc.GetStrides().begin(),
c_m_n_k.mDesc.GetStrides().end()},
Add{});

if(!broadcastAdd.IsSupportedArgument(argument.get()))
{
throw std::runtime_error("The runtime parameters seems not supported by the "
"DeviceBinaryElementwise instance, exiting!");
};

auto broadcastAdd_invoker_ptr = broadcastAdd.MakeInvokerPointer();
float ave_time =
broadcastAdd_invoker_ptr->Run(argument.get(), StreamConfig{nullptr, time_kernel});

std::cout << "Perf: " << ave_time << " ms" << std::endl;

bool pass = true;
if(do_verification)
{
c_m_n_k_device_buf.FromDevice(c_m_n_k.mData.data());
Tensor<CDataType> host_c_m_n_k(mnk);

host_broadcast3D_am_bmnk<Tensor<ABDataType>,
Tensor<ABDataType>,
Tensor<CDataType>,
EltwiseComputeDataType,
Add>(host_c_m_n_k, a_m, b_m_n_k, mnk, Add{});

pass &= ck::utils::check_err(
c_m_n_k.mData, host_c_m_n_k.mData, "Error: Incorrect results c", 1e-3, 1e-3);
}

return pass ? 0 : 1;
}
17 changes: 13 additions & 4 deletions example/19_binary_elementwise/elementwise_add_1d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,17 @@ using EltwiseComputeDataType = F32;

using Add = ck::tensor_operation::binary_element_wise::Add;

using DeviceElementwiseAddInstance = ck::tensor_operation::device::
DeviceBinaryElementwise<ABDataType, ABDataType, CDataType, EltwiseComputeDataType, Add, 1, 8>;
using DeviceElementwiseAddInstance =
ck::tensor_operation::device::DeviceBinaryElementwise<ABDataType,
ABDataType,
CDataType,
EltwiseComputeDataType,
Add,
1,
8,
8,
8,
8>;

template <typename HostTensorA,
typename HostTensorB,
Expand Down Expand Up @@ -81,7 +90,7 @@ int main()
if(!broadcastAdd.IsSupportedArgument(argument.get()))
{
throw std::runtime_error("The runtime parameters seems not supported by the "
"DeviceBinaryElementwise_2D instance, exiting!");
"DeviceBinaryElementwise instance, exiting!");
};

auto broadcastAdd_invoker_ptr = broadcastAdd.MakeInvokerPointer();
Expand All @@ -103,7 +112,7 @@ int main()
Add>(host_c_m, a_m, b_m, M, Add{});

pass &= ck::utils::check_err(
c_m.mData, host_c_m.mData, "Error: Incorrect results d1", 1e-3, 1e-3);
c_m.mData, host_c_m.mData, "Error: Incorrect results c", 1e-3, 1e-3);
}

return pass ? 0 : 1;
Expand Down
17 changes: 13 additions & 4 deletions example/19_binary_elementwise/elementwise_add_4d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,17 @@ using EltwiseComputeDataType = F32;

using Add = ck::tensor_operation::binary_element_wise::Add;

using DeviceElementwiseAddInstance = ck::tensor_operation::device::
DeviceBinaryElementwise<ABDataType, ABDataType, CDataType, EltwiseComputeDataType, Add, 4, 8>;
using DeviceElementwiseAddInstance =
ck::tensor_operation::device::DeviceBinaryElementwise<ABDataType,
ABDataType,
CDataType,
EltwiseComputeDataType,
Add,
4,
8,
8,
8,
8>;

template <typename HostTensorA,
typename HostTensorB,
Expand Down Expand Up @@ -83,7 +92,7 @@ int main()
if(!broadcastAdd.IsSupportedArgument(argument.get()))
{
throw std::runtime_error("The runtime parameters seems not supported by the "
"DeviceBinaryElementwise_2D instance, exiting!");
"DeviceBinaryElementwise instance, exiting!");
};

auto broadcastAdd_invoker_ptr = broadcastAdd.MakeInvokerPointer();
Expand All @@ -105,7 +114,7 @@ int main()
Add>(host_c, a, b, nchw, Add{});

pass &=
ck::utils::check_err(c.mData, host_c.mData, "Error: Incorrect results d1", 1e-3, 1e-3);
ck::utils::check_err(c.mData, host_c.mData, "Error: Incorrect results c", 1e-3, 1e-3);
}

return pass ? 0 : 1;
Expand Down
Loading