Skip to content

Commit a195f85

Browse files
committed
add unified kernels to FFT example
1 parent bca15f4 commit a195f85

File tree

11 files changed

+168
-56
lines changed

11 files changed

+168
-56
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ configure_file(${Ginkgo_SOURCE_DIR}/include/ginkgo/config.hpp.in
252252
add_subdirectory(devices) # Basic device functionalities. Always compiled.
253253
add_subdirectory(common) # Import list of unified kernel source files
254254
if(GINKGO_BUILD_CUDA)
255+
enable_language(CUDA)
255256
add_subdirectory(cuda) # High-performance kernels for NVIDIA GPUs
256257
endif()
257258
if (GINKGO_BUILD_REFERENCE)

benchmark/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ endfunction(ginkgo_add_typed_benchmark_executables)
9696

9797

9898
if (GINKGO_BUILD_CUDA)
99-
enable_language(CUDA)
10099
ginkgo_benchmark_cusparse_linops(d GKO_BENCHMARK_USE_DOUBLE_PRECISION)
101100
ginkgo_benchmark_cusparse_linops(s GKO_BENCHMARK_USE_SINGLE_PRECISION)
102101
ginkgo_benchmark_cusparse_linops(z GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION)

common/cuda_hip/base/math.hpp.inc

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -38,30 +38,3 @@ struct device_numeric_limits {
3838
static constexpr auto max = std::numeric_limits<T>::max();
3939
static constexpr auto min = std::numeric_limits<T>::min();
4040
};
41-
42-
43-
namespace detail {
44-
45-
46-
template <typename T>
47-
struct remove_complex_impl<thrust::complex<T>> {
48-
using type = T;
49-
};
50-
51-
52-
template <typename T>
53-
struct is_complex_impl<thrust::complex<T>>
54-
: public std::integral_constant<bool, true> {};
55-
56-
57-
template <typename T>
58-
struct is_complex_or_scalar_impl<thrust::complex<T>> : std::is_scalar<T> {};
59-
60-
61-
template <typename T>
62-
struct truncate_type_impl<thrust::complex<T>> {
63-
using type = thrust::complex<typename truncate_type_impl<T>::type>;
64-
};
65-
66-
67-
} // namespace detail

cuda/CMakeLists.txt

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,3 @@
1-
enable_language(CUDA)
2-
3-
# Needed because of a known issue with CUDA while linking statically.
4-
# For details, see https://gitlab.kitware.com/cmake/cmake/issues/18614
5-
if (NOT BUILD_SHARED_LIBS)
6-
set(CMAKE_CUDA_DEVICE_LINK_EXECUTABLE ${CMAKE_CUDA_DEVICE_LINK_EXECUTABLE} PARENT_SCOPE)
7-
endif()
8-
91
if(MSVC)
102
# MSVC can not find CUDA automatically
113
# Use CUDA_COMPILER PATH to define the CUDA TOOLKIT ROOT DIR

cuda/base/math.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3737
#include <ginkgo/core/base/math.hpp>
3838

3939

40-
#include <thrust/complex.h>
40+
#include <ginkgo/kernels/cuda/types.hpp>
4141

4242

4343
namespace gko {

examples/schroedinger-splitting/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@ set(target_name "schroedinger-splitting")
22
find_package(OpenCV)
33

44
if (OpenCV_FOUND)
5+
include(../../cmake/unified_kernels.cmake)
56
add_executable(${target_name} ${target_name}.cpp)
6-
target_link_libraries(${target_name} Ginkgo::ginkgo ${OpenCV_LIBS})
7+
ginkgo_add_unified_kernels(${target_name} kernels.cpp)
8+
target_link_libraries(${target_name} PRIVATE Ginkgo::ginkgo ${OpenCV_LIBS})
79
target_include_directories(${target_name} PRIVATE ${PROJECT_SOURCE_DIR})
810
configure_file(../../matrices/examples/gko_logo_2d.mtx data/gko_logo_2d.mtx COPYONLY)
911
configure_file(../../matrices/examples/gko_text_2d.mtx data/gko_text_2d.mtx COPYONLY)
examples/schroedinger-splitting/kernels.cpp

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*******************************<GINKGO LICENSE>******************************
2+
Copyright (c) 2017-2021, the Ginkgo authors
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without
6+
modification, are permitted provided that the following conditions
7+
are met:
8+
9+
1. Redistributions of source code must retain the above copyright
10+
notice, this list of conditions and the following disclaimer.
11+
12+
2. Redistributions in binary form must reproduce the above copyright
13+
notice, this list of conditions and the following disclaimer in the
14+
documentation and/or other materials provided with the distribution.
15+
16+
3. Neither the name of the copyright holder nor the names of its
17+
contributors may be used to endorse or promote products derived from
18+
this software without specific prior written permission.
19+
20+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21+
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22+
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23+
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31+
******************************<GINKGO LICENSE>*******************************/
32+
33+
#include <ginkgo/ginkgo.hpp>
34+
35+
36+
#include <ginkgo/kernels/kernel_launch.hpp>
37+
38+
39+
namespace GKO_DEVICE_NAMESPACE {
40+
41+
42+
// Pull the backend-specific kernel namespace into scope; the functions below
// use unqualified names such as run_kernel, device_type and DefaultExecutor.
// NOTE(review): those names are presumably provided by this namespace via
// <ginkgo/kernels/kernel_launch.hpp> -- confirm.
using namespace gko::kernels::GKO_DEVICE_NAMESPACE;
43+
// Make cos/sin callable unqualified inside the kernel lambdas below.
using std::cos;
44+
using std::sin;
45+
46+
// Empty class template that is not referenced anywhere else in this file.
// NOTE(review): looks like a leftover type-inspection/debugging aid -- confirm
// before removing.
template <typename T>
47+
struct err {};
48+
49+
50+
/**
 * Applies the linear (frequency-space) part of a time step to `freq` in place.
 *
 * The kernel is launched through run_kernel with a 2D size of n x n. For an
 * index pair (i, j) it multiplies the entry at linear position i * n + j by
 *
 *     device_complex{cos(phase), sin(phase)} * amplitude_scale,
 *
 * where phase = -(i * i + j * j) * phase_scale and amplitude_scale = 1 / n^2
 * (the FFT * iFFT normalization factor).
 *
 * @param exec  the executor handed to run_kernel
 * @param n  the grid extent per dimension; also the row stride used to
 *           linearize (i, j)
 * @param phase_scale  the factor applied to -(i^2 + j^2) to obtain the phase
 * @param freq  the frequency-space data, updated in place
 */
void linear_step(std::shared_ptr<const DefaultExecutor> exec, int n,
                 double phase_scale,
                 gko::matrix::Dense<std::complex<double>>* freq)
{
    using device_complex = device_type<std::complex<double>>;
    run_kernel(
        exec,
        GKO_KERNEL(auto i, auto j, auto n, auto phase_scale,
                   auto amplitude_scale, auto freq) {
            auto phase = -(i * i + j * j) * phase_scale;
            freq[i * n + j] *=
                device_complex{cos(phase), sin(phase)} * amplitude_scale;
        },
        // The square is formed in double precision: the int product n * n
        // overflows (undefined behavior) as soon as n exceeds 46340. For all
        // smaller n the resulting factor is identical.
        gko::dim<2>{n, n}, n, phase_scale,
        1.0 / (static_cast<double>(n) * n), freq);
}
65+
66+
67+
/**
 * Applies the non-linear (real-space) part of a time step to `ampl` in place.
 *
 * The kernel is launched through run_kernel with a 2D size of n x n. For an
 * index pair (i, j) with linear position idx = i * n + j it computes
 *
 *     phase = -(nonlinear_scale * gko::squared_norm(ampl[idx]) +
 *               potential_scale * potential[idx]) * time_scale
 *
 * and multiplies ampl[idx] by device_complex{cos(phase), sin(phase)}.
 *
 * @param exec  the executor handed to run_kernel
 * @param n  the grid extent per dimension; also the row stride used to
 *           linearize (i, j)
 * @param nonlinear_scale  the weight of the squared-norm term
 * @param potential_scale  the weight of the potential term
 * @param time_scale  the factor applied to the weighted sum
 * @param potential  the potential values, read at the same linear positions
 * @param ampl  the amplitude data, updated in place
 */
void nonlinear_step(std::shared_ptr<const DefaultExecutor> exec, int n,
                    double nonlinear_scale, double potential_scale,
                    double time_scale,
                    const gko::matrix::Dense<double>* potential,
                    gko::matrix::Dense<std::complex<double>>* ampl)
{
    using device_complex = device_type<std::complex<double>>;
    run_kernel(
        exec,
        GKO_KERNEL(auto row, auto col, auto extent, auto self_weight,
                   auto field_weight, auto step, auto field, auto wave) {
            // linear position of the grid point (row, col)
            auto pos = row * extent + col;
            auto angle = -(self_weight * gko::squared_norm(wave[pos]) +
                           field_weight * field[pos]) *
                         step;
            wave[pos] *= device_complex{cos(angle), sin(angle)};
        },
        gko::dim<2>{n, n}, n, nonlinear_scale, potential_scale, time_scale,
        potential, ampl);
}
88+
89+
90+
} // namespace GKO_DEVICE_NAMESPACE

examples/schroedinger-splitting/schroedinger-splitting.cpp

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,24 @@ to the non-linear part, which turns it into the Gross–Pitaevskii equation.
8787
#include <opencv2/videoio.hpp>
8888

8989

90+
#include <ginkgo/kernels/kernel_declaration.hpp>
91+
92+
93+
// Declarations of the two kernel entry points of this example, wrapped in
// GKO_DECLARE_UNIFIED. The signatures must match the definitions in the
// example's kernel source file.
// NOTE(review): the macro presumably repeats these declarations for every
// backend that provides a DefaultExecutor -- confirm against
// <ginkgo/kernels/kernel_declaration.hpp>.
GKO_DECLARE_UNIFIED(
94+
void linear_step(std::shared_ptr<const DefaultExecutor> exec, int n,
95+
double phase_scale,
96+
gko::matrix::Dense<std::complex<double>>* freq);
97+
void nonlinear_step(std::shared_ptr<const DefaultExecutor> exec, int n,
98+
double nonlinear_scale, double potential_scale,
99+
double time_scale,
100+
const gko::matrix::Dense<double>* potential,
101+
gko::matrix::Dense<std::complex<double>>* ampl));
102+
103+
104+
// Register both kernels as operations. main() later runs them through
// exec->run(make_linear_step(...)) and exec->run(make_nonlinear_step(...)).
// NOTE(review): the make_<name> factories are presumably generated by this
// macro from its first argument -- confirm.
GKO_REGISTER_UNIFIED_OPERATION(linear_step, linear_step);
105+
GKO_REGISTER_UNIFIED_OPERATION(nonlinear_step, nonlinear_step);
106+
107+
90108
// This function implements a simple Ginkgo-themed clamped color mapping for
91109
// values in the range [0,5].
92110
void set_val(unsigned char* data, double value)
@@ -189,24 +207,11 @@ int main(int argc, char* argv[])
189207
}
190208
// time step in linear part
191209
fft->apply(lend(amplitude), lend(frequency));
192-
for (int i = 0; i < n; i++) {
193-
for (int j = 0; j < n; j++) {
194-
frequency->at(idx(i, j)) *=
195-
std::polar(1.0, -h2 * (i * i + j * j) * tau * time_scale);
196-
// scale by FFT*iFFT normalization factor
197-
frequency->at(idx(i, j)) *= 1.0 / n2;
198-
}
199-
}
210+
exec->run(make_linear_step(n, h2 * tau * time_scale, lend(frequency)));
200211
ifft->apply(lend(frequency), lend(amplitude));
201212
// time step in non-linear part
202-
for (int i = 0; i < n; i++) {
203-
for (int j = 0; j < n; j++) {
204-
amplitude->at(idx(i, j)) *= std::polar(
205-
1.0, -(nonlinear_scale *
206-
gko::squared_norm(amplitude->at(idx(i, j))) +
207-
potential_scale * potential->at(idx(i, j))) *
208-
tau * time_scale);
209-
}
210-
}
213+
exec->run(make_nonlinear_step(n, nonlinear_scale, potential_scale,
214+
tau * time_scale, lend(potential),
215+
lend(amplitude)));
211216
}
212217
}

hip/base/math.hip.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3737
#include <ginkgo/core/base/math.hpp>
3838

3939

40-
#include <thrust/complex.h>
40+
#include <ginkgo/kernels/hip/types.hip.hpp>
4141

4242

4343
namespace gko {

include/ginkgo/kernels/cuda/types.hpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,31 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5151

5252

5353
namespace gko {
54+
// Partial specializations of Ginkgo's type-trait helpers for thrust::complex.
namespace detail {
55+
56+
57+
// The type with the complex wrapper removed from thrust::complex<T> is T.
template <typename T>
58+
struct remove_complex_impl<thrust::complex<T>> {
59+
using type = T;
60+
};
61+
62+
63+
// thrust::complex<T> is reported as a complex type.
template <typename T>
64+
struct is_complex_impl<thrust::complex<T>>
65+
: public std::integral_constant<bool, true> {};
66+
67+
68+
// thrust::complex<T> counts as "complex or scalar" exactly when T satisfies
// std::is_scalar.
template <typename T>
69+
struct is_complex_or_scalar_impl<thrust::complex<T>> : std::is_scalar<T> {};
70+
71+
72+
// Truncating thrust::complex<T> truncates the underlying value type and wraps
// the result in thrust::complex again.
template <typename T>
73+
struct truncate_type_impl<thrust::complex<T>> {
74+
using type = thrust::complex<typename truncate_type_impl<T>::type>;
75+
};
76+
77+
78+
} // namespace detail
5479

5580

5681
namespace kernels {

0 commit comments

Comments
 (0)