Skip to content
Merged
Show file tree
Hide file tree
Changes from 70 commits
Commits
Show all changes
72 commits
Select commit Hold shift + click to select a range
8e19f16
Add a stage for CK
JehandadKhan Mar 24, 2022
28c25bb
remove git clone from Docker
JehandadKhan Mar 24, 2022
8d266d9
[ci-skip] add ck to docker, cmakelists.txt and
JehandadKhan Mar 24, 2022
cd639ef
update test to handle int8 output
JehandadKhan Mar 31, 2022
6c95f9b
fix clang tidy issues
junliume Mar 31, 2022
155daeb
Merge branch 'develop' into jd/ck_integration
JehandadKhan Apr 17, 2022
37c2358
More fixes for int8 enablement
JehandadKhan Apr 17, 2022
d30dab7
Merge branch 'develop' into jd/ck_integration
JehandadKhan Apr 17, 2022
886154e
Merge branch 'jd/ck_integration' of https://github.com/ROCmSoftwarePl…
JehandadKhan Apr 18, 2022
34cae74
code cleanup
JehandadKhan Apr 21, 2022
57f4af7
disable on OpenCL
JehandadKhan Apr 22, 2022
17c232e
fix formatting
JehandadKhan Apr 22, 2022
c9a617b
address tidy issues
JehandadKhan Apr 22, 2022
35cfc5b
address tidy issues
JehandadKhan Apr 22, 2022
d54cf7e
Update composable kernel hash for multi arch support
JehandadKhan Apr 24, 2022
ba0903e
add all architectures
JehandadKhan Apr 25, 2022
d34e389
reduce parallel threads
JehandadKhan Apr 27, 2022
93dcda1
move ck to rocm 5.1
JehandadKhan Apr 28, 2022
f0713e4
fix default value for int8_vectorize
JehandadKhan May 6, 2022
eb50868
Merge branch 'develop' into jd/ck_integration
JehandadKhan May 9, 2022
9ce28a2
Update docker commit
JehandadKhan May 13, 2022
7d30adb
handle vectorize flag for non int8 paths
JehandadKhan May 17, 2022
bbd9bb5
Merge branch 'develop' into jd/ck_integration
JehandadKhan May 17, 2022
7d38d69
update the invoker signature
JehandadKhan May 19, 2022
6ca8953
Update docker for CCACHE and move test arg to main file
JehandadKhan May 27, 2022
32a7007
Merge branch 'develop' into jd/ck_integration
JehandadKhan May 27, 2022
da89207
Merge branch 'develop' into jd/ck_integration
JehandadKhan Jun 6, 2022
5a23aa0
fix gemm solver bug
JehandadKhan Jun 6, 2022
ca5ed56
Update Jenkins file
JehandadKhan Jun 7, 2022
adcbeb7
Add Navi to CK build
JehandadKhan Jun 8, 2022
cfafefa
Add docker build stage
JehandadKhan Jun 9, 2022
9ed4674
Correct agent specification
JehandadKhan Jun 9, 2022
f15ca3b
Jenkins runs in Busy Box
JehandadKhan Jun 9, 2022
e1495e4
fix branch name
JehandadKhan Jun 9, 2022
1346b08
Change builder node
JehandadKhan Jun 10, 2022
741232f
Change docker stage label
JehandadKhan Jun 10, 2022
8091135
Enable BuildKit for Docker
JehandadKhan Jun 10, 2022
2a53986
remove redundant scm
JehandadKhan Jun 10, 2022
030cfea
remove buid number from docker tag
JehandadKhan Jun 10, 2022
844265f
Conditional docker build
JehandadKhan Jun 10, 2022
e4545a4
Add nc switch
JehandadKhan Jun 10, 2022
b6141cd
fix var spelling
JehandadKhan Jun 10, 2022
da11e7d
Merge branch 'develop' into jd/prebuilt_image
JehandadKhan Jun 15, 2022
9b26dfe
Update credentials
JehandadKhan Jun 15, 2022
17158b1
Add control expression to Build Docker stage
JehandadKhan Jun 15, 2022
f62c96e
Remove docker manifest inspect
JehandadKhan Jun 15, 2022
fcd6bcc
Merge branch 'develop' into jd/prebuilt_image
JehandadKhan Jun 15, 2022
e24eb8d
Merge branch 'jd/prebuilt_image' into jd/ck_integration
JehandadKhan Jun 16, 2022
10a873c
Make ctest verbose
JehandadKhan Jun 17, 2022
7af0c92
remove debug flag
JehandadKhan Jun 17, 2022
57d3ec1
Merge branch 'develop' into jd/ck_integration
JehandadKhan Jun 20, 2022
f48116f
remove verbose flag from tests
JehandadKhan Jun 21, 2022
1438038
Merge branch 'develop' into jd/ck_integration
JehandadKhan Jun 21, 2022
a42fdb9
Merge branch 'develop' into jd/ck_integration
junliume Jun 24, 2022
ffeebee
resolve merge conflicts
junliume Jun 24, 2022
8e3d9d0
fix solver.hpp format issue
junliume Jun 25, 2022
9e0930a
Merge branch 'develop' into jd/ck_integration
JehandadKhan Jul 14, 2022
7b5e73d
Add cmake var to control the inclusion of CK into MIOpen
JehandadKhan Jul 14, 2022
1200006
simplify CK dependency
JehandadKhan Jul 26, 2022
b3c1ac8
add debug override for docker build
JehandadKhan Jul 26, 2022
d89c08f
fix the max val for int8
JehandadKhan Jul 27, 2022
50dbe04
Merge branch 'develop' into jd/ck_integration
junliume Jul 29, 2022
244f0f1
Fix git merge issues
JehandadKhan Aug 3, 2022
47b45af
Merge branch 'develop' into jd/ck_integration
JehandadKhan Aug 3, 2022
6515f90
Update data type check and remove forced data type check from API layer
JehandadKhan Aug 4, 2022
06bea4b
fix inconsistencies in data type handling
JehandadKhan Aug 5, 2022
1392542
Merge branch 'develop' into jd/ck_integration
JehandadKhan Aug 10, 2022
949704b
fix Jenkinsfile typo
JehandadKhan Aug 10, 2022
c76adbb
Merge branch 'develop' into jd/ck_integration
junliume Aug 17, 2022
a52fb74
Merge branch 'develop' into jd/ck_integration
JehandadKhan Aug 18, 2022
827d276
address review comments
JehandadKhan Aug 26, 2022
28af26f
remove comment
JehandadKhan Aug 26, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,12 @@ if( DEFINED MIOPEN_OVERRIDE_HIP_VERSION_PATCH )
message(STATUS "MIOPEN_hip_VERSION_PATCH overriden with ${MIOPEN_OVERRIDE_HIP_VERSION_PATCH}")
endif()

# Depend on Composable Kernels
option(MIOPEN_USE_COMPOSABLEKERNEL "Enable MIOpen to use composable kernels for various operations" Off)
if(MIOPEN_USE_COMPOSABLEKERNEL)
find_package(composable_kernel 1.0.0 COMPONENTS device_operations host_tensor)
endif()

set_var_to_condition(MIOPEN_USE_COMGR_DEFAULT (NOT DEFINED MIOPEN_BACKEND_OPENCL) AND (NOT (MIOPEN_BACKEND STREQUAL "HIPNOGPU")))
option(MIOPEN_USE_COMGR "Use comgr to build kernels instead of offline tools" ${MIOPEN_USE_COMGR_DEFAULT})

Expand Down
19 changes: 15 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ubuntu:18.04
FROM ubuntu:18.04 as miopen
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good!


ARG USE_MLIR="OFF"

Expand All @@ -8,7 +8,8 @@ RUN dpkg --add-architecture i386
# Add rocm repository
# Note: The ROCm version with $USE_MLIR should keep in sync with default ROCm version
# unless MLIR library is incompatible with current ROCm.

RUN apt-get update
RUN apt-get install -y wget gnupg
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

related to cmake

RUN if [ "$USE_MLIR" = "ON" ] ; \
then export ROCM_APT_VER=.apt_5.1;\
else \
Expand All @@ -17,6 +18,8 @@ RUN if [ "$USE_MLIR" = "ON" ] ; \
echo $ROCM_APT_VER &&\
sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/$ROCM_APT_VER/ ubuntu main > /etc/apt/sources.list.d/rocm.list'
RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu bionic main universe | tee -a /etc/apt/sources.list"
RUN wget --no-check-certificate -qO - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add -
RUN sh -c "echo deb https://apt.kitware.com/ubuntu/ bionic main | tee -a /etc/apt/sources.list"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

related to cmake


#Add gpg keys
# Install dependencies
Expand All @@ -33,7 +36,8 @@ wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
build-essential \
cmake \
cmake-data=3.15.1-0kitware1 \
cmake=3.15.1-0kitware1 \
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can be deleted.
cmake=3.15.1 already in our deps.
if cmake-data is required, it should be placed in rbuild.ini
https://github.com/ROCmSoftwarePlatform/MIOpen/blob/ade0d435b1e41fb9c79f2a4e3898f4896ecaec03/rbuild.ini#L24

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JehandadKhan I thought these lines are needed since base OS is Ubuntu 18.04 and we need more up-to-date cmake? Could you confirm?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, these are part of the rbuild process, but since we could not build CK there due to CK's dependence on hipcc, I had to add them here as well.

In light of your update in CK I propose we merge this PR as is and then fix the dependency issues in a separate PR.

comgr \
clang-format-10 \
doxygen \
Expand Down Expand Up @@ -128,4 +132,11 @@ RUN if [ "$USE_TARGETID" = "ON" ] ; then export HIPCC_LINK_FLAGS_APPEND='-O3 -pa
ARG MIOTENSILE_VER="default"
RUN if [ "$USE_TARGETID" = "OFF" ] ; then echo "MIOpenTensile is not installed."; elif [ "$MIOTENSILE_VER" = "latest" ] ; then cget -p $PREFIX install ROCmSoftwarePlatform/MIOpenTensile@94a9047741d16a8eccd290131b78fb1aa69cdcdf; else cget -p $PREFIX install ROCmSoftwarePlatform/MIOpenTensile@94a9047741d16a8eccd290131b78fb1aa69cdcdf; fi

RUN groupadd -f render
ARG CK_COMMIT=91d8b7d67ae9dbf8a6e691ea3e17c0b9705c6ba7
RUN wget -O ck.tar.gz https://www.github.com/rocmsoftwareplatform/composable_kernel/archive/${CK_COMMIT}.tar.gz && \
tar zxvf ck.tar.gz &&\
cd composable_kernel-${CK_COMMIT} && \
mkdir build && cd build && \
CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}" -DCMAKE_PREFIX_PATH=/opt/rocm -D CMAKE_CXX_FLAGS=" --offload-arch=gfx900 --offload-arch=gfx906 --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx1030 -O3 " .. && \
make -j $(nproc) install
RUN groupadd -f render
18 changes: 18 additions & 0 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,10 @@ def buildDocker(install_prefix)
echo "Checking for image: ${image_name}"
sh "docker manifest inspect --insecure ${image_name}"
echo "Image: ${image_name} found!! Skipping building image"
if(params.DEBUG_FORCE_DOCKER_BUILD)
{
throw new Exception("Docker build override via DEBUG_FORCE_DOCKER_BUILD")
}
}
catch(Exception ex)
{
Expand Down Expand Up @@ -757,6 +761,20 @@ pipeline {
buildHipClangJobAndReboot(compiler: 'g++', setup_flags: Int8_flags, config_targets: Smoke_targets)
}
}
stage('Int8 Hip Debug gfx908 (ComposableKernel)') {
when {
beforeAgent true
expression { params.TARGET_GFX908}
}
agent{ label rocmnode("gfx908") }
// This stage should be removed when CK is enabled by default in MIOpen
environment{
Enable_CK = "-DMIOPEN_USE_COMPOSABLEKERNEL=On"
}
steps{
buildHipClangJobAndReboot( build_type: 'debug', setup_flags: Enable_CK + Int8_flags , build_env: extra_log_env, test_flags: ' --verbose ')
}
}
stage('Fp16 Hip Vega20') {
when {
beforeAgent true
Expand Down
1 change: 1 addition & 0 deletions include/miopen/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
#cmakedefine01 MIOPEN_LOG_FUNC_TIME_ENABLE
#cmakedefine01 MIOPEN_ENABLE_SQLITE_BACKOFF
#cmakedefine01 MIOPEN_USE_MLIR
#cmakedefine01 MIOPEN_USE_COMPOSABLEKERNEL

// "_PACKAGE_" to avoid name contentions: the macros like
// HIP_VERSION_MAJOR are defined in hip_version.h.
Expand Down
8 changes: 7 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ set( MIOpen_Source
solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp
solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp
solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp
solver/conv_hip_implicit_gemm_fwd_xdlops.cpp
solver/conv_hip_implicit_gemm_nonxdlops_common.cpp
solver/conv_hip_implicit_gemm_wrw_v4r4.cpp
solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp
Expand Down Expand Up @@ -633,9 +634,14 @@ target_include_directories(MIOpen PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src/include>
)

set(MIOPEN_CK_LINK_FLAGS)
if(MIOPEN_USE_COMPOSABLEKERNEL)
set(MIOPEN_CK_LINK_FLAGS composable_kernel::device_operations composable_kernel::host_tensor hip::host)
endif()

target_include_directories(MIOpen SYSTEM PUBLIC $<BUILD_INTERFACE:${HALF_INCLUDE_DIR}>)
target_include_directories(MIOpen SYSTEM PRIVATE ${BZIP2_INCLUDE_DIR})
target_link_libraries(MIOpen PRIVATE ${CMAKE_THREAD_LIBS_INIT} ${BZIP2_LIBRARIES})
target_link_libraries(MIOpen PRIVATE ${CMAKE_THREAD_LIBS_INIT} ${BZIP2_LIBRARIES} ${MIOPEN_CK_LINK_FLAGS})
generate_export_header(MIOpen
EXPORT_FILE_NAME ${PROJECT_BINARY_DIR}/include/miopen/export.h
)
Expand Down
5 changes: 3 additions & 2 deletions src/convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,9 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor&
tensor_layout_to_strides(
out_lens, default_layout, yLayout, xDesc.GetVectorLength(), out_strides);
return {(xDesc.GetType() == miopenInt8 || xDesc.GetType() == miopenInt8x4
? (yType == miopenInt32 ? yType : miopenFloat)
: xDesc.GetType()),
? (yType)
: xDesc.GetType()), // TODO: This function overrides the output type with
// essentially the input which is incorrect.
xDesc.GetLayout_t(),
out_lens,
out_strides};
Expand Down
53 changes: 53 additions & 0 deletions src/include/miopen/solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4108,6 +4108,59 @@ struct ConvAsmImplicitGemmGTCDynamicFwdDlopsNCHWC final
const PerformanceConfigAsmImplicitGemmGTCFwdDlopsNCHWC& config) const override;
};

struct PerformanceConfigHipImplicitGemmFwdXdlops
: PerfConfigBase<PerformanceConfigHipImplicitGemmFwdXdlops>
{
int index;
std::string kernel_id;
int total_size;
PerformanceConfigHipImplicitGemmFwdXdlops(int idx, std::string kernl_id)
: index(idx), kernel_id(kernl_id), total_size(-1)
{
}
PerformanceConfigHipImplicitGemmFwdXdlops() : PerformanceConfigHipImplicitGemmFwdXdlops(0, "")
{
}
PerformanceConfigHipImplicitGemmFwdXdlops(bool)
: PerformanceConfigHipImplicitGemmFwdXdlops(0, "")
{
}
void HeuristicInit(const ConvolutionContext& ctx);
bool SetNextValue(const ConvolutionContext& ctx);
bool IsValidValue() const;
bool IsValid(const ConvolutionContext& ctx) const;
template <typename Self, typename F>
static void Visit(Self&& s, F f)
{
f(s.kernel_id, "kernel_id");
}
bool operator==(const PerformanceConfigHipImplicitGemmFwdXdlops& other) const;
};

struct ConvHipImplicitGemmFwdXdlops final
: ConvTunableSolver<PerformanceConfigHipImplicitGemmFwdXdlops>
{
const std::string& SolverDbId() const override
{
return GetSolverDbId<ConvHipImplicitGemmFwdXdlops>();
}

PerformanceConfigHipImplicitGemmFwdXdlops
GetDefaultPerformanceConfig(const ConvolutionContext&) const override;
bool IsValidPerformanceConfig(const ConvolutionContext&,
const PerformanceConfigHipImplicitGemmFwdXdlops&) const override;
PerformanceConfigHipImplicitGemmFwdXdlops
Search(const ConvolutionContext&, const AnyInvokeParams& invoke_ctx) const override;
size_t GetWorkspaceSize(const ConvolutionContext& ctx) const override;
bool MayNeedWorkspace() const override { return false; }
bool IsApplicable(const ConvolutionContext& ctx) const override;
bool IsDynamic() const override { return true; }
ConvSolution
GetSolution(const ConvolutionContext& ctx,
const PerformanceConfigHipImplicitGemmFwdXdlops& config) const override;
float GetWti(const ConvolutionContext&) const override { return 0.01f; };
Copy link
Contributor

@atamazov atamazov Aug 25, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Normally, the Solvers that unable to compute WTI should not have thit function implemented. -2.0 will be returned by the base class' GetWti(), which means wti_unknown. See #410

However, Naive convolutions have GetWti() that return very small value (0.01f). This allows MIOpen to use Naive Solvers if no other applicable Solvers have known WTIs. Right now this means that in case of find-db miss, the library will try to use Winograd or GEMM (whatever is faster according to their GetWti's), but if both are not applicable, the library will use Naive Solver. See
https://github.com/ROCmSoftwarePlatform/MIOpen/blob/e18c9383c6d01d417773fdb90269101f3543ad45/src/include/miopen/solver.hpp#L2609-L2611

If you would like to try CK before Naive, and use it instead (because we do expect that CK is faster than Naive), then please use value bigger than 0.01f, e.g. 0.02f. This is very rude heuristics, almost a hack, but may be better than nothing.

Bottom line: Please either remove GetWti() or make it returning 0.02f or alike (and add a comment that should explain why the fixed value is being returned).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the clarification, I have updated the PR

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good, thanks for adding/re-using comments! Resolved.

};

struct AnySolver;

} // namespace solver
Expand Down
1 change: 1 addition & 0 deletions src/mlo_dir_conv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ static auto GetImplicitGemmSolvers()
miopen::solver::ConvAsmImplicitGemmGTCDynamicFwdXdlopsNHWC,
miopen::solver::ConvAsmImplicitGemmGTCDynamicBwdXdlopsNHWC,
miopen::solver::ConvCkIgemmFwdV6r1DlopsNchw,
miopen::solver::ConvHipImplicitGemmFwdXdlops,
miopen::solver::ConvAsmImplicitGemmGTCDynamicFwdDlopsNCHWC>{};
}

Expand Down
7 changes: 1 addition & 6 deletions src/ocl/convolutionocl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -566,10 +566,6 @@ void ValidateConvTensors(const ConvTensors& tensors)
const auto trivial_tensor_types_not_matched =
tensors.xDesc.GetType() != tensors.yDesc.GetType() &&
tensors.xDesc.GetType() != miopenInt8 && tensors.xDesc.GetType() != miopenInt8x4;
const auto int8_in8x4_tensor_not_matched =
(tensors.xDesc.GetType() == miopenInt8 && tensors.yDesc.GetType() != miopenInt32 &&
tensors.yDesc.GetType() != miopenFloat) ||
(tensors.xDesc.GetType() == miopenInt8x4 && tensors.yDesc.GetType() != miopenInt32);

// if(xDesc.GetLengths()[1] != wDesc.GetLengths()[1]) {
// MIOPEN_THROW(miopenStatusBadParm);
Expand All @@ -578,8 +574,7 @@ void ValidateConvTensors(const ConvTensors& tensors)
const auto x_tensor_invalid = tensors.xDesc.GetSize() < 3;

const auto bad_parameters = invalid_buffers || tensor_sizes_not_matched ||
trivial_tensor_types_not_matched || int8_in8x4_tensor_not_matched ||
x_tensor_invalid;
trivial_tensor_types_not_matched || x_tensor_invalid;

if(bad_parameters)
MIOPEN_THROW(miopenStatusBadParm);
Expand Down
2 changes: 2 additions & 0 deletions src/solver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,8 @@ inline SolverRegistrar::SolverRegistrar(IdRegistryData& registry)
++id,
ConvAsmImplicitGemmGTCDynamicFwdDlopsNCHWC{},
miopenConvolutionAlgoImplicitGEMM);
RegisterWithSolver(
registry, ++id, ConvHipImplicitGemmFwdXdlops{}, miopenConvolutionAlgoImplicitGEMM);

// IMPORTANT: New solvers should be added to the end of the function!
}
Expand Down
Loading