Skip to content

Commit c08bd7b

Browse files
cshungjanvorliam11
authored
Getting vxsort working on Linux amd64 (#98712)
Co-authored-by: Jan Vorlicek <[email protected]> Co-authored-by: Adeel Mujahid <[email protected]>
1 parent 71f45aa commit c08bd7b

32 files changed

+142
-141
lines changed

src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@ set(CORECLR_LIBRARIES
111111
gc_pal
112112
)
113113

114+
if(CLR_CMAKE_TARGET_ARCH_AMD64)
115+
list(APPEND CORECLR_LIBRARIES
116+
gc_vxsort
117+
)
118+
endif(CLR_CMAKE_TARGET_ARCH_AMD64)
119+
114120
if(CLR_CMAKE_TARGET_WIN32)
115121
list(APPEND CORECLR_LIBRARIES
116122
${STATIC_MT_CRT_LIB}

src/coreclr/gc/CMakeLists.txt

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -36,20 +36,9 @@ else()
3636
windows/Native.rc)
3737
endif(CLR_CMAKE_HOST_UNIX)
3838

39-
if (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32)
40-
set (GC_SOURCES
41-
${GC_SOURCES}
42-
vxsort/isa_detection.cpp
43-
vxsort/do_vxsort_avx2.cpp
44-
vxsort/do_vxsort_avx512.cpp
45-
vxsort/machine_traits.avx2.cpp
46-
vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.cpp
47-
vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.cpp
48-
vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.cpp
49-
vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.cpp
50-
vxsort/smallsort/avx2_load_mask_tables.cpp
51-
)
52-
endif (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32)
39+
if (CLR_CMAKE_TARGET_ARCH_AMD64)
40+
add_subdirectory(vxsort)
41+
endif (CLR_CMAKE_TARGET_ARCH_AMD64)
5342

5443
if (CLR_CMAKE_TARGET_WIN32)
5544
set(GC_HEADERS
@@ -87,7 +76,7 @@ if (CLR_CMAKE_TARGET_WIN32)
8776
handletablepriv.h
8877
objecthandle.h
8978
softwarewritewatch.h
90-
vxsort/do_vxsort.h)
79+
)
9180
endif(CLR_CMAKE_TARGET_WIN32)
9281

9382
if(CLR_CMAKE_HOST_WIN32)
@@ -100,6 +89,13 @@ endif(CLR_CMAKE_HOST_WIN32)
10089

10190
set (GC_LINK_LIBRARIES ${GC_LINK_LIBRARIES} gc_pal)
10291

92+
if(CLR_CMAKE_TARGET_ARCH_AMD64)
93+
list(APPEND GC_LINK_LIBRARIES
94+
gc_vxsort
95+
)
96+
endif(CLR_CMAKE_TARGET_ARCH_AMD64)
97+
98+
10399
list(APPEND GC_SOURCES ${GC_HEADERS})
104100

105101
convert_to_absolute_path(GC_SOURCES ${GC_SOURCES})

src/coreclr/gc/gc.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
#include "gcpriv.h"
2020

21-
#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS)
21+
#ifdef TARGET_AMD64
2222
#define USE_VXSORT
2323
#else
2424
#define USE_INTROSORT
@@ -10305,11 +10305,11 @@ static void do_vxsort (uint8_t** item_array, ptrdiff_t item_count, uint8_t* rang
1030510305
{
1030610306
// above this threshold, using AVX2 for sorting will likely pay off
1030710307
// despite possible downclocking on some devices
10308-
const size_t AVX2_THRESHOLD_SIZE = 8 * 1024;
10308+
const ptrdiff_t AVX2_THRESHOLD_SIZE = 8 * 1024;
1030910309

1031010310
// above this threshold, using AVX512F for sorting will likely pay off
1031110311
// despite possible downclocking on current devices
10312-
const size_t AVX512F_THRESHOLD_SIZE = 128 * 1024;
10312+
const ptrdiff_t AVX512F_THRESHOLD_SIZE = 128 * 1024;
1031310313

1031410314
if (item_count <= 1)
1031510315
return;

src/coreclr/gc/gcsvr.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
#define SERVER_GC 1
2222

23-
#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS)
23+
#ifdef TARGET_AMD64
2424
#include "vxsort/do_vxsort.h"
2525
#endif
2626

src/coreclr/gc/gcwks.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
#undef SERVER_GC
2121
#endif
2222

23-
#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS)
23+
#ifdef TARGET_AMD64
2424
#include "vxsort/do_vxsort.h"
2525
#endif
2626

src/coreclr/gc/unix/gcenv.unix.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,6 @@
3535
#define __has_cpp_attribute(x) (0)
3636
#endif
3737

38-
#if __has_cpp_attribute(fallthrough)
39-
#define FALLTHROUGH [[fallthrough]]
40-
#else
41-
#define FALLTHROUGH
42-
#endif
43-
4438
#include <algorithm>
4539

4640
#if HAVE_SYS_TIME_H
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
set(CMAKE_INCLUDE_CURRENT_DIR ON)
2+
include_directories("../env")
3+
4+
if(CLR_CMAKE_HOST_UNIX)
5+
set_source_files_properties(isa_detection.cpp PROPERTIES COMPILE_FLAGS -mavx2)
6+
set_source_files_properties(do_vxsort_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2)
7+
set_source_files_properties(do_vxsort_avx512.cpp PROPERTIES COMPILE_FLAGS -mavx2)
8+
set_source_files_properties(machine_traits.avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2)
9+
set_source_files_properties(smallsort/bitonic_sort.AVX2.int64_t.generated.cpp PROPERTIES COMPILE_FLAGS -mavx2)
10+
set_source_files_properties(smallsort/bitonic_sort.AVX2.int32_t.generated.cpp PROPERTIES COMPILE_FLAGS -mavx2)
11+
set_source_files_properties(smallsort/bitonic_sort.AVX512.int64_t.generated.cpp PROPERTIES COMPILE_FLAGS -mavx2)
12+
set_source_files_properties(smallsort/bitonic_sort.AVX512.int32_t.generated.cpp PROPERTIES COMPILE_FLAGS -mavx2)
13+
set_source_files_properties(smallsort/avx2_load_mask_tables.cpp PROPERTIES COMPILE_FLAGS -mavx2)
14+
endif(CLR_CMAKE_HOST_UNIX)
15+
16+
set (VXSORT_SOURCES
17+
isa_detection.cpp
18+
do_vxsort_avx2.cpp
19+
do_vxsort_avx512.cpp
20+
machine_traits.avx2.cpp
21+
smallsort/bitonic_sort.AVX2.int64_t.generated.cpp
22+
smallsort/bitonic_sort.AVX2.int32_t.generated.cpp
23+
smallsort/bitonic_sort.AVX512.int64_t.generated.cpp
24+
smallsort/bitonic_sort.AVX512.int32_t.generated.cpp
25+
smallsort/avx2_load_mask_tables.cpp
26+
do_vxsort.h
27+
)
28+
29+
add_library(gc_vxsort STATIC ${VXSORT_SOURCES})

src/coreclr/gc/vxsort/defs.h

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -45,36 +45,7 @@
4545
#define NOINLINE __attribute__((noinline))
4646
#endif
4747

48-
namespace std {
49-
template <class _Ty>
50-
class numeric_limits {
51-
public:
52-
static constexpr _Ty Max() { static_assert(sizeof(_Ty) != sizeof(_Ty), "func must be specialized!"); return _Ty(); }
53-
static constexpr _Ty Min() { static_assert(sizeof(_Ty) != sizeof(_Ty), "func must be specialized!"); return _Ty(); }
54-
};
55-
56-
template <>
57-
class numeric_limits<int32_t> {
58-
public:
59-
static constexpr int32_t Max() { return 0x7fffffff; }
60-
static constexpr int32_t Min() { return -0x7fffffff - 1; }
61-
};
62-
63-
template <>
64-
class numeric_limits<uint32_t> {
65-
public:
66-
static constexpr uint32_t Max() { return 0xffffffff; }
67-
static constexpr uint32_t Min() { return 0; }
68-
};
69-
70-
template <>
71-
class numeric_limits<int64_t> {
72-
public:
73-
static constexpr int64_t Max() { return 0x7fffffffffffffffi64; }
74-
75-
static constexpr int64_t Min() { return -0x7fffffffffffffffi64 - 1; }
76-
};
77-
} // namespace std
48+
#include <limits>
7849

7950
#ifndef max
8051
template <typename T>

src/coreclr/gc/vxsort/machine_traits.avx2.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <immintrin.h>
1414
#include <assert.h>
1515
#include <inttypes.h>
16+
#include <type_traits>
1617
#include "defs.h"
1718
#include "machine_traits.h"
1819

@@ -123,8 +124,7 @@ class vxsort_machine_traits<int64_t, AVX2> {
123124

124125
template <int Shift>
125126
static constexpr bool can_pack(T span) {
126-
const auto PACK_LIMIT = (((TU) std::numeric_limits<uint32_t>::Max() + 1)) << Shift;
127-
return ((TU) span) < PACK_LIMIT;
127+
return ((TU) span) < ((((TU) std::numeric_limits<uint32_t>::max() + 1)) << Shift);
128128
}
129129

130130
static INLINE TV load_vec(TV* p) { return _mm256_lddqu_si256(p); }

src/coreclr/gc/vxsort/machine_traits.avx512.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "vxsort_targets_enable_avx512.h"
1212

1313
#include <immintrin.h>
14+
#include <type_traits>
1415
#include "defs.h"
1516
#include "machine_traits.h"
1617

@@ -92,8 +93,7 @@ class vxsort_machine_traits<int64_t, AVX512> {
9293

9394
template <int Shift>
9495
static constexpr bool can_pack(T span) {
95-
const auto PACK_LIMIT = (((TU) std::numeric_limits<uint32_t>::Max() + 1)) << Shift;
96-
return ((TU) span) < PACK_LIMIT;
96+
return ((TU) span) < ((((TU) std::numeric_limits<uint32_t>::max() + 1)) << Shift);
9797
}
9898

9999
static INLINE TV load_vec(TV* p) { return _mm512_loadu_si512(p); }

0 commit comments

Comments
 (0)