Skip to content

Commit 522a91e

Browse files
Merge pull request #1409 from SixLabors/js/Shuffle3Channel
3 <==> 4 Channel Shuffling with Hardware Intrinsics
2 parents 8539c9e + 3ee5a38 commit 522a91e

23 files changed

+1866
-318
lines changed

src/ImageSharp/Common/Helpers/IComponentShuffle.cs renamed to src/ImageSharp/Common/Helpers/Shuffle/IComponentShuffle.cs

Lines changed: 70 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
using System.Runtime.CompilerServices;
77
using System.Runtime.InteropServices;
88

9+
// The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
10+
// and ROTR (Rotate Right) emitting efficient CPU instructions:
11+
// https://github.com/dotnet/coreclr/pull/1830
912
namespace SixLabors.ImageSharp
1013
{
1114
/// <summary>
@@ -28,28 +31,44 @@ internal interface IComponentShuffle
2831
void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest);
2932
}
3033

31-
internal readonly struct DefaultShuffle4 : IComponentShuffle
34+
/// <inheritdoc/>
35+
internal interface IShuffle4 : IComponentShuffle
3236
{
37+
}
38+
39+
internal readonly struct DefaultShuffle4 : IShuffle4
40+
{
41+
private readonly byte p3;
42+
private readonly byte p2;
43+
private readonly byte p1;
44+
private readonly byte p0;
45+
3346
public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0)
34-
: this(SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0))
3547
{
48+
DebugGuard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
49+
DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
50+
DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
51+
DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));
52+
53+
this.p3 = p3;
54+
this.p2 = p2;
55+
this.p1 = p1;
56+
this.p0 = p0;
57+
this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
3658
}
3759

38-
public DefaultShuffle4(byte control) => this.Control = control;
39-
4060
public byte Control { get; }
4161

4262
[MethodImpl(InliningOptions.ShortMethod)]
4363
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
4464
{
4565
ref byte sBase = ref MemoryMarshal.GetReference(source);
4666
ref byte dBase = ref MemoryMarshal.GetReference(dest);
47-
SimdUtils.Shuffle.InverseMmShuffle(
48-
this.Control,
49-
out int p3,
50-
out int p2,
51-
out int p1,
52-
out int p0);
67+
68+
int p3 = this.p3;
69+
int p2 = this.p2;
70+
int p1 = this.p1;
71+
int p0 = this.p0;
5372

5473
for (int i = 0; i < source.Length; i += 4)
5574
{
@@ -61,22 +80,22 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
6180
}
6281
}
6382

64-
internal readonly struct WXYZShuffle4 : IComponentShuffle
83+
internal readonly struct WXYZShuffle4 : IShuffle4
6584
{
66-
public byte Control => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3);
85+
public byte Control
86+
{
87+
[MethodImpl(InliningOptions.ShortMethod)]
88+
get => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3);
89+
}
6790

6891
[MethodImpl(InliningOptions.ShortMethod)]
6992
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
7093
{
71-
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
72-
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
73-
ref uint sBase = ref MemoryMarshal.GetReference(s);
74-
ref uint dBase = ref MemoryMarshal.GetReference(d);
75-
76-
// The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
77-
// and ROTR (Rotate Right) emitting efficient CPU instructions:
78-
// https://github.com/dotnet/coreclr/pull/1830
79-
for (int i = 0; i < s.Length; i++)
94+
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
95+
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
96+
int n = source.Length / 4;
97+
98+
for (int i = 0; i < n; i++)
8099
{
81100
uint packed = Unsafe.Add(ref sBase, i);
82101

@@ -87,19 +106,22 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
87106
}
88107
}
89108

90-
internal readonly struct WZYXShuffle4 : IComponentShuffle
109+
internal readonly struct WZYXShuffle4 : IShuffle4
91110
{
92-
public byte Control => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3);
111+
public byte Control
112+
{
113+
[MethodImpl(InliningOptions.ShortMethod)]
114+
get => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3);
115+
}
93116

94117
[MethodImpl(InliningOptions.ShortMethod)]
95118
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
96119
{
97-
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
98-
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
99-
ref uint sBase = ref MemoryMarshal.GetReference(s);
100-
ref uint dBase = ref MemoryMarshal.GetReference(d);
120+
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
121+
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
122+
int n = source.Length / 4;
101123

102-
for (int i = 0; i < s.Length; i++)
124+
for (int i = 0; i < n; i++)
103125
{
104126
uint packed = Unsafe.Add(ref sBase, i);
105127

@@ -110,19 +132,22 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
110132
}
111133
}
112134

113-
internal readonly struct YZWXShuffle4 : IComponentShuffle
135+
internal readonly struct YZWXShuffle4 : IShuffle4
114136
{
115-
public byte Control => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1);
137+
public byte Control
138+
{
139+
[MethodImpl(InliningOptions.ShortMethod)]
140+
get => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1);
141+
}
116142

117143
[MethodImpl(InliningOptions.ShortMethod)]
118144
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
119145
{
120-
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
121-
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
122-
ref uint sBase = ref MemoryMarshal.GetReference(s);
123-
ref uint dBase = ref MemoryMarshal.GetReference(d);
146+
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
147+
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
148+
int n = source.Length / 4;
124149

125-
for (int i = 0; i < s.Length; i++)
150+
for (int i = 0; i < n; i++)
126151
{
127152
uint packed = Unsafe.Add(ref sBase, i);
128153

@@ -133,19 +158,22 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
133158
}
134159
}
135160

136-
internal readonly struct ZYXWShuffle4 : IComponentShuffle
161+
internal readonly struct ZYXWShuffle4 : IShuffle4
137162
{
138-
public byte Control => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);
163+
public byte Control
164+
{
165+
[MethodImpl(InliningOptions.ShortMethod)]
166+
get => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);
167+
}
139168

140169
[MethodImpl(InliningOptions.ShortMethod)]
141170
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
142171
{
143-
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
144-
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
145-
ref uint sBase = ref MemoryMarshal.GetReference(s);
146-
ref uint dBase = ref MemoryMarshal.GetReference(d);
172+
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
173+
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
174+
int n = source.Length / 4;
147175

148-
for (int i = 0; i < s.Length; i++)
176+
for (int i = 0; i < n; i++)
149177
{
150178
uint packed = Unsafe.Add(ref sBase, i);
151179

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// Copyright (c) Six Labors.
2+
// Licensed under the Apache License, Version 2.0.
3+
4+
using System;
5+
using System.Runtime.CompilerServices;
6+
using System.Runtime.InteropServices;
7+
8+
namespace SixLabors.ImageSharp
9+
{
10+
/// <inheritdoc/>
/// <remarks>
/// Declares no members of its own. It is implemented in this file by
/// <see cref="DefaultPad3Shuffle4"/> and <see cref="XYZWPad3Shuffle4"/>, both of which
/// advance three bytes through the source and four bytes through the destination per step.
/// </remarks>
11+
internal interface IPad3Shuffle4 : IComponentShuffle
12+
{
13+
}
14+
15+
/// <summary>
/// Shuffle that expands every 3-byte source element into a 4-byte destination element,
/// picking each destination byte through a caller-supplied selector.
/// </summary>
/// <remarks>
/// Each source element is first widened to four bytes by appending <c>0xFF</c>;
/// a selector value of 3 therefore emits <c>0xFF</c>, while 0-2 emit the matching source byte.
/// </remarks>
internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4
{
    private readonly byte p3;
    private readonly byte p2;
    private readonly byte p1;
    private readonly byte p0;

    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultPad3Shuffle4"/> struct.
    /// </summary>
    /// <param name="p3">Index (0-3) into the widened source element that is written to destination byte 3.</param>
    /// <param name="p2">Index (0-3) into the widened source element that is written to destination byte 2.</param>
    /// <param name="p1">Index (0-3) into the widened source element that is written to destination byte 1.</param>
    /// <param name="p0">Index (0-3) into the widened source element that is written to destination byte 0.</param>
    public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0)
    {
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));

        this.p3 = p3;
        this.p2 = p2;
        this.p1 = p1;
        this.p0 = p0;
        this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
    }

    /// <summary>
    /// Gets the control byte that <c>SimdUtils.Shuffle.MmShuffle</c> produced from the four selectors.
    /// </summary>
    public byte Control { get; }

    /// <summary>
    /// Scalar implementation: walks <paramref name="source"/> three bytes at a time and writes
    /// four selected bytes per element into <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The 3-byte-per-element input.</param>
    /// <param name="dest">The 4-byte-per-element output.</param>
    /// <remarks>
    /// No bounds checks are performed here. NOTE(review): this presumably relies on the caller passing a
    /// source whose length is a multiple of 3 and a destination with 4 bytes per source element - confirm.
    /// </remarks>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte sBase = ref MemoryMarshal.GetReference(source);
        ref byte dBase = ref MemoryMarshal.GetReference(dest);

        int p3 = this.p3;
        int p2 = this.p2;
        int p1 = this.p1;
        int p0 = this.p0;

        // Scratch copy of the current element widened to four bytes.
        // The padding byte never changes, so it is written once up front.
        Span<byte> temp = stackalloc byte[4];
        ref byte t = ref MemoryMarshal.GetReference(temp);
        Unsafe.Add(ref t, 3) = byte.MaxValue;

        for (int i = 0, j = 0; i < source.Length; i += 3, j += 4)
        {
            ref byte s = ref Unsafe.Add(ref sBase, i);

            // Copy exactly the three bytes that belong to this element. A single 4-byte load
            // would read one byte beyond the end of the source on the final element and would
            // only place the bytes correctly on little-endian hardware.
            Unsafe.Add(ref t, 0) = Unsafe.Add(ref s, 0);
            Unsafe.Add(ref t, 1) = Unsafe.Add(ref s, 1);
            Unsafe.Add(ref t, 2) = Unsafe.Add(ref s, 2);

            Unsafe.Add(ref dBase, j) = Unsafe.Add(ref t, p0);
            Unsafe.Add(ref dBase, j + 1) = Unsafe.Add(ref t, p1);
            Unsafe.Add(ref dBase, j + 2) = Unsafe.Add(ref t, p2);
            Unsafe.Add(ref dBase, j + 3) = Unsafe.Add(ref t, p3);
        }
    }
}
65+
66+
/// <summary>
/// Shuffle that copies every 3-byte source element unchanged into a 4-byte destination element
/// and sets the fourth destination byte to <c>0xFF</c>.
/// </summary>
internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4
{
    /// <summary>
    /// Gets the control byte returned by <c>SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0)</c>.
    /// </summary>
    public byte Control
    {
        [MethodImpl(InliningOptions.ShortMethod)]
        get => SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0);
    }

    /// <summary>
    /// Scalar implementation: advances three bytes through <paramref name="source"/> and four bytes
    /// through <paramref name="dest"/> per element.
    /// </summary>
    /// <param name="source">The 3-byte-per-element input.</param>
    /// <param name="dest">The 4-byte-per-element output.</param>
    /// <remarks>
    /// No bounds checks are performed on <paramref name="dest"/>. NOTE(review): presumably the caller
    /// guarantees 4 destination bytes for every 3 source bytes - confirm.
    /// </remarks>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte sBase = ref MemoryMarshal.GetReference(source);
        ref byte dBase = ref MemoryMarshal.GetReference(dest);

        ref byte sEnd = ref Unsafe.Add(ref sBase, source.Length);

        // Only form "end - 4" when the source actually holds four bytes. For shorter input the
        // subtraction would produce a reference in front of the span, and for an empty span whose
        // reference is null the address would wrap around and let the fast loop dereference null.
        if (source.Length >= 4)
        {
            ref byte sLoopEnd = ref Unsafe.Subtract(ref sEnd, 4);

            // Fast path: one 4-byte load and one 4-byte store per element. The loop stops while
            // more than four source bytes remain, so the load never leaves the span.
            // OR-ing with 0xFF000000 sets the most significant byte of the value, which is the
            // fourth byte in memory only on little-endian hardware.
            while (Unsafe.IsAddressLessThan(ref sBase, ref sLoopEnd))
            {
                Unsafe.As<byte, uint>(ref dBase) = Unsafe.As<byte, uint>(ref sBase) | 0xFF000000;

                sBase = ref Unsafe.Add(ref sBase, 3);
                dBase = ref Unsafe.Add(ref dBase, 4);
            }
        }

        // Tail: the remaining element(s) are copied byte by byte so nothing past the end is read.
        while (Unsafe.IsAddressLessThan(ref sBase, ref sEnd))
        {
            Unsafe.Add(ref dBase, 0) = Unsafe.Add(ref sBase, 0);
            Unsafe.Add(ref dBase, 1) = Unsafe.Add(ref sBase, 1);
            Unsafe.Add(ref dBase, 2) = Unsafe.Add(ref sBase, 2);
            Unsafe.Add(ref dBase, 3) = byte.MaxValue;

            sBase = ref Unsafe.Add(ref sBase, 3);
            dBase = ref Unsafe.Add(ref dBase, 4);
        }
    }
}
103+
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright (c) Six Labors.
2+
// Licensed under the Apache License, Version 2.0.
3+
4+
using System;
5+
using System.Runtime.CompilerServices;
6+
using System.Runtime.InteropServices;
7+
8+
namespace SixLabors.ImageSharp
9+
{
10+
/// <inheritdoc/>
/// <remarks>
/// Declares no members of its own. It is implemented in this file by
/// <see cref="DefaultShuffle3"/>, which steps through source and destination three bytes at a time.
/// </remarks>
11+
internal interface IShuffle3 : IComponentShuffle
12+
{
13+
}
14+
15+
/// <summary>
/// Shuffle that reorders the bytes inside every consecutive 3-byte group
/// according to three caller-supplied indices.
/// </summary>
internal readonly struct DefaultShuffle3 : IShuffle3
{
    private readonly byte p2;
    private readonly byte p1;
    private readonly byte p0;

    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultShuffle3"/> struct.
    /// </summary>
    /// <param name="p2">Index (0-2) of the source byte written to destination byte 2 of each group.</param>
    /// <param name="p1">Index (0-2) of the source byte written to destination byte 1 of each group.</param>
    /// <param name="p0">Index (0-2) of the source byte written to destination byte 0 of each group.</param>
    public DefaultShuffle3(byte p2, byte p1, byte p0)
    {
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 2, nameof(p2));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 2, nameof(p1));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 2, nameof(p0));

        // The control byte is built with a fixed 3 in the top selector slot.
        this.Control = SimdUtils.Shuffle.MmShuffle(3, p2, p1, p0);
        this.p0 = p0;
        this.p1 = p1;
        this.p2 = p2;
    }

    /// <summary>
    /// Gets the control byte that <c>SimdUtils.Shuffle.MmShuffle</c> produced in the constructor.
    /// </summary>
    public byte Control { get; }

    /// <summary>
    /// Scalar implementation: for each 3-byte group of <paramref name="source"/>, writes the bytes
    /// selected by the three indices to the same group position in <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The input bytes.</param>
    /// <param name="dest">The output bytes.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        int first = this.p0;
        int second = this.p1;
        int third = this.p2;

        ref byte input = ref MemoryMarshal.GetReference(source);
        ref byte output = ref MemoryMarshal.GetReference(dest);

        int offset = 0;
        while (offset < source.Length)
        {
            // Both references point at the start of the current 3-byte group.
            ref byte srcGroup = ref Unsafe.Add(ref input, offset);
            ref byte dstGroup = ref Unsafe.Add(ref output, offset);

            Unsafe.Add(ref dstGroup, 0) = Unsafe.Add(ref srcGroup, first);
            Unsafe.Add(ref dstGroup, 1) = Unsafe.Add(ref srcGroup, second);
            Unsafe.Add(ref dstGroup, 2) = Unsafe.Add(ref srcGroup, third);

            offset += 3;
        }
    }
}
53+
}

0 commit comments

Comments
 (0)