66using System . Runtime . CompilerServices ;
77using System . Runtime . InteropServices ;
88
9+ // The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
10+ // and ROTR (Rotate Right) emitting efficient CPU instructions:
11+ // https://github.com/dotnet/coreclr/pull/1830
912namespace SixLabors . ImageSharp
1013{
1114 /// <summary>
@@ -28,28 +31,44 @@ internal interface IComponentShuffle
2831 void RunFallbackShuffle ( ReadOnlySpan < byte > source , Span < byte > dest ) ;
2932 }
3033
31- internal readonly struct DefaultShuffle4 : IComponentShuffle
// IShuffle4 extends IComponentShuffle and, as shown in this hunk, declares no members of its own.
// The structs changed later in this diff (DefaultShuffle4, WXYZShuffle4, WZYXShuffle4, YZWXShuffle4,
// ZYXWShuffle4) now implement it instead of implementing IComponentShuffle directly.
34+ /// <inheritdoc/>
35+ internal interface IShuffle4 : IComponentShuffle
3236 {
37+ }
38+
39+ internal readonly struct DefaultShuffle4 : IShuffle4
40+ {
41+ private readonly byte p3 ;
42+ private readonly byte p2 ;
43+ private readonly byte p1 ;
44+ private readonly byte p0 ;
45+
// Constructor: takes the four component indices, checks them, stores them in the p3..p0 fields,
// and sets Control from them. The removed line below shows the previous form, which only
// forwarded MmShuffle(p3, p2, p1, p0) to a control-byte constructor that this diff deletes.
3346 public DefaultShuffle4 ( byte p3 , byte p2 , byte p1 , byte p0 )
34- : this ( SimdUtils . Shuffle . MmShuffle ( p3 , p2 , p1 , p0 ) )
3547 {
// Each index is passed to DebugGuard with the inclusive bounds 0 and 3.
// NOTE(review): presumably these checks are active in debug builds only, going by the DebugGuard name — confirm.
48+ DebugGuard . MustBeBetweenOrEqualTo < byte > ( p3 , 0 , 3 , nameof ( p3 ) ) ;
49+ DebugGuard . MustBeBetweenOrEqualTo < byte > ( p2 , 0 , 3 , nameof ( p2 ) ) ;
50+ DebugGuard . MustBeBetweenOrEqualTo < byte > ( p1 , 0 , 3 , nameof ( p1 ) ) ;
51+ DebugGuard . MustBeBetweenOrEqualTo < byte > ( p0 , 0 , 3 , nameof ( p0 ) ) ;
52+
// The raw indices are kept so RunFallbackShuffle can read them from the fields directly;
// the InverseMmShuffle call that previously decoded them from Control is removed in this diff.
53+ this . p3 = p3 ;
54+ this . p2 = p2 ;
55+ this . p1 = p1 ;
56+ this . p0 = p0 ;
// Control is still set to the MmShuffle result for the same four indices.
57+ this . Control = SimdUtils . Shuffle . MmShuffle ( p3 , p2 , p1 , p0 ) ;
3658 }
3759
38- public DefaultShuffle4 ( byte control ) => this . Control = control ;
39-
4060 public byte Control { get ; }
4161
4262 [ MethodImpl ( InliningOptions . ShortMethod ) ]
4363 public void RunFallbackShuffle ( ReadOnlySpan < byte > source , Span < byte > dest )
4464 {
4565 ref byte sBase = ref MemoryMarshal . GetReference ( source ) ;
4666 ref byte dBase = ref MemoryMarshal . GetReference ( dest ) ;
47- SimdUtils . Shuffle . InverseMmShuffle (
48- this . Control ,
49- out int p3 ,
50- out int p2 ,
51- out int p1 ,
52- out int p0 ) ;
67+
68+ int p3 = this . p3 ;
69+ int p2 = this . p2 ;
70+ int p1 = this . p1 ;
71+ int p0 = this . p0 ;
5372
5473 for ( int i = 0 ; i < source . Length ; i += 4 )
5574 {
@@ -61,22 +80,22 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
6180 }
6281 }
6382
64- internal readonly struct WXYZShuffle4 : IComponentShuffle
83+ internal readonly struct WXYZShuffle4 : IShuffle4
6584 {
66- public byte Control => SimdUtils . Shuffle . MmShuffle ( 2 , 1 , 0 , 3 ) ;
// Control byte for WXYZShuffle4: the getter returns SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3).
// The accessor itself carries MethodImpl(InliningOptions.ShortMethod); the removed one-line
// expression-bodied property above returned the same expression without that attribute.
85+ public byte Control
86+ {
87+ [ MethodImpl ( InliningOptions . ShortMethod ) ]
88+ get => SimdUtils . Shuffle . MmShuffle ( 2 , 1 , 0 , 3 ) ;
89+ }
6790
6891 [ MethodImpl ( InliningOptions . ShortMethod ) ]
6992 public void RunFallbackShuffle ( ReadOnlySpan < byte > source , Span < byte > dest )
7093 {
71- ReadOnlySpan < uint > s = MemoryMarshal . Cast < byte , uint > ( source ) ;
72- Span < uint > d = MemoryMarshal . Cast < byte , uint > ( dest ) ;
73- ref uint sBase = ref MemoryMarshal . GetReference ( s ) ;
74- ref uint dBase = ref MemoryMarshal . GetReference ( d ) ;
75-
76- // The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
77- // and ROTR (Rotate Right) emitting efficient CPU instructions:
78- // https://github.com/dotnet/coreclr/pull/1830
79- for ( int i = 0 ; i < s . Length ; i ++ )
94+ ref uint sBase = ref Unsafe . As < byte , uint > ( ref MemoryMarshal . GetReference ( source ) ) ;
95+ ref uint dBase = ref Unsafe . As < byte , uint > ( ref MemoryMarshal . GetReference ( dest ) ) ;
96+ int n = source . Length / 4 ;
97+
98+ for ( int i = 0 ; i < n ; i ++ )
8099 {
81100 uint packed = Unsafe . Add ( ref sBase , i ) ;
82101
@@ -87,19 +106,22 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
87106 }
88107 }
89108
90- internal readonly struct WZYXShuffle4 : IComponentShuffle
109+ internal readonly struct WZYXShuffle4 : IShuffle4
91110 {
92- public byte Control => SimdUtils . Shuffle . MmShuffle ( 0 , 1 , 2 , 3 ) ;
// Control byte for WZYXShuffle4: the getter returns SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3).
// The accessor itself carries MethodImpl(InliningOptions.ShortMethod); the removed one-line
// expression-bodied property above returned the same expression without that attribute.
111+ public byte Control
112+ {
113+ [ MethodImpl ( InliningOptions . ShortMethod ) ]
114+ get => SimdUtils . Shuffle . MmShuffle ( 0 , 1 , 2 , 3 ) ;
115+ }
93116
94117 [ MethodImpl ( InliningOptions . ShortMethod ) ]
95118 public void RunFallbackShuffle ( ReadOnlySpan < byte > source , Span < byte > dest )
96119 {
97- ReadOnlySpan < uint > s = MemoryMarshal . Cast < byte , uint > ( source ) ;
98- Span < uint > d = MemoryMarshal . Cast < byte , uint > ( dest ) ;
99- ref uint sBase = ref MemoryMarshal . GetReference ( s ) ;
100- ref uint dBase = ref MemoryMarshal . GetReference ( d ) ;
120+ ref uint sBase = ref Unsafe . As < byte , uint > ( ref MemoryMarshal . GetReference ( source ) ) ;
121+ ref uint dBase = ref Unsafe . As < byte , uint > ( ref MemoryMarshal . GetReference ( dest ) ) ;
122+ int n = source . Length / 4 ;
101123
102- for ( int i = 0 ; i < s . Length ; i ++ )
124+ for ( int i = 0 ; i < n ; i ++ )
103125 {
104126 uint packed = Unsafe . Add ( ref sBase , i ) ;
105127
@@ -110,19 +132,22 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
110132 }
111133 }
112134
113- internal readonly struct YZWXShuffle4 : IComponentShuffle
135+ internal readonly struct YZWXShuffle4 : IShuffle4
114136 {
115- public byte Control => SimdUtils . Shuffle . MmShuffle ( 0 , 3 , 2 , 1 ) ;
// Control byte for YZWXShuffle4: the getter returns SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1).
// The accessor itself carries MethodImpl(InliningOptions.ShortMethod); the removed one-line
// expression-bodied property above returned the same expression without that attribute.
137+ public byte Control
138+ {
139+ [ MethodImpl ( InliningOptions . ShortMethod ) ]
140+ get => SimdUtils . Shuffle . MmShuffle ( 0 , 3 , 2 , 1 ) ;
141+ }
116142
117143 [ MethodImpl ( InliningOptions . ShortMethod ) ]
118144 public void RunFallbackShuffle ( ReadOnlySpan < byte > source , Span < byte > dest )
119145 {
120- ReadOnlySpan < uint > s = MemoryMarshal . Cast < byte , uint > ( source ) ;
121- Span < uint > d = MemoryMarshal . Cast < byte , uint > ( dest ) ;
122- ref uint sBase = ref MemoryMarshal . GetReference ( s ) ;
123- ref uint dBase = ref MemoryMarshal . GetReference ( d ) ;
146+ ref uint sBase = ref Unsafe . As < byte , uint > ( ref MemoryMarshal . GetReference ( source ) ) ;
147+ ref uint dBase = ref Unsafe . As < byte , uint > ( ref MemoryMarshal . GetReference ( dest ) ) ;
148+ int n = source . Length / 4 ;
124149
125- for ( int i = 0 ; i < s . Length ; i ++ )
150+ for ( int i = 0 ; i < n ; i ++ )
126151 {
127152 uint packed = Unsafe . Add ( ref sBase , i ) ;
128153
@@ -133,19 +158,22 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
133158 }
134159 }
135160
136- internal readonly struct ZYXWShuffle4 : IComponentShuffle
161+ internal readonly struct ZYXWShuffle4 : IShuffle4
137162 {
138- public byte Control => SimdUtils . Shuffle . MmShuffle ( 3 , 0 , 1 , 2 ) ;
// Control byte for ZYXWShuffle4: the getter returns SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2).
// The accessor itself carries MethodImpl(InliningOptions.ShortMethod); the removed one-line
// expression-bodied property above returned the same expression without that attribute.
163+ public byte Control
164+ {
165+ [ MethodImpl ( InliningOptions . ShortMethod ) ]
166+ get => SimdUtils . Shuffle . MmShuffle ( 3 , 0 , 1 , 2 ) ;
167+ }
139168
140169 [ MethodImpl ( InliningOptions . ShortMethod ) ]
141170 public void RunFallbackShuffle ( ReadOnlySpan < byte > source , Span < byte > dest )
142171 {
143- ReadOnlySpan < uint > s = MemoryMarshal . Cast < byte , uint > ( source ) ;
144- Span < uint > d = MemoryMarshal . Cast < byte , uint > ( dest ) ;
145- ref uint sBase = ref MemoryMarshal . GetReference ( s ) ;
146- ref uint dBase = ref MemoryMarshal . GetReference ( d ) ;
172+ ref uint sBase = ref Unsafe . As < byte , uint > ( ref MemoryMarshal . GetReference ( source ) ) ;
173+ ref uint dBase = ref Unsafe . As < byte , uint > ( ref MemoryMarshal . GetReference ( dest ) ) ;
174+ int n = source . Length / 4 ;
147175
148- for ( int i = 0 ; i < s . Length ; i ++ )
176+ for ( int i = 0 ; i < n ; i ++ )
149177 {
150178 uint packed = Unsafe . Add ( ref sBase , i ) ;
151179
0 commit comments