Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
49e9364
Initial 3padshuffle4
JimBobSquarePants Oct 30, 2020
1d21dc9
Add Shuffle4Slice3
JimBobSquarePants Oct 30, 2020
9f38d40
Cleanup
JimBobSquarePants Oct 30, 2020
2421a56
Merge branch 'master' into js/Shuffle3Channel
JimBobSquarePants Oct 30, 2020
1b85483
fix spans directly
JimBobSquarePants Oct 31, 2020
21611e1
Faster Pad3Shuffle4
JimBobSquarePants Oct 31, 2020
f462bfe
Faster Shuffle4Slice3
JimBobSquarePants Oct 31, 2020
2d1f2cc
Update benchmark
JimBobSquarePants Oct 31, 2020
d5b2577
Fast fallbacks
JimBobSquarePants Nov 1, 2020
893bfdd
Don't cast full spans
JimBobSquarePants Nov 1, 2020
76d5277
Shuffle3 + Tests
JimBobSquarePants Nov 2, 2020
49062c4
Cleanup and fix tests
JimBobSquarePants Nov 2, 2020
8c32469
Fix Shuffle4Slice3, wire up shuffles.
JimBobSquarePants Nov 2, 2020
1f73b21
Add Rgb24 <==> Vector4 benchmarks
JimBobSquarePants Nov 2, 2020
a08f906
Unroll XYZWShuffle4Slice3
JimBobSquarePants Nov 4, 2020
4416d3d
Fix shuffle +m slice fallback
JimBobSquarePants Nov 4, 2020
11cc6af
Inline controls as constants
JimBobSquarePants Nov 4, 2020
1ad9fcd
Handle Bmp encoder padding.
JimBobSquarePants Nov 6, 2020
e1168ad
Update src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
JimBobSquarePants Nov 6, 2020
a46fb9b
Merge branch 'js/Shuffle3Channel' of https://github.com/SixLabors/Ima…
JimBobSquarePants Nov 6, 2020
74dd8cd
Use ROS trick all round and optimize Shuffle3
JimBobSquarePants Nov 6, 2020
56cfd96
Merge branch 'master' into js/Shuffle3Channel
JimBobSquarePants Nov 6, 2020
3cda066
Fix shuffle
JimBobSquarePants Nov 6, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

// The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
// and ROTR (Rotate Right) emitting efficient CPU instructions:
// https://github.com/dotnet/coreclr/pull/1830
namespace SixLabors.ImageSharp
{
/// <summary>
Expand All @@ -28,28 +31,44 @@ internal interface IComponentShuffle
void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest);
}

internal readonly struct DefaultShuffle4 : IComponentShuffle
/// <inheritdoc/>
internal interface IShuffle4 : IComponentShuffle
{
}

internal readonly struct DefaultShuffle4 : IShuffle4
{
private readonly byte p3;
private readonly byte p2;
private readonly byte p1;
private readonly byte p0;

public DefaultShuffle4(byte p3, byte p2, byte p1, byte p0)
: this(SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0))
{
DebugGuard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));

this.p3 = p3;
this.p2 = p2;
this.p1 = p1;
this.p0 = p0;
this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
}

public DefaultShuffle4(byte control) => this.Control = control;

public byte Control { get; }

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ref byte sBase = ref MemoryMarshal.GetReference(source);
ref byte dBase = ref MemoryMarshal.GetReference(dest);
SimdUtils.Shuffle.InverseMmShuffle(
this.Control,
out int p3,
out int p2,
out int p1,
out int p0);

int p3 = this.p3;
int p2 = this.p2;
int p1 = this.p1;
int p0 = this.p0;

for (int i = 0; i < source.Length; i += 4)
{
Expand All @@ -61,22 +80,22 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
}
}

internal readonly struct WXYZShuffle4 : IComponentShuffle
internal readonly struct WXYZShuffle4 : IShuffle4
{
public byte Control => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3);
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(2, 1, 0, 3);
}

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);

// The JIT can detect and optimize rotation idioms ROTL (Rotate Left)
// and ROTR (Rotate Right) emitting efficient CPU instructions:
// https://github.com/dotnet/coreclr/pull/1830
for (int i = 0; i < s.Length; i++)
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 4;

for (int i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);

Expand All @@ -87,19 +106,22 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
}
}

internal readonly struct WZYXShuffle4 : IComponentShuffle
internal readonly struct WZYXShuffle4 : IShuffle4
{
public byte Control => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3);
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(0, 1, 2, 3);
}

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 4;

for (int i = 0; i < s.Length; i++)
for (int i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);

Expand All @@ -110,19 +132,22 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
}
}

internal readonly struct YZWXShuffle4 : IComponentShuffle
internal readonly struct YZWXShuffle4 : IShuffle4
{
public byte Control => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1);
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(0, 3, 2, 1);
}

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 4;

for (int i = 0; i < s.Length; i++)
for (int i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);

Expand All @@ -133,19 +158,22 @@ public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
}
}

internal readonly struct ZYXWShuffle4 : IComponentShuffle
internal readonly struct ZYXWShuffle4 : IShuffle4
{
public byte Control => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);
public byte Control
{
[MethodImpl(InliningOptions.ShortMethod)]
get => SimdUtils.Shuffle.MmShuffle(3, 0, 1, 2);
}

[MethodImpl(InliningOptions.ShortMethod)]
public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
{
ReadOnlySpan<uint> s = MemoryMarshal.Cast<byte, uint>(source);
Span<uint> d = MemoryMarshal.Cast<byte, uint>(dest);
ref uint sBase = ref MemoryMarshal.GetReference(s);
ref uint dBase = ref MemoryMarshal.GetReference(d);
ref uint sBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(source));
ref uint dBase = ref Unsafe.As<byte, uint>(ref MemoryMarshal.GetReference(dest));
int n = source.Length / 4;

for (int i = 0; i < s.Length; i++)
for (int i = 0; i < n; i++)
{
uint packed = Unsafe.Add(ref sBase, i);

Expand Down
103 changes: 103 additions & 0 deletions src/ImageSharp/Common/Helpers/Shuffle/IPad3Shuffle4.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace SixLabors.ImageSharp
{
/// <summary>
/// Marker interface for <see cref="IComponentShuffle"/> implementations that
/// consume 3 source bytes per element and emit 4 destination bytes per element.
/// The implementations in this file supply <see cref="byte.MaxValue"/> as the
/// fourth input component before shuffling. Declares no members of its own.
/// </summary>
internal interface IPad3Shuffle4 : IComponentShuffle
{
}

/// <summary>
/// General purpose <see cref="IPad3Shuffle4"/>: every 3-byte source element is
/// extended with a fourth component of <see cref="byte.MaxValue"/> and the four
/// components are then written to the destination in the order selected by the
/// indices supplied at construction.
/// </summary>
internal readonly struct DefaultPad3Shuffle4 : IPad3Shuffle4
{
    private readonly byte p3;
    private readonly byte p2;
    private readonly byte p1;
    private readonly byte p0;

    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultPad3Shuffle4"/> struct.
    /// Each index selects which padded input component (0-2 are the source bytes,
    /// 3 is the 0xFF padding byte) is written to the matching output position.
    /// </summary>
    /// <param name="p3">Input component index written to output byte 3. Checked by DebugGuard to be in [0, 3].</param>
    /// <param name="p2">Input component index written to output byte 2. Checked by DebugGuard to be in [0, 3].</param>
    /// <param name="p1">Input component index written to output byte 1. Checked by DebugGuard to be in [0, 3].</param>
    /// <param name="p0">Input component index written to output byte 0. Checked by DebugGuard to be in [0, 3].</param>
    public DefaultPad3Shuffle4(byte p3, byte p2, byte p1, byte p0)
    {
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p3, 0, 3, nameof(p3));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 3, nameof(p2));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 3, nameof(p1));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 3, nameof(p0));

        this.p3 = p3;
        this.p2 = p2;
        this.p1 = p1;
        this.p0 = p0;
        this.Control = SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0);
    }

    /// <summary>
    /// Gets the control byte computed by <c>SimdUtils.Shuffle.MmShuffle(p3, p2, p1, p0)</c>.
    /// </summary>
    public byte Control { get; }

    /// <summary>
    /// Scalar shuffle: for each whole 3-byte element of <paramref name="source"/>
    /// writes one 4-byte element to <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The source bytes, read in groups of 3. Trailing bytes that do not form a whole group are ignored.</param>
    /// <param name="dest">
    /// The destination bytes, written in groups of 4. No bounds checks are performed;
    /// the caller must supply at least 4 bytes per source element.
    /// </param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte sBase = ref MemoryMarshal.GetReference(source);
        ref byte dBase = ref MemoryMarshal.GetReference(dest);

        int p3 = this.p3;
        int p2 = this.p2;
        int p1 = this.p1;
        int p0 = this.p0;

        // Scratch holding the padded input element. Slot 3 is the constant
        // padding byte, so it is only written once.
        Span<byte> temp = stackalloc byte[4];
        ref byte t = ref MemoryMarshal.GetReference(temp);
        Unsafe.Add(ref t, 3) = byte.MaxValue;

        // Only iterate whole 3-byte elements so no read ever goes past the end of the source.
        int sLength = source.Length - (source.Length % 3);

        for (int i = 0, j = 0; i < sLength; i += 3, j += 4)
        {
            ref byte s = ref Unsafe.Add(ref sBase, i);

            // Copy the three source bytes individually. A single 4-byte load here
            // would read one byte beyond the final element of the source span and
            // would make the position of the padding byte depend on endianness.
            Unsafe.Add(ref t, 0) = Unsafe.Add(ref s, 0);
            Unsafe.Add(ref t, 1) = Unsafe.Add(ref s, 1);
            Unsafe.Add(ref t, 2) = Unsafe.Add(ref s, 2);

            Unsafe.Add(ref dBase, j) = Unsafe.Add(ref t, p0);
            Unsafe.Add(ref dBase, j + 1) = Unsafe.Add(ref t, p1);
            Unsafe.Add(ref dBase, j + 2) = Unsafe.Add(ref t, p2);
            Unsafe.Add(ref dBase, j + 3) = Unsafe.Add(ref t, p3);
        }
    }
}

/// <summary>
/// <see cref="IPad3Shuffle4"/> that keeps the three source components in their
/// original order and appends <see cref="byte.MaxValue"/> as the fourth byte
/// of every destination element.
/// </summary>
internal readonly struct XYZWPad3Shuffle4 : IPad3Shuffle4
{
    /// <summary>
    /// Gets the control byte computed by <c>SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0)</c>.
    /// </summary>
    public byte Control
    {
        [MethodImpl(InliningOptions.ShortMethod)]
        get
        {
            return SimdUtils.Shuffle.MmShuffle(3, 2, 1, 0);
        }
    }

    /// <summary>
    /// Scalar fallback: expands each 3-byte group of <paramref name="source"/>
    /// into a 4-byte group of <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The source bytes, consumed 3 at a time.</param>
    /// <param name="dest">The destination bytes, produced 4 at a time. No bounds checks are performed.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte src = ref MemoryMarshal.GetReference(source);
        ref byte dst = ref MemoryMarshal.GetReference(dest);

        int length = source.Length;

        // The wide path loads 4 bytes per 3-byte element, so it must stop while
        // more than 4 source bytes remain after the current offset.
        int wideLimit = length - 4;

        int i = 0;
        int j = 0;

        for (; i < wideLimit; i += 3, j += 4)
        {
            uint packed = Unsafe.As<byte, uint>(ref Unsafe.Add(ref src, i));
            Unsafe.As<byte, uint>(ref Unsafe.Add(ref dst, j)) = packed | 0xFF000000;
        }

        // Remaining elements are copied byte by byte so that nothing beyond
        // the end of the source is touched.
        for (; i < length; i += 3, j += 4)
        {
            ref byte sIn = ref Unsafe.Add(ref src, i);
            ref byte dOut = ref Unsafe.Add(ref dst, j);

            Unsafe.Add(ref dOut, 0) = Unsafe.Add(ref sIn, 0);
            Unsafe.Add(ref dOut, 1) = Unsafe.Add(ref sIn, 1);
            Unsafe.Add(ref dOut, 2) = Unsafe.Add(ref sIn, 2);
            Unsafe.Add(ref dOut, 3) = byte.MaxValue;
        }
    }
}
}
53 changes: 53 additions & 0 deletions src/ImageSharp/Common/Helpers/Shuffle/IShuffle3.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace SixLabors.ImageSharp
{
/// <summary>
/// Marker interface for <see cref="IComponentShuffle"/> implementations that
/// reorder the bytes of 3-byte elements, reading and writing 3 bytes per element.
/// Declares no members of its own.
/// </summary>
internal interface IShuffle3 : IComponentShuffle
{
}

/// <summary>
/// General purpose <see cref="IShuffle3"/>: reorders the three bytes of every
/// element according to the indices supplied at construction.
/// </summary>
internal readonly struct DefaultShuffle3 : IShuffle3
{
    private readonly byte p2;
    private readonly byte p1;
    private readonly byte p0;

    /// <summary>
    /// Initializes a new instance of the <see cref="DefaultShuffle3"/> struct.
    /// </summary>
    /// <param name="p2">Source byte index written to output byte 2. Checked by DebugGuard to be in [0, 2].</param>
    /// <param name="p1">Source byte index written to output byte 1. Checked by DebugGuard to be in [0, 2].</param>
    /// <param name="p0">Source byte index written to output byte 0. Checked by DebugGuard to be in [0, 2].</param>
    public DefaultShuffle3(byte p2, byte p1, byte p0)
    {
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p2, 0, 2, nameof(p2));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p1, 0, 2, nameof(p1));
        DebugGuard.MustBeBetweenOrEqualTo<byte>(p0, 0, 2, nameof(p0));

        // The control byte is built with the fourth selector fixed to 3.
        this.Control = SimdUtils.Shuffle.MmShuffle(3, p2, p1, p0);
        this.p0 = p0;
        this.p1 = p1;
        this.p2 = p2;
    }

    /// <summary>
    /// Gets the control byte computed by <c>SimdUtils.Shuffle.MmShuffle(3, p2, p1, p0)</c>.
    /// </summary>
    public byte Control { get; }

    /// <summary>
    /// Scalar fallback: walks <paramref name="source"/> in steps of 3 bytes and
    /// writes the reordered bytes to the same offsets of <paramref name="dest"/>.
    /// </summary>
    /// <param name="source">The source bytes.</param>
    /// <param name="dest">The destination bytes. No bounds checks are performed.</param>
    [MethodImpl(InliningOptions.ShortMethod)]
    public void RunFallbackShuffle(ReadOnlySpan<byte> source, Span<byte> dest)
    {
        ref byte src = ref MemoryMarshal.GetReference(source);
        ref byte dst = ref MemoryMarshal.GetReference(dest);

        // Copy the selectors to locals so the loop does not re-read the fields.
        int first = this.p0;
        int second = this.p1;
        int third = this.p2;

        int length = source.Length;
        int offset = 0;

        while (offset < length)
        {
            ref byte elementIn = ref Unsafe.Add(ref src, offset);
            ref byte elementOut = ref Unsafe.Add(ref dst, offset);

            Unsafe.Add(ref elementOut, 0) = Unsafe.Add(ref elementIn, first);
            Unsafe.Add(ref elementOut, 1) = Unsafe.Add(ref elementIn, second);
            Unsafe.Add(ref elementOut, 2) = Unsafe.Add(ref elementIn, third);

            offset += 3;
        }
    }
}
}
Loading