Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 19 additions & 17 deletions src/ImageSharp/Common/Helpers/Numerics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -780,25 +780,13 @@ public static void Accumulate(ref Vector<uint> accumulator, Vector<byte> values)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int ReduceSum(Vector128<int> accumulator)
{
    if (Sse2.IsSupported)
    {
        // Add odd to even: lanes 0 and 2 now hold (e0 + e1) and (e2 + e3).
        Vector128<int> vsum = Sse2.Add(accumulator, Sse2.Shuffle(accumulator, 0b_11_11_01_01));

        // Add high to low: lane 0 now holds the sum of all four elements.
        vsum = Sse2.Add(vsum, Sse2.Shuffle(vsum, 0b_11_10_11_10));

        // Vector128<int>.ToScalar() isn't optimized pre-net5.0 https://github.com/dotnet/runtime/pull/37882
        return Sse2.ConvertToInt32(vsum);
    }

    // Scalar fallback for hardware without SSE2 support.
    int sum = 0;
    for (int i = 0; i < Vector128<int>.Count; i++)
    {
        sum += accumulator.GetElement(i);
    }

    return sum;
}

/// <summary>
Expand All @@ -821,6 +809,20 @@ public static int ReduceSum(Vector256<int> accumulator)
return Sse2.ConvertToInt32(vsum);
}

/// <summary>
/// Reduces even elements of the vector into one sum.
/// </summary>
/// <param name="accumulator">The accumulator to reduce.</param>
/// <returns>The sum of even elements.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int EvenReduceSum(Vector128<int> accumulator)
{
    if (Sse2.IsSupported)
    {
        // Add high to low: the shuffle places elements 2 and 3 into lanes 0 and 1,
        // so lane 0 of the result holds element 0 + element 2.
        Vector128<int> vsum = Sse2.Add(accumulator, Sse2.Shuffle(accumulator, 0b_11_10_11_10));

        return Sse2.ConvertToInt32(vsum);
    }

    // Scalar fallback for hardware without SSE2 support; unchecked keeps the
    // same wrap-around behavior as the packed integer add above.
    return unchecked(accumulator.GetElement(0) + accumulator.GetElement(2));
}

/// <summary>
/// Reduces even elements of the vector into one sum.
/// </summary>
Expand Down
39 changes: 12 additions & 27 deletions src/ImageSharp/Formats/Png/Filters/AverageFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,9 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
sum = 0;

// Average(x) = Raw(x) - floor((Raw(x-bpp)+Prior(x))/2)
resultBaseRef = 3;
resultBaseRef = (byte)FilterType.Average;

int x = 0;
nint x = 0;
for (; x < bytesPerPixel; /* Note: ++x happens in the body to avoid one add operation */)
{
byte scan = Unsafe.Add(ref scanBaseRef, x);
Expand All @@ -138,7 +138,7 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
Vector256<int> sumAccumulator = Vector256<int>.Zero;
Vector256<byte> allBitsSet = Avx2.CompareEqual(sumAccumulator, sumAccumulator).AsByte();

for (int xLeft = x - bytesPerPixel; x + Vector256<byte>.Count <= scanline.Length; xLeft += Vector256<byte>.Count)
for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector256<byte>.Count; xLeft += Vector256<byte>.Count)
{
Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
Vector256<byte> left = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
Expand All @@ -157,12 +157,11 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
}
else if (Sse2.IsSupported)
{
Vector128<sbyte> zero8 = Vector128<sbyte>.Zero;
Vector128<short> zero16 = Vector128<short>.Zero;
Vector128<byte> zero = Vector128<byte>.Zero;
Vector128<int> sumAccumulator = Vector128<int>.Zero;
Vector128<byte> allBitsSet = Sse2.CompareEqual(sumAccumulator, sumAccumulator).AsByte();

for (int xLeft = x - bytesPerPixel; x + Vector128<byte>.Count <= scanline.Length; xLeft += Vector128<byte>.Count)
for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector128<byte>.Count; xLeft += Vector128<byte>.Count)
{
Vector128<byte> scan = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
Vector128<byte> left = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
Expand All @@ -174,36 +173,24 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
x += Vector128<byte>.Count;

Vector128<sbyte> absRes;
Vector128<byte> absRes;
if (Ssse3.IsSupported)
{
absRes = Ssse3.Abs(res.AsSByte()).AsSByte();
absRes = Ssse3.Abs(res.AsSByte());
}
else
{
Vector128<sbyte> mask = Sse2.CompareGreaterThan(res.AsSByte(), zero8);
mask = Sse2.Xor(mask, allBitsSet.AsSByte());
absRes = Sse2.Xor(Sse2.Add(res.AsSByte(), mask), mask);
Vector128<sbyte> mask = Sse2.CompareGreaterThan(zero.AsSByte(), res.AsSByte());
absRes = Sse2.Xor(Sse2.Add(res.AsSByte(), mask), mask).AsByte();
}

Vector128<short> loRes16 = Sse2.UnpackLow(absRes, zero8).AsInt16();
Vector128<short> hiRes16 = Sse2.UnpackHigh(absRes, zero8).AsInt16();

Vector128<int> loRes32 = Sse2.UnpackLow(loRes16, zero16).AsInt32();
Vector128<int> hiRes32 = Sse2.UnpackHigh(loRes16, zero16).AsInt32();
sumAccumulator = Sse2.Add(sumAccumulator, loRes32);
sumAccumulator = Sse2.Add(sumAccumulator, hiRes32);

loRes32 = Sse2.UnpackLow(hiRes16, zero16).AsInt32();
hiRes32 = Sse2.UnpackHigh(hiRes16, zero16).AsInt32();
sumAccumulator = Sse2.Add(sumAccumulator, loRes32);
sumAccumulator = Sse2.Add(sumAccumulator, hiRes32);
sumAccumulator = Sse2.Add(sumAccumulator, Sse2.SumAbsoluteDifferences(absRes, zero).AsInt32());
}

sum += Numerics.ReduceSum(sumAccumulator);
sum += Numerics.EvenReduceSum(sumAccumulator);
}

for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
for (nint xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
{
byte scan = Unsafe.Add(ref scanBaseRef, x);
byte left = Unsafe.Add(ref scanBaseRef, xLeft);
Expand All @@ -213,8 +200,6 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
res = (byte)(scan - Average(left, above));
sum += Numerics.Abs(unchecked((sbyte)res));
}

sum -= 3;
}

/// <summary>
Expand Down
6 changes: 3 additions & 3 deletions src/ImageSharp/Formats/Png/Filters/NoneFilter.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.

using System;
Expand All @@ -21,8 +21,8 @@ internal static class NoneFilter
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Encode(ReadOnlySpan<byte> scanline, Span<byte> result)
{
    // Insert row filter byte before the data.
    result[0] = (byte)FilterType.None;

    // Copy the scanline after the filter byte, truncating to whatever room is left in the result.
    result = result[1..];
    scanline[..Math.Min(scanline.Length, result.Length)].CopyTo(result);
}
Expand Down
29 changes: 15 additions & 14 deletions src/ImageSharp/Formats/Png/Filters/PaethFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ private static void DecodeScalar(Span<byte> scanline, Span<byte> previousScanlin

// Paeth(x) + PaethPredictor(Raw(x-bpp), Prior(x), Prior(x-bpp))
int offset = bytesPerPixel + 1; // Add one because x starts at one.
int x = 1;
nint x = 1;
for (; x < offset; x++)
{
ref byte scan = ref Unsafe.Add(ref scanBaseRef, x);
Expand Down Expand Up @@ -146,9 +146,9 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
sum = 0;

// Paeth(x) = Raw(x) - PaethPredictor(Raw(x-bpp), Prior(x), Prior(x - bpp))
resultBaseRef = 4;
resultBaseRef = (byte)FilterType.Paeth;

int x = 0;
nint x = 0;
for (; x < bytesPerPixel; /* Note: ++x happens in the body to avoid one add operation */)
{
byte scan = Unsafe.Add(ref scanBaseRef, x);
Expand All @@ -164,7 +164,7 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
Vector256<byte> zero = Vector256<byte>.Zero;
Vector256<int> sumAccumulator = Vector256<int>.Zero;

for (int xLeft = x - bytesPerPixel; x + Vector256<byte>.Count <= scanline.Length; xLeft += Vector256<byte>.Count)
for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector256<byte>.Count; xLeft += Vector256<byte>.Count)
{
Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
Vector256<byte> left = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
Expand All @@ -184,7 +184,7 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
{
Vector<uint> sumAccumulator = Vector<uint>.Zero;

for (int xLeft = x - bytesPerPixel; x + Vector<byte>.Count <= scanline.Length; xLeft += Vector<byte>.Count)
for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector<byte>.Count; xLeft += Vector<byte>.Count)
{
Vector<byte> scan = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
Vector<byte> left = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
Expand All @@ -204,7 +204,7 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
}
}

for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
for (nint xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
{
byte scan = Unsafe.Add(ref scanBaseRef, x);
byte left = Unsafe.Add(ref scanBaseRef, xLeft);
Expand All @@ -215,8 +215,6 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
res = (byte)(scan - PaethPredictor(left, above, upperLeft));
sum += Numerics.Abs(unchecked((sbyte)res));
}

sum -= 4;
}

/// <summary>
Expand Down Expand Up @@ -250,6 +248,7 @@ private static byte PaethPredictor(byte left, byte above, byte upperLeft)
return upperLeft;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector256<byte> PaethPredictor(Vector256<byte> left, Vector256<byte> above, Vector256<byte> upleft)
{
Vector256<byte> zero = Vector256<byte>.Zero;
Expand Down Expand Up @@ -282,6 +281,7 @@ private static Vector256<byte> PaethPredictor(Vector256<byte> left, Vector256<by
return Avx2.BlendVariable(resbc, left, Avx2.CompareEqual(Avx2.Min(minbc, pa), pa));
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<byte> PaethPredictor(Vector<byte> left, Vector<byte> above, Vector<byte> upperLeft)
{
Vector.Widen(left, out Vector<ushort> a1, out Vector<ushort> a2);
Expand All @@ -293,16 +293,17 @@ private static Vector<byte> PaethPredictor(Vector<byte> left, Vector<byte> above
return Vector.AsVectorByte(Vector.Narrow(p1, p2));
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<short> PaethPredictor(Vector<short> left, Vector<short> above, Vector<short> upperLeft)
{
Vector<short> p = left + above - upperLeft;
var pa = Vector.Abs(p - left);
var pb = Vector.Abs(p - above);
var pc = Vector.Abs(p - upperLeft);
Vector<short> pa = Vector.Abs(p - left);
Vector<short> pb = Vector.Abs(p - above);
Vector<short> pc = Vector.Abs(p - upperLeft);

var pa_pb = Vector.LessThanOrEqual(pa, pb);
var pa_pc = Vector.LessThanOrEqual(pa, pc);
var pb_pc = Vector.LessThanOrEqual(pb, pc);
Vector<short> pa_pb = Vector.LessThanOrEqual(pa, pb);
Vector<short> pa_pc = Vector.LessThanOrEqual(pa, pc);
Vector<short> pb_pc = Vector.LessThanOrEqual(pb, pc);

return Vector.ConditionalSelect(
condition: Vector.BitwiseAnd(pa_pb, pa_pc),
Expand Down
12 changes: 5 additions & 7 deletions src/ImageSharp/Formats/Png/Filters/SubFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,9 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> result
sum = 0;

// Sub(x) = Raw(x) - Raw(x-bpp)
resultBaseRef = 1;
resultBaseRef = (byte)FilterType.Sub;

int x = 0;
nint x = 0;
for (; x < bytesPerPixel; /* Note: ++x happens in the body to avoid one add operation */)
{
byte scan = Unsafe.Add(ref scanBaseRef, x);
Expand All @@ -108,7 +108,7 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> result
Vector256<byte> zero = Vector256<byte>.Zero;
Vector256<int> sumAccumulator = Vector256<int>.Zero;

for (int xLeft = x - bytesPerPixel; x + Vector256<byte>.Count <= scanline.Length; xLeft += Vector256<byte>.Count)
for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector256<byte>.Count; xLeft += Vector256<byte>.Count)
{
Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
Vector256<byte> prev = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
Expand All @@ -126,7 +126,7 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> result
{
Vector<uint> sumAccumulator = Vector<uint>.Zero;

for (int xLeft = x - bytesPerPixel; x + Vector<byte>.Count <= scanline.Length; xLeft += Vector<byte>.Count)
for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector<byte>.Count; xLeft += Vector<byte>.Count)
{
Vector<byte> scan = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
Vector<byte> prev = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
Expand All @@ -144,7 +144,7 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> result
}
}

for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
for (nint xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
{
byte scan = Unsafe.Add(ref scanBaseRef, x);
byte prev = Unsafe.Add(ref scanBaseRef, xLeft);
Expand All @@ -153,8 +153,6 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> result
res = (byte)(scan - prev);
sum += Numerics.Abs(unchecked((sbyte)res));
}

sum--;
}
}
}
26 changes: 11 additions & 15 deletions src/ImageSharp/Formats/Png/Filters/UpFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@ private static void DecodeAvx2(Span<byte> scanline, Span<byte> previousScanline)
// Up(x) + Prior(x)
int rb = scanline.Length;
nint offset = 1;
const int bytesPerBatch = 32;
while (rb >= bytesPerBatch)
while (rb >= Vector256<byte>.Count)
{
ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset);
Vector256<byte> current = Unsafe.As<byte, Vector256<byte>>(ref scanRef);
Expand All @@ -59,8 +58,8 @@ private static void DecodeAvx2(Span<byte> scanline, Span<byte> previousScanline)
Vector256<byte> sum = Avx2.Add(up, current);
Unsafe.As<byte, Vector256<byte>>(ref scanRef) = sum;

offset += bytesPerBatch;
rb -= bytesPerBatch;
offset += Vector256<byte>.Count;
rb -= Vector256<byte>.Count;
}

// Handle left over.
Expand All @@ -81,8 +80,7 @@ private static void DecodeSse2(Span<byte> scanline, Span<byte> previousScanline)
// Up(x) + Prior(x)
int rb = scanline.Length;
nint offset = 1;
const int bytesPerBatch = 16;
while (rb >= bytesPerBatch)
while (rb >= Vector128<byte>.Count)
{
ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset);
Vector128<byte> current = Unsafe.As<byte, Vector128<byte>>(ref scanRef);
Expand All @@ -91,8 +89,8 @@ private static void DecodeSse2(Span<byte> scanline, Span<byte> previousScanline)
Vector128<byte> sum = Sse2.Add(up, current);
Unsafe.As<byte, Vector128<byte>>(ref scanRef) = sum;

offset += bytesPerBatch;
rb -= bytesPerBatch;
offset += Vector128<byte>.Count;
rb -= Vector128<byte>.Count;
}

// Handle left over.
Expand All @@ -112,7 +110,7 @@ private static void DecodeScalar(Span<byte> scanline, Span<byte> previousScanlin
ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline);

// Up(x) + Prior(x)
for (int x = 1; x < scanline.Length; x++)
for (nint x = 1; x < scanline.Length; x++)
{
ref byte scan = ref Unsafe.Add(ref scanBaseRef, x);
byte above = Unsafe.Add(ref prevBaseRef, x);
Expand All @@ -139,16 +137,16 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
sum = 0;

// Up(x) = Raw(x) - Prior(x)
resultBaseRef = 2;
resultBaseRef = (byte)FilterType.Up;

int x = 0;
nint x = 0;

if (Avx2.IsSupported)
{
Vector256<byte> zero = Vector256<byte>.Zero;
Vector256<int> sumAccumulator = Vector256<int>.Zero;

for (; x + Vector256<byte>.Count <= scanline.Length;)
for (; x <= scanline.Length - Vector256<byte>.Count;)
{
Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
Vector256<byte> above = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref prevBaseRef, x));
Expand All @@ -166,7 +164,7 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
{
Vector<uint> sumAccumulator = Vector<uint>.Zero;

for (; x + Vector<byte>.Count <= scanline.Length;)
for (; x <= scanline.Length - Vector<byte>.Count;)
{
Vector<byte> scan = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
Vector<byte> above = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref prevBaseRef, x));
Expand All @@ -193,8 +191,6 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
res = (byte)(scan - above);
sum += Numerics.Abs(unchecked((sbyte)res));
}

sum -= 2;
}
}
}
Loading