From 919721342f583cba791420db5b04942b0c9a4cb5 Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Tue, 13 Sep 2022 12:07:41 -0700 Subject: [PATCH] clean up PNG filters --- src/ImageSharp/Common/Helpers/Numerics.cs | 36 +++++++++-------- .../Formats/Png/Filters/AverageFilter.cs | 39 ++++++------------- .../Formats/Png/Filters/NoneFilter.cs | 6 +-- .../Formats/Png/Filters/PaethFilter.cs | 29 +++++++------- .../Formats/Png/Filters/SubFilter.cs | 12 +++--- .../Formats/Png/Filters/UpFilter.cs | 26 ++++++------- .../Formats/Png/ReferenceImplementations.cs | 10 +---- 7 files changed, 66 insertions(+), 92 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index 8be59ca9a1..6b9bd72cfa 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -780,25 +780,13 @@ public static void Accumulate(ref Vector accumulator, Vector values) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int ReduceSum(Vector128 accumulator) { - if (Ssse3.IsSupported) - { - Vector128 hadd = Ssse3.HorizontalAdd(accumulator, accumulator); - Vector128 swapped = Sse2.Shuffle(hadd, 0x1); - Vector128 tmp = Sse2.Add(hadd, swapped); + // Add odd to even. + Vector128 vsum = Sse2.Add(accumulator, Sse2.Shuffle(accumulator, 0b_11_11_01_01)); - // Vector128.ToScalar() isn't optimized pre-net5.0 https://github.com/dotnet/runtime/pull/37882 - return Sse2.ConvertToInt32(tmp); - } - else - { - int sum = 0; - for (int i = 0; i < Vector128.Count; i++) - { - sum += accumulator.GetElement(i); - } + // Add high to low. + vsum = Sse2.Add(vsum, Sse2.Shuffle(vsum, 0b_11_10_11_10)); - return sum; - } + return Sse2.ConvertToInt32(vsum); } /// @@ -821,6 +809,20 @@ public static int ReduceSum(Vector256 accumulator) return Sse2.ConvertToInt32(vsum); } + /// + /// Reduces even elements of the vector into one sum. + /// + /// The accumulator to reduce. + /// The sum of even elements. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int EvenReduceSum(Vector128 accumulator) + { + // Add high to low. + Vector128 vsum = Sse2.Add(accumulator, Sse2.Shuffle(accumulator, 0b_11_10_11_10)); + + return Sse2.ConvertToInt32(vsum); + } + /// /// Reduces even elements of the vector into one sum. /// diff --git a/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs b/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs index 4471b355ef..2cc7697550 100644 --- a/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/AverageFilter.cs @@ -119,9 +119,9 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previo sum = 0; // Average(x) = Raw(x) - floor((Raw(x-bpp)+Prior(x))/2) - resultBaseRef = 3; + resultBaseRef = (byte)FilterType.Average; - int x = 0; + nint x = 0; for (; x < bytesPerPixel; /* Note: ++x happens in the body to avoid one add operation */) { byte scan = Unsafe.Add(ref scanBaseRef, x); @@ -138,7 +138,7 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previo Vector256 sumAccumulator = Vector256.Zero; Vector256 allBitsSet = Avx2.CompareEqual(sumAccumulator, sumAccumulator).AsByte(); - for (int xLeft = x - bytesPerPixel; x + Vector256.Count <= scanline.Length; xLeft += Vector256.Count) + for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector256.Count; xLeft += Vector256.Count) { Vector256 scan = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, x)); Vector256 left = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, xLeft)); @@ -157,12 +157,11 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previo } else if (Sse2.IsSupported) { - Vector128 zero8 = Vector128.Zero; - Vector128 zero16 = Vector128.Zero; + Vector128 zero = Vector128.Zero; Vector128 sumAccumulator = Vector128.Zero; Vector128 allBitsSet = Sse2.CompareEqual(sumAccumulator, sumAccumulator).AsByte(); - for (int xLeft = x - bytesPerPixel; x + Vector128.Count <= scanline.Length; xLeft += Vector128.Count) + for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector128.Count; xLeft += Vector128.Count) { Vector128 scan = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, x)); Vector128 left = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, xLeft)); @@ -174,36 +173,24 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previo Unsafe.As>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type x += Vector128.Count; - Vector128 absRes; + Vector128 absRes; if (Ssse3.IsSupported) { - absRes = Ssse3.Abs(res.AsSByte()).AsSByte(); + absRes = Ssse3.Abs(res.AsSByte()); } else { - Vector128 mask = Sse2.CompareGreaterThan(res.AsSByte(), zero8); - mask = Sse2.Xor(mask, allBitsSet.AsSByte()); - absRes = Sse2.Xor(Sse2.Add(res.AsSByte(), mask), mask); + Vector128 mask = Sse2.CompareGreaterThan(zero.AsSByte(), res.AsSByte()); + absRes = Sse2.Xor(Sse2.Add(res.AsSByte(), mask), mask).AsByte(); } - Vector128 loRes16 = Sse2.UnpackLow(absRes, zero8).AsInt16(); - Vector128 hiRes16 = Sse2.UnpackHigh(absRes, zero8).AsInt16(); - - Vector128 loRes32 = Sse2.UnpackLow(loRes16, zero16).AsInt32(); - Vector128 hiRes32 = Sse2.UnpackHigh(loRes16, zero16).AsInt32(); - sumAccumulator = Sse2.Add(sumAccumulator, loRes32); - sumAccumulator = Sse2.Add(sumAccumulator, hiRes32); - - loRes32 = Sse2.UnpackLow(hiRes16, zero16).AsInt32(); - hiRes32 = Sse2.UnpackHigh(hiRes16, zero16).AsInt32(); - sumAccumulator = Sse2.Add(sumAccumulator, loRes32); - sumAccumulator = Sse2.Add(sumAccumulator, hiRes32); + sumAccumulator = Sse2.Add(sumAccumulator, Sse2.SumAbsoluteDifferences(absRes, zero).AsInt32()); } - sum += Numerics.ReduceSum(sumAccumulator); + sum += Numerics.EvenReduceSum(sumAccumulator); } - for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */) + for (nint xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */) { byte scan = Unsafe.Add(ref scanBaseRef, x); byte left = Unsafe.Add(ref scanBaseRef, xLeft); @@ -213,8 +200,6 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previo res = (byte)(scan - Average(left, above)); sum += Numerics.Abs(unchecked((sbyte)res)); } - - sum -= 3; } /// diff --git a/src/ImageSharp/Formats/Png/Filters/NoneFilter.cs b/src/ImageSharp/Formats/Png/Filters/NoneFilter.cs index c0e14d4ab0..a791952cf2 100644 --- a/src/ImageSharp/Formats/Png/Filters/NoneFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/NoneFilter.cs @@ -1,4 +1,4 @@ -// Copyright (c) Six Labors. +// Copyright (c) Six Labors. // Licensed under the Six Labors Split License. using System; @@ -21,8 +21,8 @@ internal static class NoneFilter [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void Encode(ReadOnlySpan scanline, Span result) { - // Insert a byte before the data. - result[0] = 0; + // Insert row filter byte before the data. + result[0] = (byte)FilterType.None; result = result[1..]; scanline[..Math.Min(scanline.Length, result.Length)].CopyTo(result); } diff --git a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs index 0676f618b1..f4de3132c9 100644 --- a/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/PaethFilter.cs @@ -108,7 +108,7 @@ private static void DecodeScalar(Span scanline, Span previousScanlin // Paeth(x) + PaethPredictor(Raw(x-bpp), Prior(x), Prior(x-bpp)) int offset = bytesPerPixel + 1; // Add one because x starts at one. - int x = 1; + nint x = 1; for (; x < offset; x++) { ref byte scan = ref Unsafe.Add(ref scanBaseRef, x); @@ -146,9 +146,9 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previo sum = 0; // Paeth(x) = Raw(x) - PaethPredictor(Raw(x-bpp), Prior(x), Prior(x - bpp)) - resultBaseRef = 4; + resultBaseRef = (byte)FilterType.Paeth; - int x = 0; + nint x = 0; for (; x < bytesPerPixel; /* Note: ++x happens in the body to avoid one add operation */) { byte scan = Unsafe.Add(ref scanBaseRef, x); @@ -164,7 +164,7 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previo Vector256 zero = Vector256.Zero; Vector256 sumAccumulator = Vector256.Zero; - for (int xLeft = x - bytesPerPixel; x + Vector256.Count <= scanline.Length; xLeft += Vector256.Count) + for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector256.Count; xLeft += Vector256.Count) { Vector256 scan = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, x)); Vector256 left = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, xLeft)); @@ -184,7 +184,7 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previo { Vector sumAccumulator = Vector.Zero; - for (int xLeft = x - bytesPerPixel; x + Vector.Count <= scanline.Length; xLeft += Vector.Count) + for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector.Count; xLeft += Vector.Count) { Vector scan = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, x)); Vector left = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, xLeft)); @@ -204,7 +204,7 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previo } } - for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */) + for (nint xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */) { byte scan = Unsafe.Add(ref scanBaseRef, x); byte left = Unsafe.Add(ref scanBaseRef, xLeft); @@ -215,8 +215,6 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previo res = (byte)(scan - PaethPredictor(left, above, upperLeft)); sum += Numerics.Abs(unchecked((sbyte)res)); } - - sum -= 4; } /// @@ -250,6 +248,7 @@ private static byte PaethPredictor(byte left, byte above, byte upperLeft) return upperLeft; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector256 PaethPredictor(Vector256 left, Vector256 above, Vector256 upleft) { Vector256 zero = Vector256.Zero; @@ -282,6 +281,7 @@ private static Vector256 PaethPredictor(Vector256 left, Vector256 PaethPredictor(Vector left, Vector above, Vector upperLeft) { Vector.Widen(left, out Vector a1, out Vector a2); @@ -293,16 +293,17 @@ private static Vector PaethPredictor(Vector left, Vector above return Vector.AsVectorByte(Vector.Narrow(p1, p2)); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector PaethPredictor(Vector left, Vector above, Vector upperLeft) { Vector p = left + above - upperLeft; - var pa = Vector.Abs(p - left); - var pb = Vector.Abs(p - above); - var pc = Vector.Abs(p - upperLeft); + Vector pa = Vector.Abs(p - left); + Vector pb = Vector.Abs(p - above); + Vector pc = Vector.Abs(p - upperLeft); - var pa_pb = Vector.LessThanOrEqual(pa, pb); - var pa_pc = Vector.LessThanOrEqual(pa, pc); - var pb_pc = Vector.LessThanOrEqual(pb, pc); + Vector pa_pb = Vector.LessThanOrEqual(pa, pb); + Vector pa_pc = Vector.LessThanOrEqual(pa, pc); + Vector pb_pc = Vector.LessThanOrEqual(pb, pc); return Vector.ConditionalSelect( condition: Vector.BitwiseAnd(pa_pb, pa_pc), diff --git a/src/ImageSharp/Formats/Png/Filters/SubFilter.cs b/src/ImageSharp/Formats/Png/Filters/SubFilter.cs index 063cc11c36..f339e1bbac 100644 --- a/src/ImageSharp/Formats/Png/Filters/SubFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/SubFilter.cs @@ -91,9 +91,9 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan result sum = 0; // Sub(x) = Raw(x) - Raw(x-bpp) - resultBaseRef = 1; + resultBaseRef = (byte)FilterType.Sub; - int x = 0; + nint x = 0; for (; x < bytesPerPixel; /* Note: ++x happens in the body to avoid one add operation */) { byte scan = Unsafe.Add(ref scanBaseRef, x); @@ -108,7 +108,7 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan result Vector256 zero = Vector256.Zero; Vector256 sumAccumulator = Vector256.Zero; - for (int xLeft = x - bytesPerPixel; x + Vector256.Count <= scanline.Length; xLeft += Vector256.Count) + for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector256.Count; xLeft += Vector256.Count) { Vector256 scan = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, x)); Vector256 prev = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, xLeft)); @@ -126,7 +126,7 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan result { Vector sumAccumulator = Vector.Zero; - for (int xLeft = x - bytesPerPixel; x + Vector.Count <= scanline.Length; xLeft += Vector.Count) + for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector.Count; xLeft += Vector.Count) { Vector scan = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, x)); Vector prev = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, xLeft)); @@ -144,7 +144,7 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan result } } - for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */) + for (nint xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */) { byte scan = Unsafe.Add(ref scanBaseRef, x); byte prev = Unsafe.Add(ref scanBaseRef, xLeft); @@ -153,8 +153,6 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan result res = (byte)(scan - prev); sum += Numerics.Abs(unchecked((sbyte)res)); } - - sum--; } } } diff --git a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs index e80c5d22b4..9990f4c6fd 100644 --- a/src/ImageSharp/Formats/Png/Filters/UpFilter.cs +++ b/src/ImageSharp/Formats/Png/Filters/UpFilter.cs @@ -49,8 +49,7 @@ private static void DecodeAvx2(Span scanline, Span previousScanline) // Up(x) + Prior(x) int rb = scanline.Length; nint offset = 1; - const int bytesPerBatch = 32; - while (rb >= bytesPerBatch) + while (rb >= Vector256.Count) { ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); Vector256 current = Unsafe.As>(ref scanRef); @@ -59,8 +58,8 @@ private static void DecodeAvx2(Span scanline, Span previousScanline) Vector256 sum = Avx2.Add(up, current); Unsafe.As>(ref scanRef) = sum; - offset += bytesPerBatch; - rb -= bytesPerBatch; + offset += Vector256.Count; + rb -= Vector256.Count; } // Handle left over. @@ -81,8 +80,7 @@ private static void DecodeSse2(Span scanline, Span previousScanline) // Up(x) + Prior(x) int rb = scanline.Length; nint offset = 1; - const int bytesPerBatch = 16; - while (rb >= bytesPerBatch) + while (rb >= Vector128.Count) { ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset); Vector128 current = Unsafe.As>(ref scanRef); @@ -91,8 +89,8 @@ private static void DecodeSse2(Span scanline, Span previousScanline) Vector128 sum = Sse2.Add(up, current); Unsafe.As>(ref scanRef) = sum; - offset += bytesPerBatch; - rb -= bytesPerBatch; + offset += Vector128.Count; + rb -= Vector128.Count; } // Handle left over. @@ -112,7 +110,7 @@ private static void DecodeScalar(Span scanline, Span previousScanlin ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline); // Up(x) + Prior(x) - for (int x = 1; x < scanline.Length; x++) + for (nint x = 1; x < scanline.Length; x++) { ref byte scan = ref Unsafe.Add(ref scanBaseRef, x); byte above = Unsafe.Add(ref prevBaseRef, x); @@ -139,16 +137,16 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previo sum = 0; // Up(x) = Raw(x) - Prior(x) - resultBaseRef = 2; + resultBaseRef = (byte)FilterType.Up; - int x = 0; + nint x = 0; if (Avx2.IsSupported) { Vector256 zero = Vector256.Zero; Vector256 sumAccumulator = Vector256.Zero; - for (; x + Vector256.Count <= scanline.Length;) + for (; x <= scanline.Length - Vector256.Count;) { Vector256 scan = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, x)); Vector256 above = Unsafe.As>(ref Unsafe.Add(ref prevBaseRef, x)); @@ -166,7 +164,7 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previo { Vector sumAccumulator = Vector.Zero; - for (; x + Vector.Count <= scanline.Length;) + for (; x <= scanline.Length - Vector.Count;) { Vector scan = Unsafe.As>(ref Unsafe.Add(ref scanBaseRef, x)); Vector above = Unsafe.As>(ref Unsafe.Add(ref prevBaseRef, x)); @@ -193,8 +191,6 @@ public static void Encode(ReadOnlySpan scanline, ReadOnlySpan previo res = (byte)(scan - above); sum += Numerics.Abs(unchecked((sbyte)res)); } - - sum -= 2; } } } diff --git a/tests/ImageSharp.Tests/Formats/Png/ReferenceImplementations.cs b/tests/ImageSharp.Tests/Formats/Png/ReferenceImplementations.cs index c91ef66670..dbc0bde0ed 100644 --- a/tests/ImageSharp.Tests/Formats/Png/ReferenceImplementations.cs +++ b/tests/ImageSharp.Tests/Formats/Png/ReferenceImplementations.cs @@ -24,7 +24,7 @@ internal static partial class ReferenceImplementations [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void EncodePaethFilter(ReadOnlySpan scanline, Span previousScanline, Span result, int bytesPerPixel, out int sum) { - DebugGuard.MustBeSameSized(scanline, previousScanline, nameof(scanline)); + DebugGuard.MustBeSameSized(scanline, previousScanline, nameof(scanline)); DebugGuard.MustBeSizedAtLeast(result, scanline, nameof(result)); ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline); @@ -57,8 +57,6 @@ public static void EncodePaethFilter(ReadOnlySpan scanline, Span pre res = (byte)(scan - PaethPredictor(left, above, upperLeft)); sum += Numerics.Abs(unchecked((sbyte)res)); } - - sum -= 4; } /// @@ -99,8 +97,6 @@ public static void EncodeSubFilter(ReadOnlySpan scanline, Span resul res = (byte)(scan - prev); sum += Numerics.Abs(unchecked((sbyte)res)); } - - sum -= 1; } /// @@ -135,8 +131,6 @@ public static void EncodeUpFilter(ReadOnlySpan scanline, Span previo res = (byte)(scan - above); sum += Numerics.Abs(unchecked((sbyte)res)); } - - sum -= 2; } /// @@ -182,8 +176,6 @@ public static void EncodeAverageFilter(ReadOnlySpan scanline, ReadOnlySpan res = (byte)(scan - Average(left, above)); sum += Numerics.Abs(unchecked((sbyte)res)); } - - sum -= 3; } ///