diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs index 56afae68c7..2d19f5ce26 100644 --- a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs +++ b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs @@ -494,32 +494,32 @@ private static void Scale16X16To8X8Vectorized(ref Block8x8F destination, ReadOnl var f2 = Vector256.Create(2f); var f025 = Vector256.Create(0.25f); Vector256 switchInnerDoubleWords = Unsafe.As>(ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskSwitchInnerDWords8x32)); - - ref Vector256 in1 = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); - ref Vector256 in2 = ref Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), 1)); ref Vector256 destRef = ref Unsafe.As>(ref destination); - for (int i = 0; i < 8; i++) + for (int i = 0; i < 2; i++) { - Vector256 a = in1; - Vector256 b = Unsafe.Add(ref in1, 1); - Vector256 c = in2; - Vector256 d = Unsafe.Add(ref in2, 1); - - Vector256 calc1 = Avx.Shuffle(a, c, 0b10_00_10_00); - Vector256 calc2 = Avx.Shuffle(a, c, 0b11_01_11_01); - Vector256 calc3 = Avx.Shuffle(b, d, 0b10_00_10_00); - Vector256 calc4 = Avx.Shuffle(b, d, 0b11_01_11_01); - - Vector256 sum = Avx.Add(Avx.Add(calc1, calc2), Avx.Add(calc3, calc4)); - Vector256 add = Avx.Add(sum, f2); - Vector256 res = Avx.Multiply(add, f025); + ref Vector256 in1 = ref Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), 2 * i)); + ref Vector256 in2 = ref Unsafe.As>(ref Unsafe.Add(ref MemoryMarshal.GetReference(source), (2 * i) + 1)); - destRef = Avx2.PermuteVar8x32(res, switchInnerDoubleWords); - destRef = ref Unsafe.Add(ref destRef, 1); - - in1 = ref Unsafe.Add(ref in1, 2); - in2 = ref Unsafe.Add(ref in2, 2); + for (int j = 0; j < 8; j += 2) + { + Vector256 a = Unsafe.Add(ref in1, j); + Vector256 b = Unsafe.Add(ref in1, j + 1); + Vector256 c = Unsafe.Add(ref in2, j); + Vector256 d = Unsafe.Add(ref in2, j + 1); + + Vector256 calc1 = Avx.Shuffle(a, c, 0b10_00_10_00); + Vector256 calc2 = Avx.Shuffle(a, c, 0b11_01_11_01); + Vector256 calc3 = Avx.Shuffle(b, d, 0b10_00_10_00); + Vector256 calc4 = Avx.Shuffle(b, d, 0b11_01_11_01); + + Vector256 sum = Avx.Add(Avx.Add(calc1, calc2), Avx.Add(calc3, calc4)); + Vector256 add = Avx.Add(sum, f2); + Vector256 res = Avx.Multiply(add, f025); + + destRef = Avx2.PermuteVar8x32(res, switchInnerDoubleWords); + destRef = ref Unsafe.Add(ref destRef, 1); + } } #endif }