Skip to content

Commit cad52a2

Browse files
Merge pull request #1444 from SixLabors/sp/bokeh-kernel-jit-opt
Reduce code duplication due to reified generics
2 parents 718945c + 05659b8 commit cad52a2

File tree

2 files changed

+58
-48
lines changed

2 files changed

+58
-48
lines changed

src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
// Copyright (c) Six Labors.
22
// Licensed under the Apache License, Version 2.0.
33

4+
using System;
5+
using System.Numerics;
6+
using System.Runtime.CompilerServices;
7+
using SixLabors.ImageSharp.Advanced;
8+
using SixLabors.ImageSharp.Memory;
49
using SixLabors.ImageSharp.PixelFormats;
510

611
namespace SixLabors.ImageSharp.Processing.Processors.Convolution
@@ -77,5 +82,56 @@ public BokehBlurProcessor(int radius, int components, float gamma)
7782
// Builds the pixel-specific BokehBlurProcessor<TPixel>, forwarding the configuration, this
// processor definition, the source image and the source rectangle to its constructor.
public IImageProcessor<TPixel> CreatePixelSpecificProcessor<TPixel>(Configuration configuration, Image<TPixel> source, Rectangle sourceRectangle)
7883
where TPixel : unmanaged, IPixel<TPixel>
7984
=> new BokehBlurProcessor<TPixel>(configuration, this, source, sourceRectangle);
85+
86+
/// <summary>
87+
/// A <see langword="struct"/> implementing the horizontal convolution logic for <see cref="BokehBlurProcessor{T}"/>.
88+
/// </summary>
89+
/// <remarks>
90+
/// This type is located in the non-generic <see cref="BokehBlurProcessor"/> class and not in <see cref="BokehBlurProcessor{TPixel}"/>, where
91+
/// it is actually used, because it does not use any generic parameters internally. Defining it in a non-generic class means that there will only
92+
/// ever be a single instantiation of this type for the JIT/AOT compilers to process, instead of having duplicate versions for each pixel type.
93+
/// </remarks>
94+
internal readonly struct ApplyHorizontalConvolutionRowOperation : IRowOperation
95+
{
96+
// Rectangle supplying X, Y and Width for the row iteration; Right and Bottom are used to derive maxX and maxY.
private readonly Rectangle bounds;
97+
// Buffer whose rows are sliced in Invoke and handed to the convolution helper as the target span.
private readonly Buffer2D<Vector4> targetValues;
98+
// Buffer of complex values passed to the convolution helper as its source.
private readonly Buffer2D<ComplexVector4> sourceValues;
99+
// Complex kernel passed unchanged to the convolution helper.
private readonly Complex64[] kernel;
100+
// Passed unchanged to the convolution helper; meaning is defined by the caller-supplied kernel parameters (not visible here).
private readonly float z;
101+
// Passed unchanged to the convolution helper; meaning is defined by the caller-supplied kernel parameters (not visible here).
private readonly float w;
102+
// Cached value of bounds.Bottom - 1, computed once in the constructor.
private readonly int maxY;
103+
// Cached value of bounds.Right - 1, computed once in the constructor.
private readonly int maxX;
104+
105+
/// <summary>
/// Initializes a new instance of the <see cref="ApplyHorizontalConvolutionRowOperation"/> struct.
/// </summary>
/// <param name="bounds">The rectangle whose position and size drive the row iteration.</param>
/// <param name="targetValues">The buffer whose rows are passed to the convolution helper as the target.</param>
/// <param name="sourceValues">The buffer of complex values passed to the convolution helper as the source.</param>
/// <param name="kernel">The complex kernel passed to the convolution helper.</param>
/// <param name="z">A value forwarded as-is to the convolution helper.</param>
/// <param name="w">A value forwarded as-is to the convolution helper.</param>
[MethodImpl(InliningOptions.ShortMethod)]
106+
public ApplyHorizontalConvolutionRowOperation(
107+
Rectangle bounds,
108+
Buffer2D<Vector4> targetValues,
109+
Buffer2D<ComplexVector4> sourceValues,
110+
Complex64[] kernel,
111+
float z,
112+
float w)
113+
{
114+
this.bounds = bounds;
115+
// Precompute the last row/column values once so Invoke does not recompute them per pixel.
this.maxY = this.bounds.Bottom - 1;
116+
this.maxX = this.bounds.Right - 1;
117+
this.targetValues = targetValues;
118+
this.sourceValues = sourceValues;
119+
this.kernel = kernel;
120+
this.z = z;
121+
this.w = w;
122+
}
123+
124+
/// <inheritdoc/>
125+
[MethodImpl(InliningOptions.ShortMethod)]
126+
public void Invoke(int y)
127+
{
128+
// Take row y of the target buffer and drop the first bounds.X elements, so index 0 of the span lines up with bounds.X.
Span<Vector4> targetRowSpan = this.targetValues.GetRowSpan(y).Slice(this.bounds.X);
129+
130+
// x runs from 0 to bounds.Width - 1, i.e. it is an offset relative to the sliced span rather than an absolute column.
for (int x = 0; x < this.bounds.Width; x++)
131+
{
132+
// NOTE(review): the helper presumably convolves the source around (y, x) limited by the given min/max values and accumulates into targetRowSpan - confirm against Buffer2DUtils.
Buffer2DUtils.Convolve4AndAccumulatePartials(this.kernel, this.sourceValues, targetRowSpan, y, x, this.bounds.Y, this.maxY, this.bounds.X, this.maxX, this.z, this.w);
133+
}
134+
}
135+
}
80136
}
81137
}

src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs

Lines changed: 2 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ private void OnFrameApplyCore(
127127
in verticalOperation);
128128

129129
// Compute the horizontal 1D convolutions and accumulate the partial results on the target buffer
130-
var horizontalOperation = new ApplyHorizontalConvolutionRowOperation(sourceRectangle, processingBuffer, firstPassBuffer, kernel, parameters.Z, parameters.W);
130+
var horizontalOperation = new BokehBlurProcessor.ApplyHorizontalConvolutionRowOperation(sourceRectangle, processingBuffer, firstPassBuffer, kernel, parameters.Z, parameters.W);
131131
ParallelRowIterator.IterateRows(
132132
configuration,
133133
sourceRectangle,
@@ -175,52 +175,6 @@ public void Invoke(int y)
175175
}
176176
}
177177

178-
/// <summary>
179-
/// A <see langword="struct"/> implementing the horizontal convolution logic for <see cref="BokehBlurProcessor{T}"/>.
180-
/// </summary>
181-
private readonly struct ApplyHorizontalConvolutionRowOperation : IRowOperation
182-
{
183-
private readonly Rectangle bounds;
184-
private readonly Buffer2D<Vector4> targetValues;
185-
private readonly Buffer2D<ComplexVector4> sourceValues;
186-
private readonly Complex64[] kernel;
187-
private readonly float z;
188-
private readonly float w;
189-
private readonly int maxY;
190-
private readonly int maxX;
191-
192-
[MethodImpl(InliningOptions.ShortMethod)]
193-
public ApplyHorizontalConvolutionRowOperation(
194-
Rectangle bounds,
195-
Buffer2D<Vector4> targetValues,
196-
Buffer2D<ComplexVector4> sourceValues,
197-
Complex64[] kernel,
198-
float z,
199-
float w)
200-
{
201-
this.bounds = bounds;
202-
this.maxY = this.bounds.Bottom - 1;
203-
this.maxX = this.bounds.Right - 1;
204-
this.targetValues = targetValues;
205-
this.sourceValues = sourceValues;
206-
this.kernel = kernel;
207-
this.z = z;
208-
this.w = w;
209-
}
210-
211-
/// <inheritdoc/>
212-
[MethodImpl(InliningOptions.ShortMethod)]
213-
public void Invoke(int y)
214-
{
215-
Span<Vector4> targetRowSpan = this.targetValues.GetRowSpan(y).Slice(this.bounds.X);
216-
217-
for (int x = 0; x < this.bounds.Width; x++)
218-
{
219-
Buffer2DUtils.Convolve4AndAccumulatePartials(this.kernel, this.sourceValues, targetRowSpan, y, x, this.bounds.Y, this.maxY, this.bounds.X, this.maxX, this.z, this.w);
220-
}
221-
}
222-
}
223-
224178
/// <summary>
225179
/// A <see langword="struct"/> implementing the gamma exposure logic for <see cref="BokehBlurProcessor{T}"/>.
226180
/// </summary>
@@ -304,7 +258,7 @@ public void Invoke(int y)
304258
for (int x = 0; x < this.bounds.Width; x++)
305259
{
306260
ref Vector4 v = ref Unsafe.Add(ref sourceRef, x);
307-
var clamp = Numerics.Clamp(v, low, high);
261+
Vector4 clamp = Numerics.Clamp(v, low, high);
308262
v.X = MathF.Pow(clamp.X, this.inverseGamma);
309263
v.Y = MathF.Pow(clamp.Y, this.inverseGamma);
310264
v.Z = MathF.Pow(clamp.Z, this.inverseGamma);

0 commit comments

Comments
 (0)