55using System . Collections . Generic ;
66using System . Linq ;
77using System . Numerics ;
8+ using System . Runtime . CompilerServices ;
9+ using System . Runtime . InteropServices ;
810using System . Threading . Tasks ;
911using SixLabors . ImageSharp . Advanced ;
1012using SixLabors . ImageSharp . Memory ;
@@ -70,6 +72,7 @@ protected override void OnFrameApply(ImageFrame<TPixel> source, ImageFrame<TPixe
7072
7173 // Convert from screen to world space.
7274 Matrix4x4 . Invert ( matrix , out matrix ) ;
75+ const float Epsilon = 0.0000001F ;
7376
7477 if ( this . Sampler is NearestNeighborResampler )
7578 {
@@ -83,10 +86,15 @@ protected override void OnFrameApply(ImageFrame<TPixel> source, ImageFrame<TPixe
8386
8487 for ( int x = 0 ; x < width ; x ++ )
8588 {
86- var point = Point . Round ( Vector2 . Transform ( new Vector2 ( x , y ) , matrix ) ) ;
87- if ( sourceBounds . Contains ( point . X , point . Y ) )
89+ var v3 = Vector3 . Transform ( new Vector3 ( x , y , 1 ) , matrix ) ;
90+
91+ float z = MathF . Max ( v3 . Z , Epsilon ) ;
92+ int px = ( int ) MathF . Round ( v3 . X / z ) ;
93+ int py = ( int ) MathF . Round ( v3 . Y / z ) ;
94+
95+ if ( sourceBounds . Contains ( px , py ) )
8896 {
89- destRow [ x ] = source [ point . X , point . Y ] ;
97+ destRow [ x ] = source [ px , py ] ;
9098 }
9199 }
92100 } ) ;
@@ -100,7 +108,10 @@ protected override void OnFrameApply(ImageFrame<TPixel> source, ImageFrame<TPixe
100108 ( float radius , float scale , float ratio ) yRadiusScale = this . GetSamplingRadius ( source . Height , destination . Height ) ;
101109 float xScale = xRadiusScale . scale ;
102110 float yScale = yRadiusScale . scale ;
103- var radius = new Vector2 ( xRadiusScale . radius , yRadiusScale . radius ) ;
111+
112+ // Use a Vector4 padded with zeros, because the Vector2 SIMD implementation is not reliable:
113+ var radius = new Vector4 ( xRadiusScale . radius , yRadiusScale . radius , 0 , 0 ) ;
114+
104115 IResampler sampler = this . Sampler ;
105116 var maxSource = new Vector4 ( maxSourceX , maxSourceY , maxSourceX , maxSourceY ) ;
106117 int xLength = ( int ) MathF . Ceiling ( ( radius . X * 2 ) + 2 ) ;
@@ -117,19 +128,23 @@ protected override void OnFrameApply(ImageFrame<TPixel> source, ImageFrame<TPixe
117128 configuration . ParallelOptions ,
118129 y =>
119130 {
120- Span < TPixel > destRow = destination . GetPixelRowSpan ( y ) ;
121- Span < float > ySpan = yBuffer . GetRowSpan ( y ) ;
122- Span < float > xSpan = xBuffer . GetRowSpan ( y ) ;
131+ ref TPixel destRowRef = ref MemoryMarshal . GetReference ( destination . GetPixelRowSpan ( y ) ) ;
132+ ref float ySpanRef = ref MemoryMarshal . GetReference ( yBuffer . GetRowSpan ( y ) ) ;
133+ ref float xSpanRef = ref MemoryMarshal . GetReference ( xBuffer . GetRowSpan ( y ) ) ;
123134
124135 for ( int x = 0 ; x < width ; x ++ )
125136 {
126137 // Use the single precision position to calculate correct bounding pixels
127138 // otherwise we get rogue pixels outside of the bounds.
128- var point = Vector2 . Transform ( new Vector2 ( x , y ) , matrix ) ;
139+ var v3 = Vector3 . Transform ( new Vector3 ( x , y , 1 ) , matrix ) ;
140+ float z = MathF . Max ( v3 . Z , Epsilon ) ;
141+
142+ // Use a Vector4 padded with zeros, because the Vector2 SIMD implementation is not reliable:
143+ Vector4 point = new Vector4 ( v3 . X , v3 . Y , 0 , 0 ) / z ;
129144
130145 // Clamp sampling pixel radial extents to the source image edges
131- Vector2 maxXY = point + radius ;
132- Vector2 minXY = point - radius ;
146+ Vector4 maxXY = point + radius ;
147+ Vector4 minXY = point - radius ;
133148
134149 // maxX, maxY, minX, minY
135150 var extents = new Vector4 (
@@ -161,24 +176,24 @@ protected override void OnFrameApply(ImageFrame<TPixel> source, ImageFrame<TPixe
161176 // I've optimized where I can but am always open to suggestions.
162177 if ( yScale > 1 && xScale > 1 )
163178 {
164- CalculateWeightsDown ( top , bottom , minY , maxY , point . Y , sampler , yScale , ySpan ) ;
165- CalculateWeightsDown ( left , right , minX , maxX , point . X , sampler , xScale , xSpan ) ;
179+ CalculateWeightsDown ( top , bottom , minY , maxY , point . Y , sampler , yScale , ref ySpanRef , yLength ) ;
180+ CalculateWeightsDown ( left , right , minX , maxX , point . X , sampler , xScale , ref xSpanRef , xLength ) ;
166181 }
167182 else
168183 {
169- CalculateWeightsScaleUp ( minY , maxY , point . Y , sampler , ySpan ) ;
170- CalculateWeightsScaleUp ( minX , maxX , point . X , sampler , xSpan ) ;
184+ CalculateWeightsScaleUp ( minY , maxY , point . Y , sampler , ref ySpanRef ) ;
185+ CalculateWeightsScaleUp ( minX , maxX , point . X , sampler , ref xSpanRef ) ;
171186 }
172187
173188 // Now multiply the results against the offsets
174189 Vector4 sum = Vector4 . Zero ;
175190 for ( int yy = 0 , j = minY ; j <= maxY ; j ++ , yy ++ )
176191 {
177- float yWeight = ySpan [ yy ] ;
192+ float yWeight = Unsafe . Add ( ref ySpanRef , yy ) ;
178193
179194 for ( int xx = 0 , i = minX ; i <= maxX ; i ++ , xx ++ )
180195 {
181- float xWeight = xSpan [ xx ] ;
196+ float xWeight = Unsafe . Add ( ref xSpanRef , xx ) ;
182197 var vector = source [ i , j ] . ToVector4 ( ) ;
183198
184199 // Values are first premultiplied to prevent darkening of edge pixels
@@ -187,7 +202,7 @@ protected override void OnFrameApply(ImageFrame<TPixel> source, ImageFrame<TPixe
187202 }
188203 }
189204
190- ref TPixel dest = ref destRow [ x ] ;
205+ ref TPixel dest = ref Unsafe . Add ( ref destRowRef , x ) ;
191206
192207 // Reverse the premultiplication
193208 dest . PackFromVector4 ( sum . UnPremultiply ( ) ) ;
0 commit comments