@@ -919,7 +919,7 @@ Base.@constprop :aggressive generic_matmatmul!(C::AbstractVecOrMat, tA, tB, A::A
919919 _generic_matmatmul! (C, wrap (A, tA), wrap (B, tB), MulAddMul (α, β))
920920
921921@noinline function _generic_matmatmul! (C:: AbstractVecOrMat{R} , A:: AbstractVecOrMat{T} , B:: AbstractVecOrMat{S} ,
922- _add:: MulAddMul ) where {T,S,R}
922+ _add:: MulAddMul{ais1} ) where {T,S,R,ais1 }
923923 AxM = axes (A, 1 )
924924 AxK = axes (A, 2 ) # we use two `axes` calls in case of `AbstractVector`
925925 BxK = axes (B, 1 )
@@ -935,11 +935,13 @@ Base.@constprop :aggressive generic_matmatmul!(C::AbstractVecOrMat, tA, tB, A::A
935935 if BxN != CxN
936936 throw (DimensionMismatch (lazy " matrix B has axes ($BxK,$BxN), matrix C has axes ($CxM,$CxN)" ))
937937 end
938+ _rmul_alpha = MulAddMul {ais1,true,typeof(_add.alpha),Bool} (_add. alpha,false )
938939 if isbitstype (R) && sizeof (R) ≤ 16 && ! (A isa Adjoint || A isa Transpose)
939940 _rmul_or_fill! (C, _add. beta)
940941 (iszero (_add. alpha) || isempty (A) || isempty (B)) && return C
941942 @inbounds for n in BxN, k in BxK
942- Balpha = B[k,n]* _add. alpha
943+ # Balpha = B[k,n] * alpha, but we skip the multiplication in case isone(alpha)
944+ Balpha = _rmul_alpha (B[k,n])
943945 @simd for m in AxM
944946 C[m,n] = muladd (A[m,k], Balpha, C[m,n])
945947 end
0 commit comments