diff --git a/src/DiffEqBase.jl b/src/DiffEqBase.jl
index 9ac71391c..adb753936 100644
--- a/src/DiffEqBase.jl
+++ b/src/DiffEqBase.jl
@@ -398,8 +398,11 @@ include("ensemble/ensemble_problems.jl")
 include("ensemble/basic_ensemble_solve.jl")
 include("ensemble/ensemble_analysis.jl")
 include("nlsolve/type.jl")
+include("nlsolve/interface.jl")
+include("nlsolve/nlsolve.jl")
 include("nlsolve/newton.jl")
 include("nlsolve/functional.jl")
+include("nlsolve/common.jl")
 include("nlsolve/utils.jl")
 include("operators/diffeq_operator.jl")
 include("operators/common_defaults.jl")
diff --git a/src/nlsolve/common.jl b/src/nlsolve/common.jl
new file mode 100644
index 000000000..4277e1e03
--- /dev/null
+++ b/src/nlsolve/common.jl
@@ -0,0 +1,163 @@
+## build_nlsolver
+
+function build_nlsolver(alg,nlalg::Union{NLFunctional,NLAnderson,NLNewton},u,rate_prototype,
+                        uEltypeNoUnits,uBottomEltypeNoUnits,tTypeNoUnits,uprev,f,t,dt,p,γ,c,
+                        ::Val{true})
+  # define unitless types
+  uTolType = real(uBottomEltypeNoUnits)
+
+  # define fields of non-linear solver
+  z = similar(u); gz = similar(u); tmp = similar(u)
+
+  # build cache for non-linear solver
+  dz = similar(u)
+  tstep = zero(t)
+  k = zero(rate_prototype)
+  atmp = similar(u, uEltypeNoUnits)
+
+  if nlalg isa NLNewton
+    nf = nlsolve_f(f, alg)
+
+    if islinear(f)
+      du1 = rate_prototype
+      uf = nothing
+      jac_config = nothing
+      linsolve = alg.linsolve(Val{:init},nf,u)
+    else
+      du1 = zero(rate_prototype)
+      # if the algorithm specializes on split problems then use `nf`
+      # we pass this `alg` here just for identification purpose, because get_uf would be overloaded in different repos
+      uf = build_uf(alg,nf,t,p,Val(true))
+      jac_config = build_jac_config(alg,nf,uf,du1,uprev,u,tmp,dz)
+      linsolve = alg.linsolve(Val{:init},uf,u)
+    end
+    # TODO: check if the solver is iterative
+    weight = similar(u)
+
+    J, W = build_J_W(alg,u,uprev,p,t,dt,f,uEltypeNoUnits,Val(true))
+
+    invγdt = inv(oneunit(dt) * one(uTolType))
+
+    cache = NLNewtonCache(dz, tstep, k, atmp, J, W, true, dt, du1, uf, jac_config,
+                          linsolve, weight, invγdt, (typeof(t))(nlalg.new_W_dt_cutoff))
+  elseif nlalg isa NLFunctional
+    cache = NLFunctionalCache(dz, tstep, k, atmp)
+  elseif nlalg isa NLAnderson
+    dzprev = similar(u)
+    gprev = similar(u)
+
+    max_history = min(nlalg.max_history, nlalg.maxiters, length(u))
+    Δgs = [zero(u) for i in 1:max_history]
+    Q = Matrix{uEltypeNoUnits}(undef, length(u), max_history)
+    R = Matrix{uEltypeNoUnits}(undef, max_history, max_history)
+    γs = Vector{uEltypeNoUnits}(undef, max_history)
+
+    droptol = nlalg.droptol === nothing ? nothing : uEltypeNoUnits(nlalg.droptol)
+
+    cache = NLAndersonCache(dz, tstep, k, atmp, gprev, dzprev, Δgs, Q, R, γs, 0, droptol)
+  end
+
+  # build non-linear solver
+  η = one(uTolType)
+  ndz = one(uTolType)
+
+  NLSolver{typeof(nlalg),true,typeof(u),uTolType,tTypeNoUnits,typeof(cache)}(
+    z, gz, tmp, uTolType(γ), tTypeNoUnits(c), nlalg, uTolType(nlalg.κ), η, ndz,
+    uTolType(nlalg.fast_convergence_cutoff), nlalg.maxiters, 10_000, Convergence, cache)  
+end
+
+function build_nlsolver(alg,nlalg::Union{NLFunctional,NLAnderson,NLNewton},u,rate_prototype,
+                        uEltypeNoUnits,uBottomEltypeNoUnits,tTypeNoUnits,uprev,f,t,dt,p,γ,c,
+                        ::Val{false})
+  # define unitless types
+  uTolType = real(uBottomEltypeNoUnits)
+
+  # define fields of non-linear solver
+  z = u; gz = u; tmp = u
+
+  # create cache of non-linear solver
+  dz = u
+  tstep = zero(t)
+
+  if nlalg isa NLNewton
+    nf = nlsolve_f(f, alg)
+    # only use `nf` if the algorithm specializes on split eqs
+    uf = build_uf(alg,nf,t,p,Val(false))
+
+    J, W = build_J_W(alg,u,uprev,p,t,dt,f,uEltypeNoUnits,Val(false))
+
+    invγdt = inv(oneunit(dt) * one(uTolType))
+
+    cache = NLNewtonConstantCache(dz, tstep, J, W, true, dt, uf, invγdt,
+                                  (typeof(t))(nlalg.new_W_dt_cutoff))
+  elseif nlalg isa NLFunctional
+    cache = NLFunctionalConstantCache(dz, tstep)
+  elseif nlalg isa NLAnderson
+    dzprev = u
+    gprev = u
+    
+    max_history = min(nlalg.max_history, nlalg.maxiters, length(z))
+    Δgs = Vector{typeof(u)}(undef, max_history)
+    Q = Matrix{uEltypeNoUnits}(undef, length(u), max_history)
+    R = Matrix{uEltypeNoUnits}(undef, max_history, max_history)
+    γs = Vector{uEltypeNoUnits}(undef, max_history)
+
+    droptol = nlalg.droptol === nothing ? nothing : uEltypeNoUnits(nlalg.droptol)
+
+    cache = NLAndersonConstantCache(dz, tstep, gprev, dzprev, Δgs, Q, R, γs, 0, droptol)
+  end
+
+  # build non-linear solver
+  η = one(uTolType)
+  ndz = one(uTolType)
+  
+  NLSolver{typeof(nlalg),false,typeof(u),uTolType,tTypeNoUnits,typeof(cache)}(
+    z, gz, tmp, uTolType(γ), tTypeNoUnits(c), nlalg, uTolType(nlalg.κ), η, ndz,
+    uTolType(nlalg.fast_convergence_cutoff), nlalg.maxiters, 10_000, Convergence, cache)
+end
+
+## _apply_step!
+
+function _apply_step!(nlsolver::NLSolver{algType,iip}, integrator) where {algType,iip}
+  if nlsolver.iter > 0
+    if iip
+      recursivecopy!(nlsolver.z, nlsolver.gz)
+    else
+      nlsolver.z = nlsolver.gz
+    end
+  end
+
+  # update statistics
+  nlsolver.iter += 1
+  if has_destats(integrator)
+    integrator.destats.nnonliniter += 1
+  end
+
+  nothing
+end
+
+## norm_of_residuals
+
+function norm_of_residuals(nlsolver::NLSolver{<:Union{NLFunctional,NLAnderson,NLNewton},true},
+                           integrator)
+  @unpack t,opts = integrator
+  @unpack z,gz,cache = nlsolver
+  @unpack dz,atmp = cache
+
+  calculate_residuals!(atmp, dz, z, gz, opts.abstol, opts.reltol, opts.internalnorm, t)
+  opts.internalnorm(atmp, t)
+end
+
+function norm_of_residuals(nlsolver::NLSolver{<:Union{NLFunctional,NLAnderson,NLNewton},false},
+                           integrator)
+  @unpack t,opts = integrator
+  @unpack z,gz,cache = nlsolver
+  @unpack dz = cache
+
+  atmp = calculate_residuals(dz, z, gz, opts.abstol, opts.reltol, opts.internalnorm, t)
+  opts.internalnorm(atmp, t)
+end
+
+## du_cache
+
+du_cache(nlcache::Union{NLFunctionalCache,NLAndersonCache,NLNewtonCache}) = (nlcache.k,)
diff --git a/src/nlsolve/functional.jl b/src/nlsolve/functional.jl
index d043b62c1..d900de618 100644
--- a/src/nlsolve/functional.jl
+++ b/src/nlsolve/functional.jl
@@ -1,321 +1,185 @@
-"""
-    nlsolve!(nlsolver::NLSolver, nlcache::Union{NLFunctionalCache,NLAndersonCache,NLFunctionalConstantCache,NLAndersonConstantCache}, integrator)
+## preamble!
 
-Perform functional iteration that is used by implicit methods.
+@muladd function initialize_cache!(nlcache::Union{NLFunctionalConstantCache,NLFunctionalCache},
+                                   nlsolver::NLSolver{<:NLFunctional}, integrator)
+  nlcache.tstep = integrator.t + nlsolver.c * integrator.dt
 
-It solves
+  nothing
+end
 
-```math
-G(z) = dt⋅f(tmp + γ⋅z, p, t + c⋅h)
-z = G(z)
-```
+@muladd function initialize_cache!(nlcache::Union{NLAndersonConstantCache,NLAndersonCache},
+                                   nlsolver::NLSolver{<:NLAnderson}, integrator)
+  nlcache.history = 0
+  nlcache.tstep = integrator.t + nlsolver.c * integrator.dt
 
-by iterating
+  nothing
+end
 
-```math
-zᵏ⁺¹ = G(zᵏ),
-```
+## apply_step!
 
-where `dt` is the step size and `γ` is a constant.
+function apply_step!(nlsolver::NLSolver{<:NLAnderson,false}, integrator)
+  @unpack iter, alg, cache = nlsolver
+  @unpack aa_start = alg
 
-It returns the tuple `z`, where `z` is the solution.
+  # perform Anderson acceleration
+  if iter == aa_start
+    # update cached values for next step of Anderson acceleration
+    cache.gzprev = nlsolver.gz
+    cache.dzprev = cache.dz
+  elseif iter > aa_start
+    # actually update the next iterate
+    nlsolver.gz = anderson(nlsolver.gz, cache, integrator)
+  end
 
-[^HW96]: Ernst Hairer and Gerhard Wanner, "Solving Ordinary Differential
-Equations II, Springer Series in Computational Mathematics. ISBN
-978-3-642-05221-7. Section IV.8.
-[doi:10.1007/978-3-642-05221-7](https://doi.org/10.1007/978-3-642-05221-7)
-"""
-@muladd function nlsolve!(nlsolver::NLSolver, nlcache::Union{NLFunctionalConstantCache,NLAndersonConstantCache}, integrator)
-  @unpack t,dt,uprev,u,p = integrator
-  @unpack z,tmp,κ,c,γ,max_iter = nlsolver
+  # apply step
+  _apply_step!(nlsolver, integrator)
 
-  if nlcache isa NLAndersonConstantCache
-    @unpack Δz₊s,Q,R,γs,aa_start,droptol = nlcache
-  end
+  nothing
+end
 
-  # precalculations
-  mass_matrix = integrator.f.mass_matrix
-  f = nlsolve_f(integrator)
-  tstep = t + c*dt
-  η = nlsolver.ηold
-  if nlcache isa NLAndersonConstantCache
-    history = 0
-    max_history = length(Δz₊s)
+function apply_step!(nlsolver::NLSolver{<:NLAnderson,true}, integrator)
+  @unpack iter, alg, cache = nlsolver
+  @unpack aa_start = alg
+
+  # perform Anderson acceleration
+  if iter == aa_start
+    # update cached values for next step of Anderson acceleration
+    @.. cache.gzprev = nlsolver.gz
+    @.. cache.dzprev = cache.dz
+  elseif iter > aa_start
+    # actually update the next iterate
+    anderson!(nlsolver.gz, cache, integrator)
   end
 
-  # fixed point iteration
-  local ndz
-  if nlcache isa NLAndersonConstantCache
-    local Δdz, dzold, z₊old # cache variables
-  end
-  fail_convergence = true
-  iter = 0
-  while iter < max_iter
-    iter += 1
-    if has_destats(integrator)
-      integrator.destats.nnonliniter += 1
-    end
+  # apply step
+  _apply_step!(nlsolver, integrator)
 
-    # evaluate function
-    u = @.. tmp + γ*z
-    if mass_matrix == I
-      z₊ = dt .* f(u, p, tstep)
-      dz = z₊ .- z
-    else
-      mz = _reshape(mass_matrix * _vec(z), axes(z))
-      dz = dt .* f(u, p, tstep) .- mz
-      z₊ = z .+ dz
-    end
-    if has_destats(integrator)
-      integrator.destats.nf += 1
-    end
+  nothing  
+end
 
-    # compute norm of residuals
-    iter > 1 && (ndzprev = ndz)
-    atmp = calculate_residuals(dz, uprev, u, integrator.opts.abstol, integrator.opts.reltol, integrator.opts.internalnorm, t)
-    ndz = integrator.opts.internalnorm(atmp, t)
-
-    # check divergence (not in initial step)
-    if iter > 1
-      θ = ndz / ndzprev
-      ( diverge = θ > 1 ) && ( nlsolver.status = Divergence )
-      ( veryslowconvergence = ndz * θ^(max_iter - iter) > κ * (1 - θ) ) && ( nlsolver.status = VerySlowConvergence )
-      if diverge || veryslowconvergence
-        break
-      end
-    end
+## perform_step
 
-    # update iterate
-    z = z₊
+"""
+    perform_step!(nlsolver::NLSolver{<:Union{NLFunctional,NLAnderson}}, integrator)
 
-    # check stopping criterion
-    iter > 1 && (η = θ / (1 - θ))
-    if η * ndz < κ && (iter > 1 || iszero(ndz))
-      # fixed-point iteration converges
-      nlsolver.status = η < nlsolver.fast_convergence_cutoff ? FastConvergence : Convergence
-      fail_convergence = false
-      break
-    end
+Update `nlsolver.z` with the next iterate `g(nlsolver.z)`, where
+```math
+g(z) = dt⋅f(tmp + γ⋅z, p, t + c⋅dt).
+```
+"""
+@muladd function perform_step!(nlsolver::NLSolver{<:Union{NLFunctional,NLAnderson},false}, integrator)
+  @unpack p,dt = integrator
+  @unpack z,tmp,γ,cache = nlsolver
+  @unpack tstep = cache
 
-    # perform Anderson acceleration
-    if nlcache isa NLAndersonConstantCache && iter < max_iter
-      if iter == aa_start
-        # update cached values for next step of Anderson acceleration
-        dzold = dz
-        z₊old = z₊
-      elseif iter > aa_start
-        # increase size of history
-        history += 1
-
-        # remove oldest history if maximum size is exceeded
-        if history > max_history
-          # circularly shift differences of z₊
-          for i in 1:(max_history-1)
-            Δz₊s[i] = Δz₊s[i + 1]
-          end
-
-          # delete left-most column of QR decomposition
-          qrdelete!(Q, R, max_history)
-
-          # update size of history
-          history = max_history
-        end
-
-        # update history of differences of z₊
-        Δz₊s[history] = @.. z₊ - z₊old
-
-        # replace/add difference of residuals as right-most column to QR decomposition
-        qradd!(Q, R, _vec(dz .- dzold), history)
-
-        # update cached values
-        dzold = dz
-        z₊old = z₊
-
-        # define current Q and R matrices
-        Qcur, Rcur = view(Q, :, 1:history), UpperTriangular(view(R, 1:history, 1:history))
-
-        # check condition (TODO: incremental estimation)
-        if droptol !== nothing
-          while cond(R) > droptol && history > 1
-            qrdelete!(Q, R, history)
-            history -= 1
-            Qcur, Rcur = view(Q, :, 1:history), UpperTriangular(view(R, 1:history, 1:history))
-          end
-        end
-
-        # solve least squares problem
-        γscur = view(γs, 1:history)
-        ldiv!(Rcur, mul!(γscur, Qcur', _vec(dz)))
-        if has_destats(integrator)
-          integrator.destats.nsolve += 1
-        end
-
-        # update next iterate
-        for i in 1:history
-          z = @.. z - γs[i] * Δz₊s[i]
-        end
-
-        # update norm of residuals
-        atmp = calculate_residuals(z .- z₊ .+ dz, uprev, u, integrator.opts.abstol, integrator.opts.reltol, integrator.opts.internalnorm, t)
-        ndz = integrator.opts.internalnorm(atmp, t)
-      end
-    end
+  mass_matrix = integrator.f.mass_matrix
+  f = nlsolve_f(integrator)
+
+  # evaluate function
+  ztmp = @.. tmp + γ * z
+  if mass_matrix == I
+    gz = dt .* f(ztmp, p, tstep)
+    dz = gz .- z
+  else
+    mz = _reshape(mass_matrix * _vec(z), axes(z))
+    dz = dt .* f(ztmp, p, tstep) .- mz
+    gz = z .+ dz
   end
-  if fail_convergence && has_destats(integrator)
-    integrator.destats.nnonlinconvfail += 1
+  if has_destats(integrator)
+    integrator.destats.nf += 1
   end
-  integrator.force_stepfail = fail_convergence
-  nlsolver.ηold = η
-  nlsolver.nl_iters = iter
-  return z
-end
 
-@muladd function nlsolve!(nlsolver::NLSolver, nlcache::Union{NLFunctionalCache,NLAndersonCache}, integrator)
-  @unpack t,dt,uprev,u,p = integrator
-  @unpack z,dz,tmp,ztmp,k,κ,c,γ,max_iter = nlsolver
+  # cache residuals 
+  cache.dz = dz
 
-  if nlcache isa NLFunctionalCache
-    @unpack z₊ = nlcache
-  else
-    @unpack z₊,dzold,z₊old,Δz₊s,Q,R,γs,aa_start,droptol = nlcache
-  end
+  # save next step
+  nlsolver.gz = gz
+
+  nothing
+end
+
+@muladd function perform_step!(nlsolver::NLSolver{<:Union{NLFunctional,NLAnderson},true}, integrator)
+  @unpack p,dt = integrator
+  @unpack z,gz,tmp,γ,cache = nlsolver
+  @unpack dz,tstep,k = cache
 
-  # precalculations
-  vecztmp = vec(ztmp); vecz = vec(z); vecz₊ = vec(z₊)
   mass_matrix = integrator.f.mass_matrix
   f = nlsolve_f(integrator)
-  tstep = t + c*dt
-  η = nlsolver.ηold
-  if nlcache isa NLAndersonCache
-    history = 0
-    max_history = length(Δz₊s)
+
+  # use gz as temporary variable
+  ztmp = gz
+
+  # evaluate function
+  @.. ztmp = tmp + γ * z
+  f(k, ztmp, p, tstep)
+  if has_destats(integrator)
+    integrator.destats.nf += 1
+  end
+  
+  if mass_matrix == I
+    @.. gz = dt * k
+    @.. dz = gz - z
+  else
+    mul!(vec(ztmp), mass_matrix, vec(z))
+    @.. dz = dt * k - ztmp
+    @.. gz = z + dz
   end
 
-  # fixed-point iteration without Newton
-  local ndz
-  fail_convergence = true
-  iter = 0
-  while iter < max_iter
-    iter += 1
-    if has_destats(integrator)
-      integrator.destats.nnonliniter += 1
-    end
+  nothing
+end
 
-    # evaluate function
-    @.. u = tmp + γ*z
-    f(k, u, p, tstep)
-    if has_destats(integrator)
-      integrator.destats.nf += 1
-    end
-    if mass_matrix == I
-      @.. z₊ = dt*k
-      @.. dz = z₊ - z
-    else
-      mul!(vecztmp, mass_matrix, vecz)
-      @.. dz = dt*k - ztmp
-      @.. z₊ = z + dz
-    end
+## resize!
 
-    # compute norm of residuals
-    iter > 1 && (ndzprev = ndz)
-    calculate_residuals!(ztmp, dz, uprev, u, integrator.opts.abstol, integrator.opts.reltol, integrator.opts.internalnorm, t)
-    ndz = integrator.opts.internalnorm(ztmp, t)
-
-    # check divergence (not in initial step)
-    if iter > 1
-      θ = ndz / ndzprev
-      ( diverge = θ > 1 ) && ( nlsolver.status = Divergence )
-      ( veryslowconvergence = ndz * θ^(max_iter - iter) > κ * (1 - θ) ) && ( nlsolver.status = VerySlowConvergence )
-      if diverge || veryslowconvergence
-        break
-      end
-    end
+function Base.resize!(nlcache::NLFunctionalCache, i::Int)
+  resize!(nlcache.dz, i)
+  resize!(nlcache.k, i)
+  resize!(nlcache.atmp, i)
+  nothing
+end
 
-    # update iterate
-    @.. z = z₊
+function Base.resize!(nlcache::Union{NLAndersonCache,NLAndersonConstantCache}, nlsolver::NLSolver{<:NLAnderson},
+  integrator, i::Int)
+  resize!(nlcache, nlsolver.alg, i)
+end
 
-    # check stopping criterion
-    iter > 1 && (η = θ / (1 - θ))
-    if η * ndz < κ && (iter > 1 || iszero(ndz))
-      # fixed-point iteration converges
-      nlsolver.status = η < nlsolver.fast_convergence_cutoff ? FastConvergence : Convergence
-      fail_convergence = false
-      break
-    end
+function Base.resize!(nlcache::NLAndersonCache, nlalg::NLAnderson, i::Int)
+  @unpack gzprev,Δgzs = nlcache
 
-    # perform Anderson acceleration
-    if nlcache isa NLAndersonCache && iter < max_iter
-      if iter == aa_start
-        # update cached values for next step of Anderson acceleration
-        @.. dzold = dz
-        @.. z₊old = z₊
-      elseif iter > aa_start
-        # increase size of history
-        history += 1
-
-        # remove oldest history if maximum size is exceeded
-        if history > max_history
-          # circularly shift differences of z₊
-          ptr = Δgs[1]
-          for i in 1:(max_history-1)
-            Δz₊s[i] = Δz₊s[i + 1]
-          end
-          Δz₊s[max_history] = ptr
-
-          # delete left-most column of QR decomposition
-          qrdelete!(Q, R, max_history)
-
-          # update size of history
-          history = max_history
-        end
-
-        # update history of differences of z₊
-        @.. Δz₊s[history] = z₊ - z₊old
-
-        # replace/add difference of residuals as right-most column to QR decomposition
-        @.. dzold = dz - dzold
-        qradd!(Q, R, vec(dzold), history)
-
-        # update cached values
-        @.. dzold = dz
-        @.. z₊old = z₊
-
-        # define current Q and R matrices
-        Qcur, Rcur = view(Q, :, 1:history), UpperTriangular(view(R, 1:history, 1:history))
-
-        # check condition (TODO: incremental estimation)
-        if droptol !== nothing
-          while cond(R) > droptol && history > 1
-            qrdelete!(Q, R, history)
-            history -= 1
-            Qcur, Rcur = view(Q, :, 1:history), UpperTriangular(view(R, 1:history, 1:history))
-          end
-        end
-
-        # solve least squares problem
-        γscur = view(γs, 1:history)
-        ldiv!(Rcur, mul!(γscur, Qcur', vec(dz)))
-        if has_destats(integrator)
-          integrator.destats.nsolve += 1
-        end
-
-        # update next iterate
-        for i in 1:history
-          @.. z = z - γs[i] * Δz₊s[i]
-        end
-
-        # update norm of residuals
-        @.. dz = z - z₊ + dz
-        calculate_residuals!(ztmp, dz, uprev, u, integrator.opts.abstol, integrator.opts.reltol, integrator.opts.internalnorm, t)
-        ndz = integrator.opts.internalnorm(ztmp, t)
-      end
+  resize!(nlcache.dz, i)
+  resize!(nlcache.k, i)
+  resize!(nlcache.atmp, i)
+  resize!(gzprev, i)
+  resize!(nlcache.dzprev, i)
+
+  # update history of Anderson cache
+  max_history_old = length(Δgzs)
+  resize_anderson_history!(nlcache, nlalg, i)
+
+  max_history = length(Δgzs)
+  if max_history > max_history_old
+    for i in (max_history_old + 1):max_history
+      Δgzs[i] = zero(gzprev)
     end
   end
-  if fail_convergence && has_destats(integrator)
-    integrator.destats.nnonlinconvfail += 1
-  end
-  integrator.force_stepfail = fail_convergence
-  nlsolver.ηold = η
-  nlsolver.nl_iters = iter
-  return z
+
+  nothing
 end
+
+Base.resize!(nlcache::NLAndersonConstantCache, nlalg::NLAnderson, i::Int) =
+  resize_anderson_history!(nlcache, nlalg, i)
+
+function resize_anderson_history!(nlcache, nlalg::NLAnderson, i::Int)
+  # determine new maximum history
+  max_history_old = length(nlcache.Δgzs)
+  max_history = min(nlalg.max_history, nlalg.maxiters, i)
+
+  resize!(nlcache.γs, max_history)
+  resize!(nlcache.Δgzs, max_history)
+
+  if max_history != max_history_old
+    nlcache.Q = typeof(nlcache.Q)(undef, i, max_history)
+    nlcache.R = typeof(nlcache.R)(undef, max_history, max_history)
+  end
+
+  nothing
+end
\ No newline at end of file
diff --git a/src/nlsolve/interface.jl b/src/nlsolve/interface.jl
new file mode 100644
index 000000000..c241bf2fe
--- /dev/null
+++ b/src/nlsolve/interface.jl
@@ -0,0 +1,85 @@
+## accessors
+function nlsolve_f end
+
+get_status(nlsolver::AbstractNLSolver) = nlsolver.status
+
+nlsolvefail(nlsolver::AbstractNLSolver) = nlsolvefail(get_status(nlsolver))
+nlsolvefail(nlstatus::NLStatus) = Int8(nlstatus) < 0
+
+get_cache(nlsolver::AbstractNLSolver) = nlsolver.cache
+
+get_new_W(nlsolver::AbstractNLSolver)::Bool = get_new_W(get_cache(nlsolver))
+get_new_W(nlcache::AbstractNLSolverCache)::Bool = nlcache.new_W
+
+set_new_W!(nlsolver::AbstractNLSolver, val::Bool)::Bool = set_new_W!(get_cache(nlsolver), val)
+set_new_W!(nlcache::AbstractNLSolverCache, val::Bool)::Bool = (nlcache.new_W = val; val)
+
+get_W(nlsolver::AbstractNLSolver) = get_W(get_cache(nlsolver))
+get_W(nlcache::AbstractNLSolverCache) = nlcache.W
+
+set_W!(nlsolver::AbstractNLSolver, W) = set_W!(get_cache(nlsolver), W)
+set_W!(nlcache::AbstractNLSolverCache, W) = (nlcache.W = W; nothing)
+
+get_W_dt(nlsolver::AbstractNLSolver) = get_W_dt(get_cache(nlsolver))
+get_W_dt(nlcache::AbstractNLSolverCache) = nlcache.W_dt
+
+set_W_dt!(nlsolver::AbstractNLSolver, W_dt) = set_W_dt!(get_cache(nlsolver), W_dt)
+set_W_dt!(nlcache::AbstractNLSolverCache, W_dt) = (nlcache.W_dt = W_dt; nothing)
+
+get_linsolve(nlsolver::AbstractNLSolver) = get_linsolve(get_cache(nlsolver))
+get_linsolve(nlcache::AbstractNLSolverCache) = nlcache.linsolve
+
+du_cache(nlsolver::AbstractNLSolver) = du_cache(get_cache(nlsolver))
+du_cache(::AbstractNLSolverCache) = nothing
+
+function get_nlsolver(integrator::DEIntegrator)
+  isdefined(integrator.cache, :nlsolver) || return
+  
+  integrator.cache.nlsolver
+end
+
+## traits
+
+isnewton(::AbstractNLSolver) = false
+
+## build
+
+function build_uf end
+function build_jac_config end
+function build_J_W end
+
+## resize
+
+function resize_jac_config! end
+function resize_J! end
+function resize_W! end
+
+function resize_nlsolver!(integrator::DEIntegrator, i::Int)
+  nlsolver = get_nlsolver(integrator)
+ 
+  nlsolver === nothing && return
+
+  if nlsolver isa AbstractArray
+    for idx in eachindex(nlsolver)
+      resize!(nlsolver[idx], integrator, i)
+    end
+  else
+    resize!(nlsolver, integrator, i)
+  end
+
+  nothing
+end
+
+function Base.resize!(nlsolver::AbstractNLSolver, integrator, i::Int)
+  @unpack z,gz,tmp = nlsolver
+
+  resize!(z, i)
+  resize!(gz, i)
+  resize!(tmp, i)
+
+  resize!(get_cache(nlsolver), nlsolver, integrator, i)
+end
+
+## default: dispatch only on the cache
+Base.resize!(cache::AbstractNLSolverCache, nlsolver, integrator, i::Int) =
+  Base.resize!(cache, i)
\ No newline at end of file
diff --git a/src/nlsolve/newton.jl b/src/nlsolve/newton.jl
index 229b1fa66..0c3d09880 100644
--- a/src/nlsolve/newton.jl
+++ b/src/nlsolve/newton.jl
@@ -1,36 +1,60 @@
+## initial_η
+
+initial_η(nlsolver::NLSolver{NLNewton}, integrator) =
+  max(nlsolver.η, eps(eltype(integrator.opts.reltol)))^(0.8)
+
+## preamble!
+
+@muladd function initialize_cache!(nlcache::NLNewtonConstantCache,
+                                   nlsolver::NLSolver{<:NLNewton,false}, integrator)
+  @unpack dt = integrator
+
+  nlcache.invγdt = inv(dt * nlsolver.γ)
+  nlcache.tstep = integrator.t + nlsolver.c * dt 
+
+  nothing
+end
+
+@muladd function initialize_cache!(nlcache::NLNewtonCache,
+                                   nlsolver::NLSolver{<:NLNewton,true}, integrator)
+  @unpack u,uprev,t,dt,opts = integrator
+  @unpack weight = nlcache
+
+  nlcache.invγdt = inv(dt * nlsolver.γ)
+  nlcache.tstep = integrator.t + nlsolver.c * dt 
+  calculate_residuals!(weight, fill!(weight, one(eltype(u))), uprev, u,
+                       opts.abstol, opts.reltol, opts.internalnorm, t)
+  
+  nothing
+end
+
+## perform_step!
+
 """
-    nlsolve!(nlsolver::NLSolver, nlcache::Union{NLNewtonCache,NLNewtonConstantCache}, integrator)
+    perform_step!(nlsolver::NLSolver{<:NLNewton}, integrator)
 
-Perform numerically stable modified Newton iteration that is specialized for implicit
-methods (see [^HS96] and [^HW96]).
+Compute next iterate of numerically stable modified Newton iteration
+that is specialized for implicit methods (see [^HS96] and [^HW96]).
 
 It solves
-
 ```math
-G(z) = dt⋅f(tmp + γ⋅z, p, t + c⋅h) - z = 0
+z = dt⋅f(tmp + γ⋅z, p, t + c⋅dt)
 ```
-
 by iterating
-
 ```math
-(I + (dt⋅γ)J) Δᵏ = dt*f(tmp + γ⋅zᵏ, p, t + c⋅h) - zᵏ
-zᵏ⁺¹ = zᵏ + Δᵏ
+(I + (dt⋅γ)J) Δᵏ = dt*f(tmp + γ⋅zᵏ, p, t + c⋅dt) - zᵏ
+zᵏ⁺¹ = g(zᵏ) = zᵏ - Δᵏ
 ```
-
 or, by utilizing a transformation,
-
 ```math
-W Δᵏ = f(tmp + γ⋅zᵏ, p, t + c⋅h)/γ - zᵏ/(dt⋅γ)
-zᵏ⁺¹ = zᵏ + Δᵏ/(dt⋅γ)
+W Δᵏ = f(tmp + γ⋅zᵏ, p, t + c⋅dt)/γ - zᵏ/(dt⋅γ)
+zᵏ⁺¹ = g(zᵏ) = zᵏ - Δᵏ/(dt⋅γ)
 ```
-
-where `W = M/(dt⋅γ) - J`, `M` is the mass matrix, `dt` is the step size, `γ` is
-a constant, `J` is the Jacobian matrix. This transformation occurs since `c*J` is
+where `W = M/(dt⋅γ) - J`, `M` is the mass matrix, `dt` is the step size, `γ` and
+`c` are constants, `J` is the Jacobian matrix. This transformation occurs since `c*J` is
 O(n^2), while `c*M` is usually much sparser. In the most common case, `M=I`, we
 have that `c*M` is O(1) for `I isa UniformScaling`.
 
-This returns `z`, where `z` is the solution.
-
 [^HS96]: M.E.Hoseaa and L.F.Shampine, "Analysis and implementation of TR-BDF2",
 Applied Numerical Mathematics, Volume 20, Issues 1–2, February 1996, Pages
 21-37.
@@ -41,166 +65,96 @@ Equations II, Springer Series in Computational Mathematics. ISBN
 978-3-642-05221-7. Section IV.8.
 [doi:10.1007/978-3-642-05221-7](https://doi.org/10.1007/978-3-642-05221-7)
 """
-@muladd function nlsolve!(nlsolver::NLSolver, nlcache::NLNewtonConstantCache, integrator)
-  @unpack t,dt,uprev,u,p = integrator
-  @unpack z,tmp,κ,c,γ,max_iter = nlsolver
-  W = nlcache.W
+@muladd function perform_step!(nlsolver::NLSolver{<:NLNewton,false}, integrator)
+  @unpack p,dt = integrator
+  @unpack z,tmp,γ,cache = nlsolver
+  @unpack tstep,W,invγdt = cache
 
   # precalculations
   mass_matrix = integrator.f.mass_matrix
   f = nlsolve_f(integrator)
-  invγdt = inv(dt*γ)
-  tstep = t + c*dt
-  η = max(nlsolver.ηold,eps(eltype(integrator.opts.reltol)))^(0.8)
-
-  # Newton iteration
-  local ndz
-  fail_convergence = true
-  iter = 0
-  while iter < max_iter
-    iter += 1
-    if has_destats(integrator)
-      integrator.destats.nnonliniter += 1
-    end
-
-    # evaluate function
-    u = @.. tmp + γ * z
-    if mass_matrix === I
-      ztmp = (dt .* f(u, p, tstep) .- z) .* invγdt
-    else
-      ztmp = (dt .* f(u, p, tstep) .- mass_matrix * z) .* invγdt
-    end
-    if has_destats(integrator)
-      integrator.destats.nf += 1
-    end
-
-    dz = _reshape(W \ _vec(ztmp), axes(ztmp))
-
-    if has_destats(integrator)
-      integrator.destats.nsolve += 1
-    end
-
-    # compute norm of residuals
-    iter > 1 && (ndzprev = ndz)
-    atmp = calculate_residuals(dz, uprev, u, integrator.opts.abstol, integrator.opts.reltol, integrator.opts.internalnorm, t)
-    ndz = integrator.opts.internalnorm(atmp, t)
-
-    # check divergence (not in initial step)
-    if iter > 1
-      θ = ndz / ndzprev
-      ( diverge = θ > 1 ) && ( nlsolver.status = Divergence )
-      ( veryslowconvergence = ndz * θ^(max_iter - iter) > κ * (1 - θ) ) && ( nlsolver.status = VerySlowConvergence )
-      if diverge || veryslowconvergence
-        # Newton method diverges
-        break
-      end
-    end
-
-    # update solution
-    z = @.. z - dz
-
-    # check stopping criterion
-    iter > 1 && (η = θ / (1 - θ))
-    if η * ndz < κ && (iter > 1 || iszero(ndz) || !iszero(integrator.success_iter))
-      # Newton method converges
-      nlsolver.status = η < nlsolver.fast_convergence_cutoff ? FastConvergence : Convergence
-      fail_convergence = false
-      break
-    end
+
+  # evaluate function
+  ztmp = @.. tmp + γ * z
+  if mass_matrix === I
+    ztmp = (dt .* f(ztmp, p, tstep) .- z) .* invγdt
+  else
+    ztmp = (dt .* f(ztmp, p, tstep) .- mass_matrix * z) .* invγdt
+  end
+  if has_destats(integrator)
+    integrator.destats.nf += 1
   end
 
-  if fail_convergence && has_destats(integrator)
-    integrator.destats.nnonlinconvfail += 1
+  dz = _reshape(W \ _vec(ztmp), axes(ztmp))
+  if has_destats(integrator)
+    integrator.destats.nsolve += 1
   end
-  integrator.force_stepfail = fail_convergence
-  nlsolver.ηold = η
-  nlsolver.nl_iters = iter
-  return z
-end
 
-@muladd function nlsolve!(nlsolver::NLSolver, nlcache::NLNewtonCache, integrator)
-  @unpack t,dt,uprev,u,p,cache = integrator
-  @unpack z,dz,tmp,ztmp,k,κ,c,γ,max_iter,weight = nlsolver
-  @unpack W, new_W, W_dt = nlcache
-  cache = unwrap_cache(integrator, true)
-  calculate_residuals!(weight, fill!(weight, one(eltype(u))), uprev, u, integrator.opts.abstol, integrator.opts.reltol, integrator.opts.internalnorm, t)
-  lintol = integrator.opts.reltol
+  cache.dz = dz
+  nlsolver.gz = @.. z - dz
 
-  # precalculations
+  nothing
+end
+
+@muladd function perform_step!(nlsolver::NLSolver{<:NLNewton,true}, integrator)
+  @unpack p,dt = integrator
+  @unpack z,gz,tmp,γ,iter,cache = nlsolver
+  @unpack dz,tstep,k,W,new_W,linsolve,weight,invγdt = cache
+  
   mass_matrix = integrator.f.mass_matrix
   f = nlsolve_f(integrator)
-  invγdt = inv(dt*γ)
-  vecztmp = vec(ztmp); vecz = vec(z); vecdz = vec(dz)
-  tstep = t + c*dt
-  η = max(nlsolver.ηold,eps(eltype(integrator.opts.reltol)))^(0.8)
-
-  # Newton iteration
-  local ndz
-  fail_convergence = true
-  iter = 0
-  while iter < max_iter
-    iter += 1
-    if has_destats(integrator)
-      integrator.destats.nnonliniter += 1
-    end
-
-    # evaluate function
-    @.. dz = tmp + γ*z
-    f(k, dz, p, tstep)
-    if has_destats(integrator)
-      integrator.destats.nf += 1
-    end
-    if mass_matrix === I
-      @.. ztmp = (dt*k - z) * invγdt
-    else
-      mul!(vecztmp,mass_matrix,vecz)
-      @.. ztmp = (dt*k - ztmp) * invγdt
-    end
-
-    if W isa AbstractDiffEqLinearOperator
-      update_coefficients!(W,dz,p,tstep)
-    end
-    nlsolver.linsolve(vecdz,W,vecztmp,iter == 1 && new_W; Pl=ScaleVector(weight, true), Pr=ScaleVector(weight, false), tol=lintol)
-
-    if has_destats(integrator)
-      integrator.destats.nsolve += 1
-    end
-
-    # compute norm of residuals
-    iter > 1 && (ndzprev = ndz)
-    #W_dt != dt && (rmul!(dz, 2/(1 + dt / W_dt))) # relaxation
-    @.. ztmp = tmp + γ*z
-    calculate_residuals!(ztmp, dz, uprev, ztmp, integrator.opts.abstol, integrator.opts.reltol, integrator.opts.internalnorm, t)
-    ndz = integrator.opts.internalnorm(ztmp, t)
-
-    # check divergence (not in initial step)
-    if iter > 1
-      θ = ndz / ndzprev
-      ( diverge = θ > 1 ) && ( nlsolver.status = Divergence )
-      ( veryslowconvergence = ndz * θ^(max_iter - iter) > κ * (1 - θ) ) && ( nlsolver.status = VerySlowConvergence )
-      if diverge || veryslowconvergence
-        break
-      end
-    end
-
-    # update solution
-    @.. z = z - dz
-
-    # check stopping criterion
-    iter > 1 && (η = θ / (1 - θ))
-    if η * ndz < κ && (iter > 1 || iszero(ndz) || !iszero(integrator.success_iter))
-      # Newton method converges
-      nlsolver.status = η < nlsolver.fast_convergence_cutoff ? FastConvergence : Convergence
-      fail_convergence = false
-      break
-    end
+
+  # use gz as temporary variable
+  ztmp = gz
+
+  # evaluate function
+  @.. ztmp = tmp + γ * z
+  if W isa AbstractDiffEqLinearOperator
+    update_coefficients!(W, ztmp, p, tstep)
+  end
+
+  f(k, ztmp, p, tstep)
+  if has_destats(integrator)
+    integrator.destats.nf += 1
+  end
+
+  if mass_matrix === I
+    @.. ztmp = (dt * k - z) * invγdt
+  else
+    mul!(vec(ztmp), mass_matrix, vec(z))
+    @.. ztmp = (dt * k - ztmp) * invγdt
   end
 
-  if fail_convergence && has_destats(integrator)
-    integrator.destats.nnonlinconvfail += 1
+  linsolve(vec(dz), W, vec(ztmp), iter == 1 && new_W; Pl=ScaleVector(weight, true), Pr=ScaleVector(weight, false), tol=integrator.opts.reltol)
+  if has_destats(integrator)
+    integrator.destats.nsolve += 1
   end
-  integrator.force_stepfail = fail_convergence
-  nlsolver.ηold = η
-  nlsolver.nl_iters = iter
-  return z
+
+  # compute next iterate
+  @.. gz = z - dz
+
+  nothing
 end
+
+## traits
+
+isnewton(::NLSolver{<:NLNewton}) = true
+
+## resize!
+
+function Base.resize!(nlcache::NLNewtonCache, nlsolver, integrator, i::Int)
+  resize!(nlcache.dz, i)
+  resize!(nlcache.k, i)
+  resize!(nlcache.atmp, i)
+  resize!(nlcache.du1, i)
+  if jac_config !== nothing
+    nlcache.jac_config = resize_jac_config!(nlcache.jac_config, i)
+  end
+  resize!(nlcache.weight, i)
+
+  # have to be implemented for integrator
+  resize_J!(nlcache, integrator, i)
+  resize_W!(nlcache, integrator, i)
+
+  nothing
+end
\ No newline at end of file
diff --git a/src/nlsolve/nlsolve.jl b/src/nlsolve/nlsolve.jl
new file mode 100644
index 000000000..f316ee700
--- /dev/null
+++ b/src/nlsolve/nlsolve.jl
@@ -0,0 +1,116 @@
+"""
+    nlsolve!(nlsolver::AbstractNLSolver, integrator)
+
+Solve
+```math
+dt⋅f(tmp + γ⋅z, p, t + c⋅dt) = z
+```
+where `dt` is the step size and `γ` and `c` are constants, and return the solution `z`.
+"""
+function nlsolve!(nlsolver::AbstractNLSolver, integrator)
+  preamble!(nlsolver, integrator)
+
+  while get_status(nlsolver) === SlowConvergence
+    # (possibly modify and) accept step
+    apply_step!(nlsolver, integrator)
+
+    # compute next iterate
+    perform_step!(nlsolver, integrator)
+  
+    # check convergence and divergence criteria
+    check_status!(nlsolver, integrator)
+  end
+
+  postamble!(nlsolver, integrator)
+end
+
+## default implementations
+
+function preamble!(nlsolver::AbstractNLSolver, integrator)
+  nlsolver.iter = 0
+  if nlsolver.maxiters == 0
+    nlsolver.status = MaxItersReached
+    return
+  end
+  
+  nlsolver.status = SlowConvergence
+  nlsolver.η = initial_η(nlsolver, integrator)
+
+  initialize_cache!(nlsolver.cache, nlsolver, integrator)
+
+  nothing
+end
+
+initial_η(nlsolver::AbstractNLSolver, integrator) = nlsolver.η
+
+initialize_cache!(nlcache, nlsolver::AbstractNLSolver, integrator) = nothing
+
+apply_step!(nlsolver::AbstractNLSolver, integrator) = _apply_step!(nlsolver, integrator)
+
+function check_status!(nlsolver::AbstractNLSolver, integrator)
+  nlsolver.status = check_status(nlsolver, integrator)
+  nothing
+end
+
+function check_status(nlsolver::AbstractNLSolver, integrator)
+  @unpack iter,maxiters,κ,fast_convergence_cutoff = nlsolver
+
+  # compute norm of residuals and cache previous value
+  iter > 1 && (ndzprev = nlsolver.ndz)
+  ndz = norm_of_residuals(nlsolver, integrator)
+  nlsolver.ndz = ndz
+
+  # check for convergence
+  if iter > 1
+    Θ = ndz / ndzprev
+    η = Θ / (1 - Θ)
+    nlsolver.η = η
+  else
+    η = nlsolver.η
+  end
+  if iszero(ndz) || (η * ndz < κ && (iter > 1 || !iszero(integrator.success_iter)))
+    if η < nlsolver.fast_convergence_cutoff
+      return FastConvergence
+    else
+      return Convergence
+    end
+  end
+
+  # check for divergence (not in initial step)
+  if iter > 1
+    # divergence
+    if Θ > 1
+      return Divergence
+    end
+
+    # very slow convergence
+    if ndz * Θ^(maxiters - iter) > κ * (1 - Θ)
+      return VerySlowConvergence
+    end
+  end
+
+  # check number of iterations
+  if iter >= maxiters
+    return MaxItersReached
+  end
+
+  SlowConvergence
+end  
+
+function norm_of_residuals(nlsolver::AbstractNLSolver, integrator)
+  @unpack t,opts = integrator
+  @unpack z,gz = nlsolver
+
+  atmp = calculate_residuals(z, gz, opts.abstol, opts.reltol, opts.internalnorm, t)
+  opts.internalnorm(atmp, t)
+end
+
+function postamble!(nlsolver::AbstractNLSolver, integrator)
+  fail_convergence = nlsolvefail(nlsolver)
+  if fail_convergence && has_destats(integrator)
+      integrator.destats.nnonlinconvfail += 1
+  end
+  integrator.force_stepfail = fail_convergence
+
+  nlsolver.z
+end
diff --git a/src/nlsolve/type.jl b/src/nlsolve/type.jl
index 35254c6cc..1f7ec491e 100644
--- a/src/nlsolve/type.jl
+++ b/src/nlsolve/type.jl
@@ -1,108 +1,151 @@
-abstract type AbstractNLSolverAlgorithm end
-abstract type AbstractNLSolverCache end
-
-@enum NLStatus::Int8 begin
-  FastConvergence     = 2
-  Convergence         = 1
-  SlowConvergence     = 0
-  VerySlowConvergence = -1
-  Divergence          = -2
-end
+## algorithms
 
-# solver
-
-mutable struct NLSolver{iip,uType,rateType,uTolType,kType,gType,cType,du1Type,ufType,jcType,lsType,C1,C<:AbstractNLSolverCache}
-  z::uType
-  dz::uType
-  tmp::uType
-  ztmp::uType
-  k::rateType
-  ηold::uTolType
-  κ::kType
-  γ::gType
-  c::cType
-  max_iter::Int
-  nl_iters::Int
-  status::NLStatus
-  fast_convergence_cutoff::C1
-  du1::du1Type
-  uf::ufType
-  jac_config::jcType
-  linsolve::lsType
-  weight::uType
-  cache::C
-end
-
-# algorithms
+abstract type AbstractNLSolverAlgorithm end
 
 struct NLFunctional{K,C} <: AbstractNLSolverAlgorithm
   κ::K
   fast_convergence_cutoff::C
-  max_iter::Int
+  maxiters::Int
 end
 
-NLFunctional(; κ=1//100, max_iter=10, fast_convergence_cutoff=1//5) = NLFunctional(κ, fast_convergence_cutoff, max_iter)
+NLFunctional(; κ=1//100, maxiters=10, fast_convergence_cutoff=1//5) = NLFunctional(κ, fast_convergence_cutoff, maxiters)
 
 struct NLAnderson{K,D,C} <: AbstractNLSolverAlgorithm
   κ::K
   fast_convergence_cutoff::C
-  max_iter::Int
+  maxiters::Int
   max_history::Int
   aa_start::Int
   droptol::D
 end
 
-NLAnderson(; κ=1//100, max_iter=10, max_history::Int=5, aa_start::Int=1, droptol=nothing, fast_convergence_cutoff=1//5) =
-  NLAnderson(κ, fast_convergence_cutoff, max_iter, max_history, aa_start, droptol)
+NLAnderson(; κ=1//100, maxiters=10, max_history::Int=10, aa_start::Int=1, droptol=1e10, fast_convergence_cutoff=1//5) =
+  NLAnderson(κ, fast_convergence_cutoff, maxiters, max_history, aa_start, droptol)
 
 struct NLNewton{K,C1,C2} <: AbstractNLSolverAlgorithm
   κ::K
-  max_iter::Int
+  maxiters::Int
   fast_convergence_cutoff::C1
   new_W_dt_cutoff::C2
 end
 
-NLNewton(; κ=1//100, max_iter=10, fast_convergence_cutoff=1//5, new_W_dt_cutoff=1//5) = NLNewton(κ, max_iter, fast_convergence_cutoff, new_W_dt_cutoff)
+NLNewton(; κ=1//100, maxiters=10, fast_convergence_cutoff=1//5, new_W_dt_cutoff=1//5) =
+  NLNewton(κ, maxiters, fast_convergence_cutoff, new_W_dt_cutoff)
 
-# caches
+## status
 
-mutable struct NLNewtonCache{W,J,T,C} <: AbstractNLSolverCache
-  new_W::Bool
-  W::W
-  J::J
-  W_dt::T
-  new_W_dt_cutoff::C
+@enum NLStatus::Int8 begin
+  FastConvergence     = 2
+  Convergence         = 1
+  SlowConvergence     = 0
+  VerySlowConvergence = -1
+  Divergence          = -2
+  MaxItersReached      = -3
 end
 
-mutable struct NLNewtonConstantCache{W,J,C} <: AbstractNLSolverCache
-  W::W
-  J::J
-  new_W_dt_cutoff::C
+## NLsolver
+
+abstract type AbstractNLSolver end
+
+mutable struct NLSolver{algType<:AbstractNLSolverAlgorithm,IIP,uType,uTolType,tTypeNoUnits,C} <: AbstractNLSolver
+  """current solution"""
+  z::uType
+  """value `g(z)` of Newton or fixed-point iteration"""
+  gz::uType
+  tmp::uType
+  γ::uTolType
+  c::tTypeNoUnits
+  alg::algType
+  κ::uTolType
+  η::uTolType
+  ndz::uTolType
+  fast_convergence_cutoff::uTolType
+  iter::Int
+  maxiters::Int
+  status::NLStatus
+  cache::C
 end
 
-struct NLFunctionalCache{uType} <: AbstractNLSolverCache
-  z₊::uType
+## caches
+
+abstract type AbstractNLSolverCache end
+
+mutable struct NLFunctionalCache{uType,tType,rateType,uNoUnitsType} <: AbstractNLSolverCache
+  """residuals `g(z) - z` of fixed-point iteration"""
+  dz::uType
+  tstep::tType
+  k::rateType
+  atmp::uNoUnitsType
 end
 
-struct NLFunctionalConstantCache <: AbstractNLSolverCache end
+mutable struct NLFunctionalConstantCache{uType,tType} <: AbstractNLSolverCache
+  """residuals `g(z) - z` of fixed-point iteration"""
+  dz::uType
+  tstep::tType
+end
 
-mutable struct NLAndersonCache{uType,gsType,QType,RType,gType,D} <: AbstractNLSolverCache
-  z₊::uType
-  dzold::uType
-  z₊old::uType
-  Δz₊s::gsType
-  Q::QType
-  R::RType
-  γs::gType
-  aa_start::Int
+mutable struct NLAndersonCache{uType,tType,rateType,uNoUnitsType,uEltypeNoUnits,D} <: AbstractNLSolverCache
+  """residuals of `g(z) - z` of fixed-point iteration"""
+  dz::uType
+  tstep::tType
+  k::rateType
+  atmp::uNoUnitsType
+  """value `g(zprev)` of previous fixed-point iteration"""
+  gzprev::uType
+  """residuals `g(zprev) - zprev` of previous fixed-point iteration"""
+  dzprev::uType
+  Δgzs::Vector{uType}
+  Q::Matrix{uEltypeNoUnits}
+  R::Matrix{uEltypeNoUnits}
+  γs::Vector{uEltypeNoUnits}
+  history::Int
   droptol::D
 end
 
-mutable struct NLAndersonConstantCache{gsType,QType,RType,gType,D} <: AbstractNLSolverCache
-  Δz₊s::gsType
-  Q::QType
-  R::RType
-  γs::gType
-  aa_start::Int
+mutable struct NLAndersonConstantCache{uType,tType,uEltypeNoUnits,D} <: AbstractNLSolverCache
+  """residuals `g(z) - z` of fixed-point iteration"""
+  dz::uType
+  tstep::tType
+  """value `g(zprev)` of previous fixed-point iteration"""
+  gzprev::uType
+  """residuals `g(zprev) - zprev` of previous fixed-point iteration"""
+  dzprev::uType
+  Δgzs::Vector{uType}
+  Q::Matrix{uEltypeNoUnits}
+  R::Matrix{uEltypeNoUnits}
+  γs::Vector{uEltypeNoUnits}
+  history::Int
   droptol::D
 end
+
+mutable struct NLNewtonCache{uType,tType,rateType,uNoUnitsType,J,W,du1Type,ufType,jcType,lsType,G} <: AbstractNLSolverCache
+  """residuals `z - g(z)` of Newton iteration"""
+  dz::uType
+  tstep::tType
+  k::rateType
+  atmp::uNoUnitsType
+  J::J
+  W::W
+  new_W::Bool
+  W_dt::tType
+  du1::du1Type
+  uf::ufType
+  jac_config::jcType
+  linsolve::lsType
+  weight::uType
+  invγdt::G
+  new_W_dt_cutoff::tType
+end
+
+mutable struct NLNewtonConstantCache{uType,tType,J,W,ufType,G} <: AbstractNLSolverCache
+  """residuals `z - g(z)` of Newton iteration"""
+  dz::uType
+  tstep::tType
+  J::J
+  W::W
+  new_W::Bool
+  W_dt::tType
+  uf::ufType
+  invγdt::G
+  new_W_dt_cutoff::tType
+end
\ No newline at end of file
diff --git a/src/nlsolve/utils.jl b/src/nlsolve/utils.jl
index 53c84ea6b..407e38d04 100644
--- a/src/nlsolve/utils.jl
+++ b/src/nlsolve/utils.jl
@@ -1,3 +1,145 @@
+## Anderson acceleration
+
+"""
+    anderson(gz, cache, integrator)
+
+Return the value for the next fixed-point iteration by performing Anderson acceleration
+based on the current evaluation `gz = G(z)` of the fixed-point iteration, and the settings
+and history in the `cache`.
+"""
+@muladd function anderson(gz, cache, integrator)
+  @unpack dz,Δgzs,Q,R,γs,history,droptol = cache
+
+  # increase size of history
+  history += 1
+  
+  # remove oldest history if maximum size is exceeded
+  max_history = length(Δgzs)
+  if history > max_history
+    # circularly shift differences of G(z)
+    for i in 1:(max_history-1)
+      Δgzs[i] = Δgzs[i + 1]
+    end
+  
+    # delete left-most column of QR decomposition
+    qrdelete!(Q, R, max_history)
+  
+    # update size of history
+    history = max_history
+  end
+  
+  # update history of differences of G(z)
+  Δgzs[history] = @.. gz - cache.gzprev
+  
+  # replace/add difference of residuals as right-most column to QR decomposition
+  qradd!(Q, R, _vec(dz .- cache.dzprev), history)
+  
+  # update cached values
+  cache.dzprev = dz
+  cache.gzprev = gz
+  
+  # define current Q and R matrices
+  Qcur, Rcur = view(Q, :, 1:history), UpperTriangular(view(R, 1:history, 1:history))
+  
+  # check condition (TODO: incremental estimation)
+  if droptol !== nothing
+    while cond(R) > droptol && history > 1
+      qrdelete!(Q, R, history)
+      history -= 1
+      Qcur, Rcur = view(Q, :, 1:history), UpperTriangular(view(R, 1:history, 1:history))
+    end
+  end
+  
+  # solve least squares problem
+  γscur = view(γs, 1:history)
+  ldiv!(Rcur, mul!(γscur, Qcur', _vec(dz)))
+  if has_destats(integrator)
+    integrator.destats.nsolve += 1
+  end
+  
+  # update iterate
+  for i in 1:history
+    gz = @.. gz - γs[i] * Δgzs[i]
+  end
+  
+  # update cached values
+  cache.history = history
+
+  gz
+end
+
+"""
+    anderson!(gz, cache, integrator)
+
+Update the current evaluation `gz = G(z)` of the fixed-point iteration in-place by
+performing Anderson acceleration based on the settings and history in the `cache`.
+"""
+@muladd function anderson!(gz, cache, integrator)
+  @unpack gzprev,dz,dzprev,Δgzs,Q,R,γs,history,droptol = cache
+
+  # increase size of history
+  history += 1
+
+  # remove oldest history if maximum size is exceeded
+  max_history = length(Δgzs)
+  if history > max_history
+    # circularly shift differences of z
+    ptr = Δgzs[1]
+    for i in 1:(max_history-1)
+      Δgzs[i] = Δgzs[i + 1]
+    end
+    Δgzs[max_history] = ptr
+
+    # delete left-most column of QR decomposition
+    qrdelete!(Q, R, max_history)
+
+    # update size of history
+    history = max_history
+  end
+
+  # update history of differences of g(z)
+  @.. Δgzs[history] = gz - gzprev
+
+  # replace/add difference of residuals as right-most column to QR decomposition
+  @.. dzprev = dz - dzprev
+  qradd!(Q, R, vec(dzprev), history)
+
+  # update cached values
+  @.. dzprev = dz
+  @.. gzprev = gz
+
+  # define current Q and R matrices
+  Qcur, Rcur = view(Q, :, 1:history), UpperTriangular(view(R, 1:history, 1:history))
+
+  # check condition (TODO: incremental estimation)
+  if droptol !== nothing
+    while cond(R) > droptol && history > 1
+      qrdelete!(Q, R, history)
+      history -= 1
+      Qcur, Rcur = view(Q, :, 1:history), UpperTriangular(view(R, 1:history, 1:history))
+    end
+  end
+
+  # solve least squares problem
+  γscur = view(γs, 1:history)
+  ldiv!(Rcur, mul!(γscur, Qcur', vec(dz)))
+  if has_destats(integrator)
+    integrator.destats.nsolve += 1
+  end
+
+  # update next iterate
+  for i in 1:history
+    @.. gz = gz - γs[i] * Δgzs[i]
+  end
+
+  # update cached values
+  cache.history = history
+
+  nothing
+end
+
+## helpers for Anderson acceleration
+
 """
     qrdelete!(Q, R, k)
 
@@ -65,368 +207,4 @@ function qradd!(Q::AbstractMatrix, R::AbstractMatrix, v::Number, k::Int)
   Q[1, 1] = one(v)
 
   Q, R
-end
-
-get_status(nlsolver::NLSolver) = nlsolver.status
-nlsolvefail(nlsolver::NLSolver) = nlsolvefail(get_status(nlsolver))
-nlsolvefail(status::NLStatus) = Int8(status) < 0
-
-isnewton(nlsolver::NLSolver) = isnewton(nlsolver.cache)
-isnewton(nlcache::Union{NLNewtonCache,NLNewtonConstantCache}) = true
-isnewton(nlcache::AbstractNLSolverCache) = false
-
-set_new_W!(nlsolver::NLSolver, val::Bool)::Bool = set_new_W!(nlsolver.cache, val)
-set_new_W!(nlcache::NLNewtonCache, val::Bool)::Bool = nlcache.new_W = val
-set_new_W!(nlcache::AbstractNLSolverCache, val::Bool)::Bool = val
-
-get_W(nlsolver::NLSolver) = get_W(nlsolver.cache)
-get_W(nlcache::Union{NLNewtonCache,NLNewtonConstantCache}) = nlcache.W
-
-set_W!(nlsolver::NLSolver, W) = set_W!(nlsolver.cache, W)
-set_W!(nlcache::Union{NLNewtonCache,NLNewtonConstantCache}, W) = (nlcache.W = W; W)
-
-set_W_dt!(nlsolver::NLSolver, W_dt) = set_W_dt!(nlsolver.cache, W_dt)
-set_W_dt!(nlcache::NLNewtonCache, W_dt) = (nlcache.W_dt = W_dt; W_dt)
-set_W_dt!(nlcache::NLNewtonConstantCache, W_dt) = W_dt
-
-function nlsolve_f end
-function iip_get_uf end
-function oop_get_uf end
-function build_jac_config end
-function resize_jac_config! end
-
-DiffEqBase.@def iipnlsolve begin
-  @unpack κ, fast_convergence_cutoff = alg.nlsolve
-
-  # define additional fields of cache of non-linear solver
-  z = similar(u); dz = similar(u); tmp = similar(u); b = similar(u)
-  k = zero(rate_prototype)
-
-  uTolType = real(uBottomEltypeNoUnits)
-
-  # create cache of non-linear solver
-  if alg.nlsolve isa NLNewton
-    nf = nlsolve_f(f, alg)
-
-    if islinear(f)
-      # get the operator
-      J = nf.f
-      W = WOperator(f.mass_matrix, dt, J, true)
-    else
-      if DiffEqBase.has_jac(f) && !DiffEqBase.has_Wfact(f) && f.jac_prototype !== nothing
-        W = WOperator(f, dt, true)
-        J = nothing # is J = W.J better?
-      else
-        J = false .* vec(u) .* vec(u)'
-        W = similar(J)
-      end
-    end
-
-    nlcache = DiffEqBase.NLNewtonCache(true,W,J,dt,alg.nlsolve.new_W_dt_cutoff)
-  elseif alg.nlsolve isa NLFunctional
-    z₊ = similar(z)
-
-    nlcache = DiffEqBase.NLFunctionalCache(z₊)
-  elseif alg.nlsolve isa NLAnderson
-    z₊ = similar(z)
-
-    max_history = min(alg.nlsolve.max_history, alg.nlsolve.max_iter, length(z))
-    Δz₊s = [zero(z) for i in 1:max_history]
-    Q = Matrix{uEltypeNoUnits}(undef, length(z), max_history)
-    R = Matrix{uEltypeNoUnits}(undef, max_history, max_history)
-    γs = Vector{uEltypeNoUnits}(undef, max_history)
-    dzold = zero(z)
-    z₊old = zero(z)
-
-    nlcache = DiffEqBase.NLAndersonCache(z₊,dzold,z₊old,Δz₊s,Q,R,γs,alg.nlsolve.aa_start,alg.nlsolve.droptol)
-  end
-
-  # define additional fields of cache
-  fsalfirst = zero(rate_prototype)
-  if alg.nlsolve isa NLNewton
-    if islinear(f)
-      du1 = rate_prototype
-      uf = nothing
-      jac_config = nothing
-      linsolve = alg.linsolve(Val{:init},nf,u)
-    else
-      du1 = zero(rate_prototype)
-      # if the algorithm specializes on split problems the use `nf`
-      uf = DiffEqDiffTools.UJacobianWrapper(nf,t,p)
-      jac_config = build_jac_config(alg,nf,uf,du1,uprev,u,tmp,dz)
-      linsolve = alg.linsolve(Val{:init},uf,u)
-    end
-    # TODO: check if the solver is iterative
-    weight = similar(u)
-  else
-    J = nothing
-    W = nothing
-    du1 = rate_prototype
-    uf = nothing
-    jac_config = nothing
-    linsolve = nothing
-    weight = z
-  end
-
-  # create non-linear solver
-  nlsolver = NLSolver{true,typeof(z),typeof(k),uTolType,typeof(κ),typeof(γ),typeof(c),typeof(du1),typeof(uf),typeof(jac_config),typeof(linsolve),typeof(fast_convergence_cutoff),typeof(nlcache)}(z,dz,tmp,b,k,one(uTolType),κ,γ,c,alg.nlsolve.max_iter,10000,Convergence,fast_convergence_cutoff,du1,uf,jac_config,linsolve,weight,nlcache)
-end
-
-DiffEqBase.@def oopnlsolve begin
-  @unpack κ, fast_convergence_cutoff = alg.nlsolve
-
-  # define additional fields of cache of non-linear solver (all aliased)
-  z = uprev; dz = z; tmp = z; b = z; k = rate_prototype
-
-  uTolType = real(uBottomEltypeNoUnits)
-
-  # create cache of non-linear solver
-  if alg.nlsolve isa NLNewton
-    nf = nlsolve_f(f, alg)
-
-    # only use `nf` if the algorithm specializes on split eqs
-    uf = DiffEqDiffTools.UDerivativeWrapper(nf,t,p)
-
-    if islinear(f) || DiffEqBase.has_jac(f)
-      # get the operator
-      J = islinear(f) ? nf.f : f.jac(uprev, p, t)
-      if !isa(J, DiffEqBase.AbstractDiffEqLinearOperator)
-        J = DiffEqArrayOperator(J)
-      end
-      W = WOperator(f.mass_matrix, dt, J, false)
-    else
-      # https://github.com/JuliaDiffEq/OrdinaryDiffEq.jl/pull/672
-      if u isa StaticArray
-        # get a "fake" `J`
-        J = if u isa AbstractMatrix && size(u, 1) > 1 # `u` is already a matrix
-          u
-        elseif size(u, 1) == 1 # `u` is a row vector
-          vcat(u, u)
-        else # `u` is a column vector
-          hcat(u, u)
-        end
-        W = lu(J)
-      else
-        W = u isa Number ? u : LU{LinearAlgebra.lutype(uEltypeNoUnits)}(Matrix{uEltypeNoUnits}(undef, 0, 0),
-                                                                        Vector{LinearAlgebra.BlasInt}(undef, 0),
-                                                                        zero(LinearAlgebra.BlasInt))
-      end
-    end
-
-    nlcache = DiffEqBase.NLNewtonConstantCache(W,J,alg.nlsolve.new_W_dt_cutoff)
-  elseif alg.nlsolve isa NLFunctional
-    uf = nothing
-
-    nlcache = DiffEqBase.NLFunctionalConstantCache()
-  elseif alg.nlsolve isa NLAnderson
-    uf = nothing
-
-    max_history = min(alg.nlsolve.max_history, alg.nlsolve.max_iter, length(z))
-    Δz₊s = Vector{typeof(z)}(undef, max_history)
-    Q = Matrix{uEltypeNoUnits}(undef, length(z), max_history)
-    R = Matrix{uEltypeNoUnits}(undef, max_history, max_history)
-    γs = Vector{uEltypeNoUnits}(undef, max_history)
-
-    nlcache = DiffEqBase.NLAndersonConstantCache(Δz₊s,Q,R,γs,alg.nlsolve.aa_start,alg.nlsolve.droptol)
-  end
-
-  # create non-linear solver
-  nlsolver = NLSolver{false,typeof(z),typeof(k),uTolType,typeof(κ),typeof(γ),typeof(c),Nothing,typeof(uf),Nothing,Nothing,typeof(fast_convergence_cutoff),typeof(nlcache)}(z,dz,tmp,b,k,one(uTolType),κ,γ,c,alg.nlsolve.max_iter,10000,Convergence,fast_convergence_cutoff,nothing,uf,nothing,nothing,z,nlcache)
-end
-
-# No J version
-function iipnlsolve(alg,u,uprev,p,t,dt,f,W,rate_prototype,uEltypeNoUnits,uBottomEltypeNoUnits,γ,c)
-  iipnlsolve(alg,u,uprev,p,t,dt,f,W,nothing,rate_prototype,uEltypeNoUnits,uBottomEltypeNoUnits,γ,c)
-end
-
-function iipnlsolve(alg,u,uprev,p,t,dt,f,W,J,rate_prototype,uEltypeNoUnits,uBottomEltypeNoUnits,γ,c)
-  @unpack κ, fast_convergence_cutoff = alg.nlsolve
-
-  # define additional fields of cache of non-linear solver
-  z = similar(u); dz = similar(u); tmp = similar(u); b = similar(u)
-  k = zero(rate_prototype)
-
-  uTolType = real(uBottomEltypeNoUnits)
-
-  # create cache of non-linear solver
-  if alg.nlsolve isa NLNewton
-    nlcache = DiffEqBase.NLNewtonCache(true,W,J,dt,alg.nlsolve.new_W_dt_cutoff)
-  elseif alg.nlsolve isa NLFunctional
-    z₊ = similar(z)
-
-    nlcache = DiffEqBase.NLFunctionalCache(z₊)
-  elseif alg.nlsolve isa NLAnderson
-    z₊ = similar(z)
-
-    max_history = min(alg.nlsolve.max_history, alg.nlsolve.max_iter, length(z))
-    Δz₊s = [zero(z) for i in 1:max_history]
-    Q = Matrix{uEltypeNoUnits}(undef, length(z), max_history)
-    R = Matrix{uEltypeNoUnits}(undef, max_history, max_history)
-    γs = Vector{uEltypeNoUnits}(undef, max_history)
-    dzold = zero(z)
-    z₊old = zero(z)
-
-    nlcache = DiffEqBase.NLAndersonCache(z₊,dzold,z₊old,Δz₊s,Q,R,γs,alg.nlsolve.aa_start,alg.nlsolve.droptol)
-  end
-
-  # define additional fields of cache
-  if alg.nlsolve isa NLNewton
-    nf = nlsolve_f(f, alg)
-
-    if islinear(f)
-      du1 = rate_prototype
-      uf = nothing
-      jac_config = nothing
-      linsolve = alg.linsolve(Val{:init},nf,u)
-    else
-      du1 = zero(rate_prototype)
-      # if the algorithm specializes on split problems the use `nf`
-      # we pass this `alg` here just for identification purpose, because get_uf would be overloaded in different repos
-      uf = iip_get_uf(alg,nf,t,p)
-      jac_config = build_jac_config(alg,nf,uf,du1,uprev,u,tmp,dz)
-      linsolve = alg.linsolve(Val{:init},uf,u)
-    end
-    # TODO: check if the solver is iterative
-    weight = similar(u)
-  else
-    du1 = rate_prototype
-    uf = nothing
-    jac_config = nothing
-    linsolve = nothing
-    weight = z
-  end
-
-  # create non-linear solver
-  nlsolver = NLSolver{true,typeof(z),typeof(k),uTolType,typeof(κ),typeof(γ),typeof(c),typeof(du1),typeof(uf),typeof(jac_config),typeof(linsolve),typeof(fast_convergence_cutoff),typeof(nlcache)}(z,dz,tmp,b,k,one(uTolType),κ,γ,c,alg.nlsolve.max_iter,10000,Convergence,fast_convergence_cutoff,du1,uf,jac_config,linsolve,weight,nlcache)
-end
-
-DiffEqBase.@def getiipnlsolvefields begin
-  @unpack z,dz,tmp,k,uf,du1,jac_config,linsolve = nlsolver
-  b = nlsolver.ztmp
-  fsalfirst = zero(rate_prototype)
-end
-
-# No J version
-function oopnlsolve(alg,u,uprev,p,t,dt,f,W,rate_prototype,uEltypeNoUnits,uBottomEltypeNoUnits,γ,c)
-  oopnlsolve(alg,u,uprev,p,t,dt,f,W,nothing,rate_prototype,uEltypeNoUnits,uBottomEltypeNoUnits,γ,c)  
-end
-
-function oopnlsolve(alg,u,uprev,p,t,dt,f,W,J,rate_prototype,uEltypeNoUnits,uBottomEltypeNoUnits,γ,c)
-  @unpack κ, fast_convergence_cutoff = alg.nlsolve
-
-  # define additional fields of cache of non-linear solver (all aliased)
-  z = uprev; dz = z; tmp = z; b = z; k = rate_prototype
-
-  uTolType = real(uBottomEltypeNoUnits)
-
-  # create cache of non-linear solver
-  if alg.nlsolve isa NLNewton
-    nf = nlsolve_f(f, alg)
-    # only use `nf` if the algorithm specializes on split eqs
-    uf = oop_get_uf(alg,nf,t,p)
-    nlcache = DiffEqBase.NLNewtonConstantCache(W,J,alg.nlsolve.new_W_dt_cutoff)
-  elseif alg.nlsolve isa NLFunctional
-    uf = nothing
-    nlcache = DiffEqBase.NLFunctionalConstantCache()
-  elseif alg.nlsolve isa NLAnderson
-    uf = nothing
-    max_history = min(alg.nlsolve.max_history, alg.nlsolve.max_iter, length(z))
-    Δz₊s = Vector{typeof(z)}(undef, max_history)
-    Q = Matrix{uEltypeNoUnits}(undef, length(z), max_history)
-    R = Matrix{uEltypeNoUnits}(undef, max_history, max_history)
-    γs = Vector{uEltypeNoUnits}(undef, max_history)
-
-    nlcache = DiffEqBase.NLAndersonConstantCache(Δz₊s,Q,R,γs,alg.nlsolve.aa_start,alg.nlsolve.droptol)
-  end
-
-  # create non-linear solver
-  nlsolver = NLSolver{false,typeof(z),typeof(k),uTolType,typeof(κ),typeof(γ),typeof(c),Nothing,typeof(uf),Nothing,Nothing,typeof(fast_convergence_cutoff),typeof(nlcache)}(z,dz,tmp,b,k,one(uTolType),κ,γ,c,alg.nlsolve.max_iter,10000,Convergence,fast_convergence_cutoff,nothing,uf,nothing,nothing,z,nlcache)
-end
-
-DiffEqBase.@def getoopnlsolvefields begin
-  uf = nlsolver.uf
-end
-
-function nlsolve_resize!(integrator::DEIntegrator, i::Int)
-  if !isdefined(integrator.cache, :nlsolver)
-    return nothing
-  end
-  alg = integrator.alg; nlsolver = integrator.cache.nlsolver
-  if nlsolver isa AbstractArray
-    for idx in eachindex(nlsolver) # looping because we may have multiple nlsolver for threaded case
-      _nlsolver = nlsolver[idx]
-      @unpack z,dz,tmp,ztmp,k,du1,uf,jac_config,linsolve,weight,cache = _nlsolver
-      # doubt: if these fields are always going to be in alg cache too, then we shouldnt do this here.
-      # double resize doesn't do any bad I think though
-      resize!(z,i)
-      resize!(dz,i)
-      resize!(tmp,i)
-      resize!(ztmp,i)
-      resize!(k,i)
-      resize!(du1,i)
-      if jac_config !== nothing
-        _nlsolver.jac_config = resize_jac_config!(jac_config, i)
-      end
-      resize!(weight, i)
-      nlsolve_cache_resize!(cache,alg,i)
-    end
-  else
-    @unpack z,dz,tmp,ztmp,k,du1,uf,jac_config,linsolve,weight,cache = nlsolver
-    resize!(z,i)
-    resize!(dz,i)
-    resize!(tmp,i)
-    resize!(ztmp,i)
-    resize!(k,i)
-    resize!(du1,i)
-    if jac_config !== nothing
-      nlsolver.jac_config = resize_jac_config!(jac_config,i)
-    end
-    resize!(weight, i)
-    nlsolve_cache_resize!(cache,alg,i)
-  end
-  nothing
-end
-
-function nlsolve_cache_resize!(cache::NLNewtonCache, alg, i::Int)
-  nothing
-end
-
-function nlsolve_cache_resize!(cache::NLNewtonConstantCache, alg, i::Int)
-  nothing
-end
-
-function nlsolve_cache_resize!(cache::NLAndersonCache, alg, i::Int)
-  resize!(cache.z₊, i)
-  resize!(cache.dzold, i)
-  resize!(cache.z₊old, i)
-  max_history = min(alg.nlsolve.max_history, alg.nlsolve.max_iter, i)
-  prev_max_history = length(cache.Δz₊s)
-  resize!(cache.γs, max_history)
-  resize!(cache.Δz₊s, max_history)
-  if max_history > prev_max_history
-    for i in (max_history - prev_max_history):max_history
-      cache.Δz₊s[i] = zero(z₊)
-    end
-  end
-  cache.Q = typeof(cache.Q)(undef, i, max_history)
-  cache.R = typeof(cache.R)(undef, max_history, max_history)
-  nothing
-end
-
-function nlsolve_cache_resize!(cache::NLAndersonConstantCache, alg, i::Int)
-  max_history = min(alg.nlsolve.max_history, alg.nlsolve.max_iter, i)
-  resize!(cache.Δz₊s, max_history)
-  cache.Q = typeof(cache.Q)(undef, i, max_history)
-  cache.R = typeof(cache.R)(undef, max_history, max_history)
-  resize!(cache.γs, max_history)
-  nothing
-end
-
-function nlsolve_cache_resize!(cache::NLFunctionalCache, alg, i::Int)
-  resize!(cache.z₊, i)
-  nothing
-end
-
-function nlsolve_cache_resize!(cache::NLFunctionalConstantCache, alg, i::Int)
-  nothing
-end
+end
\ No newline at end of file