inference: remove throw block deoptimization completely

aviatesk · aviatesk · commit 7328bb326cb2 · 2023-09-26T18:26:13.000+09:00
After experimenting with #49235, I started to question if we are getting any actual benefit from the `throw` block deoptimization anymore. This commit removes the deoptimization from the system entirely. Based on the numbers below, it appears that the deoptimization is not very profitable in our current Julia-level compilation pipeline, with the effects analysis playing a significant role in reducing latency. Here are the updated benchmark: | Metric | master | #49235 | this commit | |-------------------------|-----------|-------------|--------------------------------------------| | Base (seconds) | 15.579300 | 15.206645 | 15.42059 | | Stdlibs (seconds) | 17.919013 | 17.667094 | 17.404586 | | Total (seconds) | 33.499279 | 32.874737 | 32.826162 | | Precompilation (seconds) | 53.488528 | 53.152028 | 53.152028 | | First time `plot(rand(10,3))` [^1] | `3.432983 seconds (16.55 M allocations)` | `3.477767 seconds (16.45 M allocations)` | `3.539117 seconds (16.43 M allocations)` | | First time `solve(prob, QNDF())(5.0)` [^2] | `4.628278 seconds (15.74 M allocations)` | `4.609222 seconds (15.32 M allocations)` | `4.547323 seconds (15.19 M allocations: 823.510 MiB)` | [^1]: With disabling precompilation of Plots.jl. [^2]: With disabling precompilation of OrdinaryDiffEq.
diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
@@ -11,15 +11,6 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
                                   arginfo::ArgInfo, si::StmtInfo, @nospecialize(atype),
                                   sv::AbsIntState, max_methods::Int)
     ⊑ₚ = ⊑(ipo_lattice(interp))
-    if !should_infer_this_call(interp, sv)
-        add_remark!(interp, sv, "Skipped call in throw block")
-        # At this point we are guaranteed to end up throwing on this path,
-        # which is all that's required for :consistent-cy. Of course, we don't
-        # know anything else about this statement.
-        effects = Effects(; consistent=ALWAYS_TRUE)
-        return CallMeta(Any, effects, NoCallInfo())
-    end
-
     argtypes = arginfo.argtypes
     matches = find_matching_methods(typeinf_lattice(interp), argtypes, atype, method_table(interp),
         InferenceParams(interp).max_union_splitting, max_methods)
diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl
@@ -307,7 +307,6 @@ mutable struct InferenceState
         cached = cache === :global
 
         # some more setups
-        InferenceParams(interp).unoptimize_throw_blocks && mark_throw_blocks!(src, handler_at)
         cache !== :no && push!(get_inference_cache(interp), result)
 
         return new(
@@ -861,30 +860,6 @@ bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::InferenceStat
 bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) =
     state.rt === Any
 
-function should_infer_this_call(interp::AbstractInterpreter, sv::InferenceState)
-    if InferenceParams(interp).unoptimize_throw_blocks
-        # Disable inference of calls in throw blocks, since we're unlikely to
-        # need their types. There is one exception however: If up until now, the
-        # function has not seen any side effects, we would like to make sure there
-        # aren't any in the throw block either to enable other optimizations.
-        if is_stmt_throw_block(get_curr_ssaflag(sv))
-            should_infer_for_effects(sv) || return false
-        end
-    end
-    return true
-end
-function should_infer_for_effects(sv::InferenceState)
-    def = sv.linfo.def
-    def isa Method || return false # toplevel frame will not be [semi-]concrete-evaluated
-    effects = sv.ipo_effects
-    override = decode_effects_override(def.purity)
-    effects.consistent === ALWAYS_FALSE && !is_effect_overridden(override, :consistent) && return false
-    effects.effect_free === ALWAYS_FALSE && !is_effect_overridden(override, :effect_free) && return false
-    !effects.terminates && !is_effect_overridden(override, :terminates_globally) && return false
-    return true
-end
-should_infer_this_call(::AbstractInterpreter, ::IRInterpretationState) = true
-
 add_remark!(::AbstractInterpreter, ::InferenceState, remark) = return
 add_remark!(::AbstractInterpreter, ::IRInterpretationState, remark) = return
 
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
@@ -15,29 +15,28 @@ const SLOT_USEDUNDEF    = 32 # slot has uses that might raise UndefVarError
 
 # NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c
 
-const IR_FLAG_NULL        = UInt32(0)
+const IR_FLAG_NULL        = zero(UInt32)
 # This statement is marked as @inbounds by user.
 # Ff replaced by inlining, any contained boundschecks may be removed.
-const IR_FLAG_INBOUNDS    = UInt32(1) << 0
+const IR_FLAG_INBOUNDS    = one(UInt32) << 0
 # This statement is marked as @inline by user
-const IR_FLAG_INLINE      = UInt32(1) << 1
+const IR_FLAG_INLINE      = one(UInt32) << 1
 # This statement is marked as @noinline by user
-const IR_FLAG_NOINLINE    = UInt32(1) << 2
-const IR_FLAG_THROW_BLOCK = UInt32(1) << 3
+const IR_FLAG_NOINLINE    = one(UInt32) << 2
 # This statement may be removed if its result is unused. In particular,
 # it must be both :effect_free and :nothrow.
 # TODO: Separate these out.
-const IR_FLAG_EFFECT_FREE = UInt32(1) << 4
+const IR_FLAG_EFFECT_FREE = one(UInt32) << 3
 # This statement was proven not to throw
-const IR_FLAG_NOTHROW     = UInt32(1) << 5
+const IR_FLAG_NOTHROW     = one(UInt32) << 4
 # This is :consistent
-const IR_FLAG_CONSISTENT  = UInt32(1) << 6
+const IR_FLAG_CONSISTENT  = one(UInt32) << 5
 # An optimization pass has updated this statement in a way that may
 # have exposed information that inference did not see. Re-running
 # inference on this statement may be profitable.
-const IR_FLAG_REFINED     = UInt32(1) << 7
+const IR_FLAG_REFINED     = one(UInt32) << 6
 # This is :noub == ALWAYS_TRUE
-const IR_FLAG_NOUB        = UInt32(1) << 8
+const IR_FLAG_NOUB        = one(UInt32) << 7
 
 const IR_FLAGS_EFFECTS = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW | IR_FLAG_CONSISTENT | IR_FLAG_NOUB
 
@@ -237,7 +236,6 @@ _topmod(sv::OptimizationState) = _topmod(sv.mod)
 
 is_stmt_inline(stmt_flag::UInt32)      = stmt_flag & IR_FLAG_INLINE      ≠ 0
 is_stmt_noinline(stmt_flag::UInt32)    = stmt_flag & IR_FLAG_NOINLINE    ≠ 0
-is_stmt_throw_block(stmt_flag::UInt32) = stmt_flag & IR_FLAG_THROW_BLOCK ≠ 0
 
 function new_expr_effect_flags(𝕃ₒ::AbstractLattice, args::Vector{Any}, src::Union{IRCode,IncrementalCompact}, pattern_match=nothing)
     Targ = args[1]
@@ -1013,7 +1011,7 @@ plus_saturate(x::Int, y::Int) = max(x, y, x+y)
 isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T))
 
 function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
-                        params::OptimizationParams, error_path::Bool = false)
+                        params::OptimizationParams)
     head = ex.head
     if is_meta_expr_head(head)
         return 0
@@ -1048,7 +1046,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
                 return 0
             elseif (f === Core.arrayref || f === Core.const_arrayref || f === Core.arrayset) && length(ex.args) >= 3
                 atyp = argextype(ex.args[3], src, sptypes)
-                return isknowntype(atyp) ? 4 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
+                return isknowntype(atyp) ? 4 : params.inline_nonleaf_penalty
             elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes)))
                 return 1
             end
@@ -1064,7 +1062,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
         if extyp === Union{}
             return 0
         end
-        return error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
+        return params.inline_nonleaf_penalty
     elseif head === :foreigncall || head === :invoke || head === :invoke_modify
         # Calls whose "return type" is Union{} do not actually return:
         # they are errors. Since these are not part of the typical
@@ -1081,7 +1079,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
         end
         a = ex.args[2]
         if a isa Expr
-            cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, params, error_path))
+            cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, params))
         end
         return cost
     elseif head === :copyast
@@ -1101,8 +1099,7 @@ function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{Cod
     thiscost = 0
     dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt
     if stmt isa Expr
-        thiscost = statement_cost(stmt, line, src, sptypes, params,
-                                  is_stmt_throw_block(isa(src, IRCode) ? src.stmts.flag[line] : src.ssaflags[line]))::Int
+        thiscost = statement_cost(stmt, line, src, sptypes, params)::Int
     elseif stmt isa GotoNode
         # loops are generally always expensive
         # but assume that forward jumps are already counted for from
diff --git a/base/compiler/types.jl b/base/compiler/types.jl
@@ -131,11 +131,6 @@ Parameters that control abstract interpretation-based type inference operation.
   information available. [`Base.@constprop :aggressive`](@ref Base.@constprop) can have a
   more fine-grained control on this configuration with per-method annotation basis.
 ---
-- `inf_params.unoptimize_throw_blocks::Bool = true`\\
-  If `true`, skips inferring calls that are in a block that is known to `throw`.
-  It may improve the compiler latency without sacrificing the runtime performance
-  in common situations.
----
 - `inf_params.assume_bindings_static::Bool = false`\\
   If `true`, assumes that no new bindings will be added, i.e. a non-existing binding at
   inference time can be assumed to always not exist at runtime (and thus e.g. any access to
@@ -151,7 +146,6 @@ struct InferenceParams
     tuple_complexity_limit_depth::Int
     ipo_constant_propagation::Bool
     aggressive_constant_propagation::Bool
-    unoptimize_throw_blocks::Bool
     assume_bindings_static::Bool
     ignore_recursion_hardlimit::Bool
 
@@ -163,7 +157,6 @@ struct InferenceParams
         tuple_complexity_limit_depth::Int,
         ipo_constant_propagation::Bool,
         aggressive_constant_propagation::Bool,
-        unoptimize_throw_blocks::Bool,
         assume_bindings_static::Bool,
         ignore_recursion_hardlimit::Bool)
         return new(
@@ -174,7 +167,6 @@ struct InferenceParams
             tuple_complexity_limit_depth,
             ipo_constant_propagation,
             aggressive_constant_propagation,
-            unoptimize_throw_blocks,
             assume_bindings_static,
             ignore_recursion_hardlimit)
     end
@@ -188,7 +180,6 @@ function InferenceParams(
         #=tuple_complexity_limit_depth::Int=# 3,
         #=ipo_constant_propagation::Bool=# true,
         #=aggressive_constant_propagation::Bool=# false,
-        #=unoptimize_throw_blocks::Bool=# true,
         #=assume_bindings_static::Bool=# false,
         #=ignore_recursion_hardlimit::Bool=# false);
     max_methods::Int = params.max_methods,
@@ -198,7 +189,6 @@ function InferenceParams(
     tuple_complexity_limit_depth::Int = params.tuple_complexity_limit_depth,
     ipo_constant_propagation::Bool = params.ipo_constant_propagation,
     aggressive_constant_propagation::Bool = params.aggressive_constant_propagation,
-    unoptimize_throw_blocks::Bool = params.unoptimize_throw_blocks,
     assume_bindings_static::Bool = params.assume_bindings_static,
     ignore_recursion_hardlimit::Bool = params.ignore_recursion_hardlimit)
     return InferenceParams(
@@ -209,7 +199,6 @@ function InferenceParams(
         tuple_complexity_limit_depth,
         ipo_constant_propagation,
         aggressive_constant_propagation,
-        unoptimize_throw_blocks,
         assume_bindings_static,
         ignore_recursion_hardlimit)
 end
@@ -234,10 +223,6 @@ Parameters that control optimizer operation.
   tuple return types (in hopes of splitting it up). `opt_params.inline_tupleret_bonus` will
   be added to `opt_params.inline_cost_threshold` when making inlining decision.
 ---
-- `opt_params.inline_error_path_cost::Int = 20`\\
-  Specifies the penalty cost for an un-optimized dynamic call in a block that is known to
-  `throw`. See also [`(inf_params::InferenceParams).unoptimize_throw_blocks`](@ref InferenceParams).
----
 - `opt_params.max_tuple_splat::Int = 32`\\
   When attempting to inline `Core._apply_iterate`, abort the optimization if the tuple
   contains more than this many elements.
@@ -259,7 +244,6 @@ struct OptimizationParams
     inline_cost_threshold::Int
     inline_nonleaf_penalty::Int
     inline_tupleret_bonus::Int
-    inline_error_path_cost::Int
     max_tuple_splat::Int
     compilesig_invokes::Bool
     assume_fatal_throw::Bool
@@ -269,7 +253,6 @@ struct OptimizationParams
         inline_cost_threshold::Int,
         inline_nonleaf_penalty::Int,
         inline_tupleret_bonus::Int,
-        inline_error_path_cost::Int,
         max_tuple_splat::Int,
         compilesig_invokes::Bool,
         assume_fatal_throw::Bool)
@@ -278,7 +261,6 @@ struct OptimizationParams
             inline_cost_threshold,
             inline_nonleaf_penalty,
             inline_tupleret_bonus,
-            inline_error_path_cost,
             max_tuple_splat,
             compilesig_invokes,
             assume_fatal_throw)
@@ -290,15 +272,13 @@ function OptimizationParams(
         #=inline_cost_threshold::Int=# 100,
         #=inline_nonleaf_penalty::Int=# 1000,
         #=inline_tupleret_bonus::Int=# 250,
-        #=inline_error_path_cost::Int=# 20,
         #=max_tuple_splat::Int=# 32,
         #=compilesig_invokes::Bool=# true,
         #=assume_fatal_throw::Bool=# false);
     inlining::Bool = params.inlining,
     inline_cost_threshold::Int = params.inline_cost_threshold,
     inline_nonleaf_penalty::Int = params.inline_nonleaf_penalty,
     inline_tupleret_bonus::Int = params.inline_tupleret_bonus,
-    inline_error_path_cost::Int = params.inline_error_path_cost,
     max_tuple_splat::Int = params.max_tuple_splat,
     compilesig_invokes::Bool = params.compilesig_invokes,
     assume_fatal_throw::Bool = params.assume_fatal_throw)
@@ -307,7 +287,6 @@ function OptimizationParams(
         inline_cost_threshold,
         inline_nonleaf_penalty,
         inline_tupleret_bonus,
-        inline_error_path_cost,
         max_tuple_splat,
         compilesig_invokes,
         assume_fatal_throw)
diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl
@@ -441,51 +441,6 @@ function is_throw_call(e::Expr)
     return false
 end
 
-function mark_throw_blocks!(src::CodeInfo, handler_at::Vector{Int})
-    for stmt in find_throw_blocks(src.code, handler_at)
-        src.ssaflags[stmt] |= IR_FLAG_THROW_BLOCK
-    end
-    return nothing
-end
-
-function find_throw_blocks(code::Vector{Any}, handler_at::Vector{Int})
-    stmts = BitSet()
-    n = length(code)
-    for i in n:-1:1
-        s = code[i]
-        if isa(s, Expr)
-            if s.head === :gotoifnot
-                if i+1 in stmts && s.args[2]::Int in stmts
-                    push!(stmts, i)
-                end
-            elseif s.head === :return
-                # see `ReturnNode` handling
-            elseif is_throw_call(s)
-                if handler_at[i] == 0
-                    push!(stmts, i)
-                end
-            elseif i+1 in stmts
-                push!(stmts, i)
-            end
-        elseif isa(s, ReturnNode)
-            # NOTE: it potentially makes sense to treat unreachable nodes
-            # (where !isdefined(s, :val)) as `throw` points, but that can cause
-            # worse codegen around the call site (issue #37558)
-        elseif isa(s, GotoNode)
-            if s.label in stmts
-                push!(stmts, i)
-            end
-        elseif isa(s, GotoIfNot)
-            if i+1 in stmts && s.dest in stmts
-                push!(stmts, i)
-            end
-        elseif i+1 in stmts
-            push!(stmts, i)
-        end
-    end
-    return stmts
-end
-
 # using a function to ensure we can infer this
 @inline function slot_id(s)
     isa(s, SlotNumber) && return s.id
diff --git a/doc/src/devdocs/ast.md b/doc/src/devdocs/ast.md
@@ -695,9 +695,11 @@ A (usually temporary) container for holding lowered source code.
     * 0x01 << 0 = statement is marked as `@inbounds`
     * 0x01 << 1 = statement is marked as `@inline`
     * 0x01 << 2 = statement is marked as `@noinline`
-    * 0x01 << 3 = statement is within a block that leads to `throw` call
-    * 0x01 << 4 = statement may be removed if its result is unused, in particular it is thus be both pure and effect free
-    * 0x01 << 5-6 = <unused>
+    * 0x01 << 3 = statement is "effect free", i.e. it may be removed if its result is unused,
+                  in particular it is thus be both `:nothrow` and `:effect_free`
+    * 0x01 << 4 = statement is "nothrow", i.e. it is proven not to `:throw`
+    * 0x01 << 5 = statement is "consistent", i.e. it does not taint `:consistent`-cy of the function
+    * 0x01 << 6 = <unused>
     * 0x01 << 7 = <reserved> has out-of-band info
 
   * `linetable`
diff --git a/src/julia.h b/src/julia.h
@@ -283,9 +283,10 @@ typedef struct _jl_code_info_t {
         // 0 = inbounds
         // 1 = inline
         // 2 = noinline
-        // 3 = <reserved> strict-ieee (strictfp)
-        // 4 = effect-free (may be deleted if unused)
-        // 5-6 = <unused>
+        // 3 = effect-free (may be deleted if unused)
+        // 4 = nothrow
+        // 5 = consistent
+        // 6 = <unused>
         // 7 = has out-of-band info
     // miscellaneous data:
     jl_value_t *method_for_inference_limit_heuristics; // optional method used during inference