Skip to content

Commit 7328bb3

Browse files
committed
inference: remove throw block deoptimization completely
After experimenting with #49235, I started to question whether we are getting any actual benefit from the `throw` block deoptimization anymore. This commit removes the deoptimization from the system entirely. Based on the numbers below, it appears that the deoptimization is not very profitable in our current Julia-level compilation pipeline, with the effects analysis playing a significant role in reducing latency.

Here are the updated benchmarks:

| Metric | master | #49235 | this commit |
|-------------------------|-----------|-------------|--------------------------------------------|
| Base (seconds) | 15.579300 | 15.206645 | 15.42059 |
| Stdlibs (seconds) | 17.919013 | 17.667094 | 17.404586 |
| Total (seconds) | 33.499279 | 32.874737 | 32.826162 |
| Precompilation (seconds) | 53.488528 | 53.152028 | 53.152028 |
| First time `plot(rand(10,3))` [^1] | `3.432983 seconds (16.55 M allocations)` | `3.477767 seconds (16.45 M allocations)` | `3.539117 seconds (16.43 M allocations)` |
| First time `solve(prob, QNDF())(5.0)` [^2] | `4.628278 seconds (15.74 M allocations)` | `4.609222 seconds (15.32 M allocations)` | `4.547323 seconds (15.19 M allocations: 823.510 MiB)` |

[^1]: With precompilation of Plots.jl disabled.
[^2]: With precompilation of OrdinaryDiffEq disabled.
1 parent 28d9f73 commit 7328bb3

File tree

7 files changed

+23
-123
lines changed

7 files changed

+23
-123
lines changed

base/compiler/abstractinterpretation.jl

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,6 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
1111
arginfo::ArgInfo, si::StmtInfo, @nospecialize(atype),
1212
sv::AbsIntState, max_methods::Int)
1313
⊑ₚ = ⊑(ipo_lattice(interp))
14-
if !should_infer_this_call(interp, sv)
15-
add_remark!(interp, sv, "Skipped call in throw block")
16-
# At this point we are guaranteed to end up throwing on this path,
17-
# which is all that's required for :consistent-cy. Of course, we don't
18-
# know anything else about this statement.
19-
effects = Effects(; consistent=ALWAYS_TRUE)
20-
return CallMeta(Any, effects, NoCallInfo())
21-
end
22-
2314
argtypes = arginfo.argtypes
2415
matches = find_matching_methods(typeinf_lattice(interp), argtypes, atype, method_table(interp),
2516
InferenceParams(interp).max_union_splitting, max_methods)

base/compiler/inferencestate.jl

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,6 @@ mutable struct InferenceState
307307
cached = cache === :global
308308

309309
# some more setups
310-
InferenceParams(interp).unoptimize_throw_blocks && mark_throw_blocks!(src, handler_at)
311310
cache !== :no && push!(get_inference_cache(interp), result)
312311

313312
return new(
@@ -861,30 +860,6 @@ bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::InferenceStat
861860
bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) =
862861
state.rt === Any
863862

864-
function should_infer_this_call(interp::AbstractInterpreter, sv::InferenceState)
865-
if InferenceParams(interp).unoptimize_throw_blocks
866-
# Disable inference of calls in throw blocks, since we're unlikely to
867-
# need their types. There is one exception however: If up until now, the
868-
# function has not seen any side effects, we would like to make sure there
869-
# aren't any in the throw block either to enable other optimizations.
870-
if is_stmt_throw_block(get_curr_ssaflag(sv))
871-
should_infer_for_effects(sv) || return false
872-
end
873-
end
874-
return true
875-
end
876-
function should_infer_for_effects(sv::InferenceState)
877-
def = sv.linfo.def
878-
def isa Method || return false # toplevel frame will not be [semi-]concrete-evaluated
879-
effects = sv.ipo_effects
880-
override = decode_effects_override(def.purity)
881-
effects.consistent === ALWAYS_FALSE && !is_effect_overridden(override, :consistent) && return false
882-
effects.effect_free === ALWAYS_FALSE && !is_effect_overridden(override, :effect_free) && return false
883-
!effects.terminates && !is_effect_overridden(override, :terminates_globally) && return false
884-
return true
885-
end
886-
should_infer_this_call(::AbstractInterpreter, ::IRInterpretationState) = true
887-
888863
add_remark!(::AbstractInterpreter, ::InferenceState, remark) = return
889864
add_remark!(::AbstractInterpreter, ::IRInterpretationState, remark) = return
890865

base/compiler/optimize.jl

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,29 +15,28 @@ const SLOT_USEDUNDEF = 32 # slot has uses that might raise UndefVarError
1515

1616
# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c
1717

18-
const IR_FLAG_NULL = UInt32(0)
18+
const IR_FLAG_NULL = zero(UInt32)
1919
# This statement is marked as @inbounds by user.
2020
# If replaced by inlining, any contained boundschecks may be removed.
21-
const IR_FLAG_INBOUNDS = UInt32(1) << 0
21+
const IR_FLAG_INBOUNDS = one(UInt32) << 0
2222
# This statement is marked as @inline by user
23-
const IR_FLAG_INLINE = UInt32(1) << 1
23+
const IR_FLAG_INLINE = one(UInt32) << 1
2424
# This statement is marked as @noinline by user
25-
const IR_FLAG_NOINLINE = UInt32(1) << 2
26-
const IR_FLAG_THROW_BLOCK = UInt32(1) << 3
25+
const IR_FLAG_NOINLINE = one(UInt32) << 2
2726
# This statement may be removed if its result is unused. In particular,
2827
# it must be both :effect_free and :nothrow.
2928
# TODO: Separate these out.
30-
const IR_FLAG_EFFECT_FREE = UInt32(1) << 4
29+
const IR_FLAG_EFFECT_FREE = one(UInt32) << 3
3130
# This statement was proven not to throw
32-
const IR_FLAG_NOTHROW = UInt32(1) << 5
31+
const IR_FLAG_NOTHROW = one(UInt32) << 4
3332
# This is :consistent
34-
const IR_FLAG_CONSISTENT = UInt32(1) << 6
33+
const IR_FLAG_CONSISTENT = one(UInt32) << 5
3534
# An optimization pass has updated this statement in a way that may
3635
# have exposed information that inference did not see. Re-running
3736
# inference on this statement may be profitable.
38-
const IR_FLAG_REFINED = UInt32(1) << 7
37+
const IR_FLAG_REFINED = one(UInt32) << 6
3938
# This is :noub == ALWAYS_TRUE
40-
const IR_FLAG_NOUB = UInt32(1) << 8
39+
const IR_FLAG_NOUB = one(UInt32) << 7
4140

4241
const IR_FLAGS_EFFECTS = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW | IR_FLAG_CONSISTENT | IR_FLAG_NOUB
4342

@@ -237,7 +236,6 @@ _topmod(sv::OptimizationState) = _topmod(sv.mod)
237236

238237
is_stmt_inline(stmt_flag::UInt32) = stmt_flag & IR_FLAG_INLINE ≠ 0
239238
is_stmt_noinline(stmt_flag::UInt32) = stmt_flag & IR_FLAG_NOINLINE ≠ 0
240-
is_stmt_throw_block(stmt_flag::UInt32) = stmt_flag & IR_FLAG_THROW_BLOCK ≠ 0
241239

242240
function new_expr_effect_flags(𝕃ₒ::AbstractLattice, args::Vector{Any}, src::Union{IRCode,IncrementalCompact}, pattern_match=nothing)
243241
Targ = args[1]
@@ -1013,7 +1011,7 @@ plus_saturate(x::Int, y::Int) = max(x, y, x+y)
10131011
isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T))
10141012

10151013
function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
1016-
params::OptimizationParams, error_path::Bool = false)
1014+
params::OptimizationParams)
10171015
head = ex.head
10181016
if is_meta_expr_head(head)
10191017
return 0
@@ -1048,7 +1046,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
10481046
return 0
10491047
elseif (f === Core.arrayref || f === Core.const_arrayref || f === Core.arrayset) && length(ex.args) >= 3
10501048
atyp = argextype(ex.args[3], src, sptypes)
1051-
return isknowntype(atyp) ? 4 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
1049+
return isknowntype(atyp) ? 4 : params.inline_nonleaf_penalty
10521050
elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes)))
10531051
return 1
10541052
end
@@ -1064,7 +1062,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
10641062
if extyp === Union{}
10651063
return 0
10661064
end
1067-
return error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
1065+
return params.inline_nonleaf_penalty
10681066
elseif head === :foreigncall || head === :invoke || head === :invoke_modify
10691067
# Calls whose "return type" is Union{} do not actually return:
10701068
# they are errors. Since these are not part of the typical
@@ -1081,7 +1079,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
10811079
end
10821080
a = ex.args[2]
10831081
if a isa Expr
1084-
cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, params, error_path))
1082+
cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, params))
10851083
end
10861084
return cost
10871085
elseif head === :copyast
@@ -1101,8 +1099,7 @@ function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{Cod
11011099
thiscost = 0
11021100
dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt
11031101
if stmt isa Expr
1104-
thiscost = statement_cost(stmt, line, src, sptypes, params,
1105-
is_stmt_throw_block(isa(src, IRCode) ? src.stmts.flag[line] : src.ssaflags[line]))::Int
1102+
thiscost = statement_cost(stmt, line, src, sptypes, params)::Int
11061103
elseif stmt isa GotoNode
11071104
# loops are generally always expensive
11081105
# but assume that forward jumps are already counted for from

base/compiler/types.jl

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -131,11 +131,6 @@ Parameters that control abstract interpretation-based type inference operation.
131131
information available. [`Base.@constprop :aggressive`](@ref Base.@constprop) can have a
132132
more fine-grained control on this configuration with per-method annotation basis.
133133
---
134-
- `inf_params.unoptimize_throw_blocks::Bool = true`\\
135-
If `true`, skips inferring calls that are in a block that is known to `throw`.
136-
It may improve the compiler latency without sacrificing the runtime performance
137-
in common situations.
138-
---
139134
- `inf_params.assume_bindings_static::Bool = false`\\
140135
If `true`, assumes that no new bindings will be added, i.e. a non-existing binding at
141136
inference time can be assumed to always not exist at runtime (and thus e.g. any access to
@@ -151,7 +146,6 @@ struct InferenceParams
151146
tuple_complexity_limit_depth::Int
152147
ipo_constant_propagation::Bool
153148
aggressive_constant_propagation::Bool
154-
unoptimize_throw_blocks::Bool
155149
assume_bindings_static::Bool
156150
ignore_recursion_hardlimit::Bool
157151

@@ -163,7 +157,6 @@ struct InferenceParams
163157
tuple_complexity_limit_depth::Int,
164158
ipo_constant_propagation::Bool,
165159
aggressive_constant_propagation::Bool,
166-
unoptimize_throw_blocks::Bool,
167160
assume_bindings_static::Bool,
168161
ignore_recursion_hardlimit::Bool)
169162
return new(
@@ -174,7 +167,6 @@ struct InferenceParams
174167
tuple_complexity_limit_depth,
175168
ipo_constant_propagation,
176169
aggressive_constant_propagation,
177-
unoptimize_throw_blocks,
178170
assume_bindings_static,
179171
ignore_recursion_hardlimit)
180172
end
@@ -188,7 +180,6 @@ function InferenceParams(
188180
#=tuple_complexity_limit_depth::Int=# 3,
189181
#=ipo_constant_propagation::Bool=# true,
190182
#=aggressive_constant_propagation::Bool=# false,
191-
#=unoptimize_throw_blocks::Bool=# true,
192183
#=assume_bindings_static::Bool=# false,
193184
#=ignore_recursion_hardlimit::Bool=# false);
194185
max_methods::Int = params.max_methods,
@@ -198,7 +189,6 @@ function InferenceParams(
198189
tuple_complexity_limit_depth::Int = params.tuple_complexity_limit_depth,
199190
ipo_constant_propagation::Bool = params.ipo_constant_propagation,
200191
aggressive_constant_propagation::Bool = params.aggressive_constant_propagation,
201-
unoptimize_throw_blocks::Bool = params.unoptimize_throw_blocks,
202192
assume_bindings_static::Bool = params.assume_bindings_static,
203193
ignore_recursion_hardlimit::Bool = params.ignore_recursion_hardlimit)
204194
return InferenceParams(
@@ -209,7 +199,6 @@ function InferenceParams(
209199
tuple_complexity_limit_depth,
210200
ipo_constant_propagation,
211201
aggressive_constant_propagation,
212-
unoptimize_throw_blocks,
213202
assume_bindings_static,
214203
ignore_recursion_hardlimit)
215204
end
@@ -234,10 +223,6 @@ Parameters that control optimizer operation.
234223
tuple return types (in hopes of splitting it up). `opt_params.inline_tupleret_bonus` will
235224
be added to `opt_params.inline_cost_threshold` when making inlining decision.
236225
---
237-
- `opt_params.inline_error_path_cost::Int = 20`\\
238-
Specifies the penalty cost for an un-optimized dynamic call in a block that is known to
239-
`throw`. See also [`(inf_params::InferenceParams).unoptimize_throw_blocks`](@ref InferenceParams).
240-
---
241226
- `opt_params.max_tuple_splat::Int = 32`\\
242227
When attempting to inline `Core._apply_iterate`, abort the optimization if the tuple
243228
contains more than this many elements.
@@ -259,7 +244,6 @@ struct OptimizationParams
259244
inline_cost_threshold::Int
260245
inline_nonleaf_penalty::Int
261246
inline_tupleret_bonus::Int
262-
inline_error_path_cost::Int
263247
max_tuple_splat::Int
264248
compilesig_invokes::Bool
265249
assume_fatal_throw::Bool
@@ -269,7 +253,6 @@ struct OptimizationParams
269253
inline_cost_threshold::Int,
270254
inline_nonleaf_penalty::Int,
271255
inline_tupleret_bonus::Int,
272-
inline_error_path_cost::Int,
273256
max_tuple_splat::Int,
274257
compilesig_invokes::Bool,
275258
assume_fatal_throw::Bool)
@@ -278,7 +261,6 @@ struct OptimizationParams
278261
inline_cost_threshold,
279262
inline_nonleaf_penalty,
280263
inline_tupleret_bonus,
281-
inline_error_path_cost,
282264
max_tuple_splat,
283265
compilesig_invokes,
284266
assume_fatal_throw)
@@ -290,15 +272,13 @@ function OptimizationParams(
290272
#=inline_cost_threshold::Int=# 100,
291273
#=inline_nonleaf_penalty::Int=# 1000,
292274
#=inline_tupleret_bonus::Int=# 250,
293-
#=inline_error_path_cost::Int=# 20,
294275
#=max_tuple_splat::Int=# 32,
295276
#=compilesig_invokes::Bool=# true,
296277
#=assume_fatal_throw::Bool=# false);
297278
inlining::Bool = params.inlining,
298279
inline_cost_threshold::Int = params.inline_cost_threshold,
299280
inline_nonleaf_penalty::Int = params.inline_nonleaf_penalty,
300281
inline_tupleret_bonus::Int = params.inline_tupleret_bonus,
301-
inline_error_path_cost::Int = params.inline_error_path_cost,
302282
max_tuple_splat::Int = params.max_tuple_splat,
303283
compilesig_invokes::Bool = params.compilesig_invokes,
304284
assume_fatal_throw::Bool = params.assume_fatal_throw)
@@ -307,7 +287,6 @@ function OptimizationParams(
307287
inline_cost_threshold,
308288
inline_nonleaf_penalty,
309289
inline_tupleret_bonus,
310-
inline_error_path_cost,
311290
max_tuple_splat,
312291
compilesig_invokes,
313292
assume_fatal_throw)

base/compiler/utilities.jl

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -441,51 +441,6 @@ function is_throw_call(e::Expr)
441441
return false
442442
end
443443

444-
function mark_throw_blocks!(src::CodeInfo, handler_at::Vector{Int})
445-
for stmt in find_throw_blocks(src.code, handler_at)
446-
src.ssaflags[stmt] |= IR_FLAG_THROW_BLOCK
447-
end
448-
return nothing
449-
end
450-
451-
function find_throw_blocks(code::Vector{Any}, handler_at::Vector{Int})
452-
stmts = BitSet()
453-
n = length(code)
454-
for i in n:-1:1
455-
s = code[i]
456-
if isa(s, Expr)
457-
if s.head === :gotoifnot
458-
if i+1 in stmts && s.args[2]::Int in stmts
459-
push!(stmts, i)
460-
end
461-
elseif s.head === :return
462-
# see `ReturnNode` handling
463-
elseif is_throw_call(s)
464-
if handler_at[i] == 0
465-
push!(stmts, i)
466-
end
467-
elseif i+1 in stmts
468-
push!(stmts, i)
469-
end
470-
elseif isa(s, ReturnNode)
471-
# NOTE: it potentially makes sense to treat unreachable nodes
472-
# (where !isdefined(s, :val)) as `throw` points, but that can cause
473-
# worse codegen around the call site (issue #37558)
474-
elseif isa(s, GotoNode)
475-
if s.label in stmts
476-
push!(stmts, i)
477-
end
478-
elseif isa(s, GotoIfNot)
479-
if i+1 in stmts && s.dest in stmts
480-
push!(stmts, i)
481-
end
482-
elseif i+1 in stmts
483-
push!(stmts, i)
484-
end
485-
end
486-
return stmts
487-
end
488-
489444
# using a function to ensure we can infer this
490445
@inline function slot_id(s)
491446
isa(s, SlotNumber) && return s.id

doc/src/devdocs/ast.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -695,9 +695,11 @@ A (usually temporary) container for holding lowered source code.
695695
* 0x01 << 0 = statement is marked as `@inbounds`
696696
* 0x01 << 1 = statement is marked as `@inline`
697697
* 0x01 << 2 = statement is marked as `@noinline`
698-
* 0x01 << 3 = statement is within a block that leads to `throw` call
699-
* 0x01 << 4 = statement may be removed if its result is unused, in particular it is thus be both pure and effect free
700-
* 0x01 << 5-6 = <unused>
698+
* 0x01 << 3 = statement is "effect free", i.e. it may be removed if its result is unused,
699+
in particular it must thus be both `:nothrow` and `:effect_free`
700+
* 0x01 << 4 = statement is "nothrow", i.e. it is proven not to `:throw`
701+
* 0x01 << 5 = statement is "consistent", i.e. it does not taint `:consistent`-cy of the function
702+
* 0x01 << 6 = <unused>
701703
* 0x01 << 7 = <reserved> has out-of-band info
702704

703705
* `linetable`

src/julia.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -283,9 +283,10 @@ typedef struct _jl_code_info_t {
283283
// 0 = inbounds
284284
// 1 = inline
285285
// 2 = noinline
286-
// 3 = <reserved> strict-ieee (strictfp)
287-
// 4 = effect-free (may be deleted if unused)
288-
// 5-6 = <unused>
286+
// 3 = effect-free (may be deleted if unused)
287+
// 4 = nothrow
288+
// 5 = consistent
289+
// 6 = <unused>
289290
// 7 = has out-of-band info
290291
// miscellaneous data:
291292
jl_value_t *method_for_inference_limit_heuristics; // optional method used during inference

0 commit comments

Comments
 (0)