46 changes: 46 additions & 0 deletions Compiler/src/ssair/passes.jl
@@ -874,6 +874,49 @@ function perform_lifting!(compact::IncrementalCompact,
return Pair{Any, PhiNest}(stmt_val, PhiNest(visited_philikes, lifted_philikes, lifted_leaves, reverse_mapping, walker_callback))
end

function lift_apply_args!(compact::IncrementalCompact, idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice)
# Handle _apply_iterate calls: convert arguments to use `Core.svec`. The behavior of Core.svec (with boxing) better matches the ABI of codegen.
compact[idx] = nothing
for i in 4:length(stmt.args) # Skip iterate function, f, and first iterator
arg = stmt.args[i]
arg_type = argextype(arg, compact)
svec_args = nothing
if isa(arg_type, DataType) && arg_type.name === Tuple.name
if isa(arg, SSAValue)
arg_stmt = compact[arg][:stmt]
if is_known_call(arg_stmt, Core.tuple, compact)
svec_args = copy(arg_stmt.args)
end
end
if svec_args === nothing
# Fallback path: generate getfield calls for tuple elements
tuple_length = length(arg_type.parameters)
if tuple_length > 0 && !isvarargtype(arg_type.parameters[tuple_length])
svec_args = Vector{Any}(undef, tuple_length + 1)
for j in 1:tuple_length
getfield_call = Expr(:call, GlobalRef(Core, :getfield), arg, j)
getfield_type = arg_type.parameters[j]
inst = compact[SSAValue(idx)]
getfield_ssa = insert_node!(compact, SSAValue(idx), NewInstruction(getfield_call, getfield_type, NoCallInfo(), inst[:line], inst[:flag]))
svec_args[j + 1] = getfield_ssa
end
end
end
end
# Create Core.svec call if we have arguments
if svec_args !== nothing
svec_args[1] = GlobalRef(Core, :svec)
new_svec_call = Expr(:call)
new_svec_call.args = svec_args
inst = compact[SSAValue(idx)]
new_svec_ssa = insert_node!(compact, SSAValue(idx), NewInstruction(new_svec_call, SimpleVector, NoCallInfo(), inst[:line], inst[:flag]))
stmt.args[i] = new_svec_ssa
end
end
compact[idx] = stmt
nothing
end

function lift_svec_ref!(compact::IncrementalCompact, idx::Int, stmt::Expr)
length(stmt.args) != 3 && return

@@ -1377,6 +1420,9 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
compact[SSAValue(idx)] = (compact[enter_ssa][:stmt]::EnterNode).scope
elseif isexpr(stmt, :new)
refine_new_effects!(𝕃ₒ, compact, idx, stmt)
elseif is_known_call(stmt, Core._apply_iterate, compact)
length(stmt.args) >= 4 || continue
lift_apply_args!(compact, idx, stmt, 𝕃ₒ)
Comment on lines +1423 to +1425
Member

Just commenting for reference, and we don't have to do this in this PR, but I started to think it'd be better to make this kind of optimization independent of sroa_pass!.

Member Author

We might want to instead just rename the pass to GVN or MemSSAOpt, since doing everything in one pass is probably a lot more efficient, and either alternative name would reflect that this pass does general memory-value-replacement optimizations.

end
continue
end
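For orientation, a sketch of the rewrite `lift_apply_args!` performs (illustration only, not part of the diff; the names `f`, `t`, and `sv` are made up). Tuple arguments splatted through `Core._apply_iterate` are repackaged as `Core.svec` calls, either by reusing a visible `Core.tuple` call or, on the fallback path, by emitting one `Core.getfield` per element of a known-length, non-vararg tuple type.

```julia
# The splatted container is rebuilt as an svec, whose elements are already boxed and
# therefore match the runtime ABI of _apply_iterate more closely than a tuple does.
f(x, y) = x + y
t = (1, 2)

# Fallback shape spelled out by hand: one getfield per tuple element, then Core.svec.
sv = Core.svec(Core.getfield(t, 1), Core.getfield(t, 2))

# The rewrite replaces the tuple argument with the svec; both calls behave the same.
@assert Core._apply_iterate(Base.iterate, f, t) ==
        Core._apply_iterate(Base.iterate, f, sv) == 3
```

When the tuple comes straight from a `Core.tuple` call visible in the same IR, the pass skips the `getfield` step and feeds that call's arguments directly to `Core.svec`.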
44 changes: 29 additions & 15 deletions Compiler/src/tfuncs.jl
@@ -580,6 +580,15 @@ end
add_tfunc(nfields, 1, 1, nfields_tfunc, 1)
add_tfunc(Core._expr, 1, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->Expr), 100)
add_tfunc(svec, 0, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->SimpleVector), 20)

@nospecs function _svec_len_tfunc(𝕃::AbstractLattice, s)
if isa(s, Const) && isa(s.val, SimpleVector)
return Const(length(s.val))
end
return Int
end
add_tfunc(Core._svec_len, 1, 1, _svec_len_tfunc, 1)

@nospecs function _svec_ref_tfunc(𝕃::AbstractLattice, s, i)
if isa(s, Const) && isa(i, Const)
s, i = s.val, i.val
@@ -1960,15 +1969,8 @@ function tuple_tfunc(𝕃::AbstractLattice, argtypes::Vector{Any})
# UnionAll context is missing around this.
pop!(argtypes)
end
all_are_const = true
for i in 1:length(argtypes)
if !isa(argtypes[i], Const)
all_are_const = false
break
end
end
if all_are_const
return Const(ntuple(i::Int->argtypes[i].val, length(argtypes)))
if is_all_const_arg(argtypes, 1) # repeated from builtin_tfunction for the benefit of callers that use this tfunc directly
return Const(tuple(collect_const_args(argtypes, 1)...))
end
params = Vector{Any}(undef, length(argtypes))
anyinfo = false
@@ -2334,14 +2336,17 @@ function _builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f::Builtin), argt
elseif f === Core.compilerbarrier
na == 2 || return false
return compilerbarrier_nothrow(argtypes[1], nothing)
elseif f === Core._svec_len
na == 1 || return false
return _svec_len_tfunc(𝕃, argtypes[1]) isa Const
elseif f === Core._svec_ref
na == 2 || return false
return _svec_ref_tfunc(𝕃, argtypes[1], argtypes[2]) isa Const
end
return false
end

# known to be always effect-free (in particular nothrow)
# known to be always effect-free (in particular also nothrow)
const _PURE_BUILTINS = Any[
tuple,
svec,
Expand Down Expand Up @@ -2370,6 +2375,8 @@ const _CONSISTENT_BUILTINS = Any[
donotdelete,
memoryrefnew,
memoryrefoffset,
Core._svec_len,
Core._svec_ref,
]

# known to be effect-free (but not necessarily nothrow)
@@ -2394,6 +2401,7 @@ const _EFFECT_FREE_BUILTINS = [
Core.throw_methoderror,
getglobal,
compilerbarrier,
Core._svec_len,
Core._svec_ref,
]

@@ -2428,6 +2436,7 @@ const _ARGMEM_BUILTINS = Any[
replacefield!,
setfield!,
swapfield!,
Core._svec_len,
Core._svec_ref,
]

@@ -2571,6 +2580,7 @@ const _EFFECTS_KNOWN_BUILTINS = Any[
# Core._primitivetype,
# Core._setsuper!,
# Core._structtype,
Core._svec_len,
Core._svec_ref,
# Core._typebody!,
Core._typevar,
@@ -2675,7 +2685,7 @@ function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argty
else
if contains_is(_CONSISTENT_BUILTINS, f)
consistent = ALWAYS_TRUE
elseif f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned || f === Core._svec_ref
elseif f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned || f === Core._svec_len || f === Core._svec_ref
consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY
elseif f === Core._typevar || f === Core.memorynew
consistent = CONSISTENT_IF_NOTRETURNED
@@ -2784,11 +2794,12 @@ end
function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtypes::Vector{Any},
sv::Union{AbsIntState, Nothing})
𝕃ᵢ = typeinf_lattice(interp)
if isa(f, IntrinsicFunction)
if is_pure_intrinsic_infer(f) && all(@nospecialize(a) -> isa(a, Const), argtypes)
argvals = anymap(@nospecialize(a) -> (a::Const).val, argtypes)
# Early constant evaluation for foldable builtins with all const args
if isa(f, IntrinsicFunction) ? is_pure_intrinsic_infer(f) : (f in _PURE_BUILTINS || (f in _CONSISTENT_BUILTINS && f in _EFFECT_FREE_BUILTINS))
if is_all_const_arg(argtypes, 1)
argvals = collect_const_args(argtypes, 1)
try
# unroll a few cases which have specialized codegen
# unroll a few common cases for better codegen
if length(argvals) == 1
return Const(f(argvals[1]))
elseif length(argvals) == 2
@@ -2802,6 +2813,8 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp
return Bottom
end
end
end
if isa(f, IntrinsicFunction)
iidx = Int(reinterpret(Int32, f)) + 1
if iidx < 0 || iidx > length(T_IFUNC)
# unknown intrinsic
@@ -2828,6 +2841,7 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp
end
tf = T_FFUNC_VAL[fidx]
end

if hasvarargtype(argtypes)
if length(argtypes) - 1 > tf[2]
# definitely too many arguments
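As a quick illustration of the tfuncs changes (not part of the diff; it assumes this PR is applied, since `Core._svec_len` does not exist before it, and `sv` and `f` are made-up names): with all-`Const` arguments, `_svec_len_tfunc` returns a `Const` length, and the reworked `builtin_tfunction` now constant-evaluates pure or consistent-and-effect-free builtins up front, rather than only intrinsics.

```julia
# Runtime-level view of the builtins being modeled here.
sv = Core.svec(:a, :b, :c)

@assert Core._svec_len(sv) === 3       # new builtin; replaces the raw pointer load in Base
@assert Core._svec_ref(sv, 1) === :a   # existing builtin, now also marked consistent
@assert length(sv) === 3               # Base.length(::SimpleVector) now calls Core._svec_len

# Both builtins fold on constant arguments, so a call like this can reduce to the
# constant 3 during inference rather than touching an svec at run time.
f() = length(Core.svec(1, 2, 3))
@assert f() === 3
```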
2 changes: 1 addition & 1 deletion Compiler/test/effects.jl
@@ -1474,7 +1474,7 @@ end
let effects = Base.infer_effects((Core.SimpleVector,Int); optimize=false) do svec, i
Core._svec_ref(svec, i)
end
@test !Compiler.is_consistent(effects)
@test Compiler.is_consistent(effects)
@test Compiler.is_effect_free(effects)
@test !Compiler.is_nothrow(effects)
@test Compiler.is_terminates(effects)
6 changes: 1 addition & 5 deletions base/essentials.jl
@@ -979,11 +979,7 @@ setindex!(A::MemoryRef{Any}, @nospecialize(x)) = (memoryrefset!(A, x, :not_atomi

getindex(v::SimpleVector, i::Int) = (@_foldable_meta; Core._svec_ref(v, i))
function length(v::SimpleVector)
@_total_meta
t = @_gc_preserve_begin v
len = unsafe_load(Ptr{Int}(pointer_from_objref(v)))
@_gc_preserve_end t
return len
Core._svec_len(v)
end
firstindex(v::SimpleVector) = 1
lastindex(v::SimpleVector) = length(v)
1 change: 1 addition & 0 deletions src/builtin_proto.h
@@ -20,6 +20,7 @@ extern "C" {
XX(_primitivetype,"_primitivetype") \
XX(_setsuper,"_setsuper!") \
XX(_structtype,"_structtype") \
XX(_svec_len,"_svec_len") \
XX(_svec_ref,"_svec_ref") \
XX(_typebody,"_typebody!") \
XX(_typevar,"_typevar") \
18 changes: 16 additions & 2 deletions src/builtins.c
@@ -701,9 +701,15 @@ JL_CALLABLE(jl_f__apply_iterate)
return (jl_value_t*)t;
}
}
else if (f == BUILTIN(tuple) && jl_is_tuple(args[1])) {
return args[1];
else if (f == BUILTIN(tuple)) {
if (jl_is_tuple(args[1]))
return args[1];
if (jl_is_svec(args[1]))
return jl_f_tuple(NULL, jl_svec_data(args[1]), jl_svec_len(args[1]));
}
// optimization for `f(svec...)`
if (jl_is_svec(args[1]))
return jl_apply_generic(f, jl_svec_data(args[1]), jl_svec_len(args[1]));
}
// estimate how many real arguments we appear to have
size_t precount = 1;
@@ -2151,6 +2157,14 @@ JL_CALLABLE(jl_f__compute_sparams)
return (jl_value_t*)env;
}

JL_CALLABLE(jl_f__svec_len)
{
JL_NARGS(_svec_len, 1, 1);
jl_svec_t *s = (jl_svec_t*)args[0];
JL_TYPECHK(_svec_len, simplevector, (jl_value_t*)s);
return jl_box_long(jl_svec_len(s));
}

JL_CALLABLE(jl_f__svec_ref)
{
JL_NARGS(_svec_ref, 2, 2);
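A small Julia-level illustration of the runtime fast path added in `jl_f__apply_iterate` (not part of the diff; `g` and `sv` are made-up names, and the fast-path behavior assumes this PR is applied): when the single splatted container is already a `SimpleVector`, the builtin can hand its boxed contents straight to the callee, and `tuple(sv...)` is built directly from the svec's data instead of going through iteration.

```julia
g(args...) = length(args)
sv = Core.svec(1, 2.0, "three")

# Splatting a tuple and splatting an svec give the same answer; the svec form is what
# the optimizer now emits and what the new fast path handles without re-collecting.
@assert Core._apply_iterate(Base.iterate, g, (1, 2.0, "three")) == 3
@assert Core._apply_iterate(Base.iterate, g, sv) == 3

# tuple(sv...) round-trips the svec's contents into a tuple.
@assert Core._apply_iterate(Base.iterate, Core.tuple, sv) == (1, 2.0, "three")
```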
29 changes: 24 additions & 5 deletions src/cgutils.cpp
@@ -2228,6 +2228,9 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
}
Value *instr = nullptr;
if (!isboxed && jl_is_genericmemoryref_type(jltype)) {
//We don't specify the stronger expected memory ordering here because of fears it may interfere with vectorization and other optimizations
//if (Order == AtomicOrdering::NotAtomic)
// Order = AtomicOrdering::Monotonic;
// load these FCA as individual fields, so LLVM does not need to split them later
Value *fld0 = ctx.builder.CreateStructGEP(elty, ptr, 0);
LoadInst *load0 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(0), fld0, Align(alignment), false);
@@ -2403,11 +2406,26 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
instr = load;
}
if (r) {
StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
store->setOrdering(Order == AtomicOrdering::NotAtomic && isboxed ? AtomicOrdering::Release : Order);
jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
ai.noalias = MDNode::concatenate(aliasscope, ai.noalias);
ai.decorateInst(store);
if (false && !isboxed && Order == AtomicOrdering::NotAtomic && jl_is_genericmemoryref_type(jltype)) {
// if enabled, store these FCA as individual fields, so LLVM does not need to split them later and they can use release ordering
assert(r->getType() == ctx.types().T_jlgenericmemory);
Value *f1 = ctx.builder.CreateExtractValue(r, 0);
Value *f2 = ctx.builder.CreateExtractValue(r, 1);
static_assert(offsetof(jl_genericmemoryref_t, ptr_or_offset) == 0, "wrong field order");
StoreInst *store = ctx.builder.CreateAlignedStore(f1, ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, ptr, 0), Align(alignment));
store->setOrdering(AtomicOrdering::Release);
ai.decorateInst(store);
store = ctx.builder.CreateAlignedStore(f2, ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, ptr, 1), Align(alignment));
store->setOrdering(AtomicOrdering::Release);
ai.decorateInst(store);
}
else {
StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
store->setOrdering(Order == AtomicOrdering::NotAtomic && isboxed ? AtomicOrdering::Release : Order);
ai.decorateInst(store);
}
}
else {
assert(Order == AtomicOrdering::NotAtomic && !isboxed && rhs.typ == jltype);
@@ -4435,10 +4453,11 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
for (size_t i = nargs; i < nf; i++) {
if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) {
jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, strctinfo.tbaa);
ai.decorateInst(ctx.builder.CreateAlignedStore(
auto *store = ctx.builder.CreateAlignedStore(
ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0),
emit_ptrgep(ctx, strct, jl_field_offset(sty, i) + jl_field_size(sty, i) - 1),
Align(1)));
Align(1));
ai.decorateInst(store);
}
}
// TODO: verify that nargs <= nf (currently handled by front-end)