46 changes: 46 additions & 0 deletions Compiler/src/ssair/passes.jl
@@ -874,6 +874,49 @@ function perform_lifting!(compact::IncrementalCompact,
return Pair{Any, PhiNest}(stmt_val, PhiNest(visited_philikes, lifted_philikes, lifted_leaves, reverse_mapping, walker_callback))
end

function lift_apply_args!(compact::IncrementalCompact, idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice)
# Handle _apply_iterate calls: convert arguments to use `Core.svec`. The behavior of Core.svec (with boxing) better matches the ABI of codegen.
compact[idx] = nothing
for i in 4:length(stmt.args) # Skip iterate function, f, and first iterator
arg = stmt.args[i]
arg_type = argextype(arg, compact)
svec_args = nothing
if isa(arg_type, DataType) && arg_type.name === Tuple.name
if isa(arg, SSAValue)
arg_stmt = compact[arg][:stmt]
if is_known_call(arg_stmt, Core.tuple, compact)
svec_args = copy(arg_stmt.args)
end
end
if svec_args === nothing
# Fallback path: generate getfield calls for tuple elements
tuple_length = length(arg_type.parameters)
if tuple_length > 0 && !isvarargtype(arg_type.parameters[tuple_length])
svec_args = Vector{Any}(undef, tuple_length + 1)
for j in 1:tuple_length
getfield_call = Expr(:call, GlobalRef(Core, :getfield), arg, j)
getfield_type = arg_type.parameters[j]
inst = compact[SSAValue(idx)]
getfield_ssa = insert_node!(compact, SSAValue(idx), NewInstruction(getfield_call, getfield_type, NoCallInfo(), inst[:line], inst[:flag]))
svec_args[j + 1] = getfield_ssa
end
end
end
end
# Create Core.svec call if we have arguments
if svec_args !== nothing
svec_args[1] = GlobalRef(Core, :svec)
new_svec_call = Expr(:call)
new_svec_call.args = svec_args
inst = compact[SSAValue(idx)]
new_svec_ssa = insert_node!(compact, SSAValue(idx), NewInstruction(new_svec_call, SimpleVector, NoCallInfo(), inst[:line], inst[:flag]))
stmt.args[i] = new_svec_ssa
end
end
compact[idx] = stmt
nothing
end

function lift_svec_ref!(compact::IncrementalCompact, idx::Int, stmt::Expr)
length(stmt.args) != 3 && return

@@ -1377,6 +1420,9 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
compact[SSAValue(idx)] = (compact[enter_ssa][:stmt]::EnterNode).scope
elseif isexpr(stmt, :new)
refine_new_effects!(𝕃ₒ, compact, idx, stmt)
elseif is_known_call(stmt, Core._apply_iterate, compact)
length(stmt.args) >= 4 || continue
lift_apply_args!(compact, idx, stmt, 𝕃ₒ)
Comment on lines +1423 to +1425
Member

Just commenting for reference, and we don't have to do this in this PR, but I started to think it'd be better to make this kind of optimization independent of sroa_pass!.

Member Author

We might want to instead just rename the pass to GVN or MemSSAOpt, since doing everything in one pass is probably a lot more efficient, and either alternative name would reflect that this pass does general memory-value-replacement optimizations.

end
continue
end
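For orientation, a sketch of the rewrite `lift_apply_args!` performs (illustration only, not part of the diff; the names `f`, `t`, and `sv` are made up). Tuple arguments splatted through `Core._apply_iterate` are repackaged as `Core.svec` calls, either by reusing a visible `Core.tuple` call or, on the fallback path, by emitting one `Core.getfield` per element of a known-length, non-vararg tuple type.

```julia
# The splatted container is rebuilt as an svec, whose elements are already boxed and
# therefore match the runtime ABI of _apply_iterate more closely than a tuple does.
f(x, y) = x + y
t = (1, 2)

# Fallback shape spelled out by hand: one getfield per tuple element, then Core.svec.
sv = Core.svec(Core.getfield(t, 1), Core.getfield(t, 2))

# The rewrite replaces the tuple argument with the svec; both calls behave the same.
@assert Core._apply_iterate(Base.iterate, f, t) ==
        Core._apply_iterate(Base.iterate, f, sv) == 3
```

When the tuple comes straight from a `Core.tuple` call visible in the same IR, the pass skips the `getfield` step and feeds that call's arguments directly to `Core.svec`.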
44 changes: 29 additions & 15 deletions Compiler/src/tfuncs.jl
@@ -580,6 +580,15 @@ end
add_tfunc(nfields, 1, 1, nfields_tfunc, 1)
add_tfunc(Core._expr, 1, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->Expr), 100)
add_tfunc(svec, 0, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->SimpleVector), 20)

@nospecs function _svec_len_tfunc(𝕃::AbstractLattice, s)
if isa(s, Const) && isa(s.val, SimpleVector)
return Const(length(s.val))
end
return Int
end
add_tfunc(Core._svec_len, 1, 1, _svec_len_tfunc, 1)

@nospecs function _svec_ref_tfunc(𝕃::AbstractLattice, s, i)
if isa(s, Const) && isa(i, Const)
s, i = s.val, i.val
@@ -1960,15 +1969,8 @@ function tuple_tfunc(𝕃::AbstractLattice, argtypes::Vector{Any})
# UnionAll context is missing around this.
pop!(argtypes)
end
all_are_const = true
for i in 1:length(argtypes)
if !isa(argtypes[i], Const)
all_are_const = false
break
end
end
if all_are_const
return Const(ntuple(i::Int->argtypes[i].val, length(argtypes)))
if is_all_const_arg(argtypes, 1) # repeated from builtin_tfunction for the benefit of callers that use this tfunc directly
return Const(tuple(collect_const_args(argtypes, 1)...))
end
params = Vector{Any}(undef, length(argtypes))
anyinfo = false
@@ -2334,14 +2336,17 @@ function _builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f::Builtin), argt
elseif f === Core.compilerbarrier
na == 2 || return false
return compilerbarrier_nothrow(argtypes[1], nothing)
elseif f === Core._svec_len
na == 1 || return false
return _svec_len_tfunc(𝕃, argtypes[1]) isa Const
elseif f === Core._svec_ref
na == 2 || return false
return _svec_ref_tfunc(𝕃, argtypes[1], argtypes[2]) isa Const
end
return false
end

# known to be always effect-free (in particular nothrow)
# known to be always effect-free (in particular also nothrow)
const _PURE_BUILTINS = Any[
tuple,
svec,
Expand Down Expand Up @@ -2370,6 +2375,8 @@ const _CONSISTENT_BUILTINS = Any[
donotdelete,
memoryrefnew,
memoryrefoffset,
Core._svec_len,
Core._svec_ref,
]

# known to be effect-free (but not necessarily nothrow)
@@ -2394,6 +2401,7 @@ const _EFFECT_FREE_BUILTINS = [
Core.throw_methoderror,
getglobal,
compilerbarrier,
Core._svec_len,
Core._svec_ref,
]

@@ -2428,6 +2436,7 @@ const _ARGMEM_BUILTINS = Any[
replacefield!,
setfield!,
swapfield!,
Core._svec_len,
Core._svec_ref,
]

@@ -2571,6 +2580,7 @@ const _EFFECTS_KNOWN_BUILTINS = Any[
# Core._primitivetype,
# Core._setsuper!,
# Core._structtype,
Core._svec_len,
Core._svec_ref,
# Core._typebody!,
Core._typevar,
@@ -2675,7 +2685,7 @@ function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argty
else
if contains_is(_CONSISTENT_BUILTINS, f)
consistent = ALWAYS_TRUE
elseif f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned || f === Core._svec_ref
elseif f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned || f === Core._svec_len || f === Core._svec_ref
consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY
elseif f === Core._typevar || f === Core.memorynew
consistent = CONSISTENT_IF_NOTRETURNED
@@ -2784,11 +2794,12 @@ end
function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtypes::Vector{Any},
sv::Union{AbsIntState, Nothing})
𝕃ᵢ = typeinf_lattice(interp)
if isa(f, IntrinsicFunction)
if is_pure_intrinsic_infer(f) && all(@nospecialize(a) -> isa(a, Const), argtypes)
argvals = anymap(@nospecialize(a) -> (a::Const).val, argtypes)
# Early constant evaluation for foldable builtins with all const args
if isa(f, IntrinsicFunction) ? is_pure_intrinsic_infer(f) : (f in _PURE_BUILTINS || (f in _CONSISTENT_BUILTINS && f in _EFFECT_FREE_BUILTINS))
if is_all_const_arg(argtypes, 1)
argvals = collect_const_args(argtypes, 1)
try
# unroll a few cases which have specialized codegen
# unroll a few common cases for better codegen
if length(argvals) == 1
return Const(f(argvals[1]))
elseif length(argvals) == 2
@@ -2802,6 +2813,8 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp
return Bottom
end
end
end
if isa(f, IntrinsicFunction)
iidx = Int(reinterpret(Int32, f)) + 1
if iidx < 0 || iidx > length(T_IFUNC)
# unknown intrinsic
@@ -2828,6 +2841,7 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp
end
tf = T_FFUNC_VAL[fidx]
end

if hasvarargtype(argtypes)
if length(argtypes) - 1 > tf[2]
# definitely too many arguments
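As a quick illustration of the tfuncs changes (not part of the diff; it assumes this PR is applied, since `Core._svec_len` does not exist before it, and `sv` and `f` are made-up names): with all-`Const` arguments, `_svec_len_tfunc` returns a `Const` length, and the reworked `builtin_tfunction` now constant-evaluates pure or consistent-and-effect-free builtins up front, rather than only intrinsics.

```julia
# Runtime-level view of the builtins being modeled here.
sv = Core.svec(:a, :b, :c)

@assert Core._svec_len(sv) === 3       # new builtin; replaces the raw pointer load in Base
@assert Core._svec_ref(sv, 1) === :a   # existing builtin, now also marked consistent
@assert length(sv) === 3               # Base.length(::SimpleVector) now calls Core._svec_len

# Both builtins fold on constant arguments, so a call like this can reduce to the
# constant 3 during inference rather than touching an svec at run time.
f() = length(Core.svec(1, 2, 3))
@assert f() === 3
```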
2 changes: 1 addition & 1 deletion Compiler/test/effects.jl
@@ -1474,7 +1474,7 @@ end
let effects = Base.infer_effects((Core.SimpleVector,Int); optimize=false) do svec, i
Core._svec_ref(svec, i)
end
@test !Compiler.is_consistent(effects)
@test Compiler.is_consistent(effects)
@test Compiler.is_effect_free(effects)
@test !Compiler.is_nothrow(effects)
@test Compiler.is_terminates(effects)
6 changes: 1 addition & 5 deletions base/essentials.jl
@@ -979,11 +979,7 @@ setindex!(A::MemoryRef{Any}, @nospecialize(x)) = (memoryrefset!(A, x, :not_atomi

getindex(v::SimpleVector, i::Int) = (@_foldable_meta; Core._svec_ref(v, i))
function length(v::SimpleVector)
@_total_meta
t = @_gc_preserve_begin v
len = unsafe_load(Ptr{Int}(pointer_from_objref(v)))
@_gc_preserve_end t
return len
Core._svec_len(v)
end
firstindex(v::SimpleVector) = 1
lastindex(v::SimpleVector) = length(v)
1 change: 1 addition & 0 deletions src/builtin_proto.h
@@ -20,6 +20,7 @@ extern "C" {
XX(_primitivetype,"_primitivetype") \
XX(_setsuper,"_setsuper!") \
XX(_structtype,"_structtype") \
XX(_svec_len,"_svec_len") \
XX(_svec_ref,"_svec_ref") \
XX(_typebody,"_typebody!") \
XX(_typevar,"_typevar") \
18 changes: 16 additions & 2 deletions src/builtins.c
@@ -701,9 +701,15 @@ JL_CALLABLE(jl_f__apply_iterate)
return (jl_value_t*)t;
}
}
else if (f == BUILTIN(tuple) && jl_is_tuple(args[1])) {
return args[1];
else if (f == BUILTIN(tuple)) {
if (jl_is_tuple(args[1]))
return args[1];
if (jl_is_svec(args[1]))
return jl_f_tuple(NULL, jl_svec_data(args[1]), jl_svec_len(args[1]));
}
// optimization for `f(svec...)`
if (jl_is_svec(args[1]))
return jl_apply_generic(f, jl_svec_data(args[1]), jl_svec_len(args[1]));
}
// estimate how many real arguments we appear to have
size_t precount = 1;
@@ -2151,6 +2157,14 @@ JL_CALLABLE(jl_f__compute_sparams)
return (jl_value_t*)env;
}

JL_CALLABLE(jl_f__svec_len)
{
JL_NARGS(_svec_len, 1, 1);
jl_svec_t *s = (jl_svec_t*)args[0];
JL_TYPECHK(_svec_len, simplevector, (jl_value_t*)s);
return jl_box_long(jl_svec_len(s));
}

JL_CALLABLE(jl_f__svec_ref)
{
JL_NARGS(_svec_ref, 2, 2);
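A small Julia-level illustration of the runtime fast path added in `jl_f__apply_iterate` (not part of the diff; `g` and `sv` are made-up names, and the fast-path behavior assumes this PR is applied): when the single splatted container is already a `SimpleVector`, the builtin can hand its boxed contents straight to the callee, and `tuple(sv...)` is built directly from the svec's data instead of going through iteration.

```julia
g(args...) = length(args)
sv = Core.svec(1, 2.0, "three")

# Splatting a tuple and splatting an svec give the same answer; the svec form is what
# the optimizer now emits and what the new fast path handles without re-collecting.
@assert Core._apply_iterate(Base.iterate, g, (1, 2.0, "three")) == 3
@assert Core._apply_iterate(Base.iterate, g, sv) == 3

# tuple(sv...) round-trips the svec's contents into a tuple.
@assert Core._apply_iterate(Base.iterate, Core.tuple, sv) == (1, 2.0, "three")
```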
29 changes: 24 additions & 5 deletions src/cgutils.cpp
@@ -2228,6 +2228,9 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
}
Value *instr = nullptr;
if (!isboxed && jl_is_genericmemoryref_type(jltype)) {
//We don't specify the stronger expected memory ordering here because of fears it may interfere with vectorization and other optimizations
//if (Order == AtomicOrdering::NotAtomic)
// Order = AtomicOrdering::Monotonic;
// load these FCA as individual fields, so LLVM does not need to split them later
Value *fld0 = ctx.builder.CreateStructGEP(elty, ptr, 0);
LoadInst *load0 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(0), fld0, Align(alignment), false);
@@ -2403,11 +2406,26 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
instr = load;
}
if (r) {
StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
store->setOrdering(Order == AtomicOrdering::NotAtomic && isboxed ? AtomicOrdering::Release : Order);
jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
ai.noalias = MDNode::concatenate(aliasscope, ai.noalias);
ai.decorateInst(store);
if (false && !isboxed && Order == AtomicOrdering::NotAtomic && jl_is_genericmemoryref_type(jltype)) {
// if enabled, store these FCA as individual fields, so LLVM does not need to split them later and they can use release ordering
assert(r->getType() == ctx.types().T_jlgenericmemory);
Value *f1 = ctx.builder.CreateExtractValue(r, 0);
Value *f2 = ctx.builder.CreateExtractValue(r, 1);
static_assert(offsetof(jl_genericmemoryref_t, ptr_or_offset) == 0, "wrong field order");
StoreInst *store = ctx.builder.CreateAlignedStore(f1, ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, ptr, 0), Align(alignment));
store->setOrdering(AtomicOrdering::Release);
ai.decorateInst(store);
store = ctx.builder.CreateAlignedStore(f2, ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, ptr, 1), Align(alignment));
store->setOrdering(AtomicOrdering::Release);
ai.decorateInst(store);
}
else {
StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
store->setOrdering(Order == AtomicOrdering::NotAtomic && isboxed ? AtomicOrdering::Release : Order);
ai.decorateInst(store);
}
}
else {
assert(Order == AtomicOrdering::NotAtomic && !isboxed && rhs.typ == jltype);
@@ -4435,10 +4453,11 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
for (size_t i = nargs; i < nf; i++) {
if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) {
jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, strctinfo.tbaa);
ai.decorateInst(ctx.builder.CreateAlignedStore(
auto *store = ctx.builder.CreateAlignedStore(
ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0),
emit_ptrgep(ctx, strct, jl_field_offset(sty, i) + jl_field_size(sty, i) - 1),
Align(1)));
Align(1));
ai.decorateInst(store);
}
}
// TODO: verify that nargs <= nf (currently handled by front-end)