diff --git a/.gitignore b/.gitignore index 0368b7d19efa0..f0072fec9c91e 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ /source-dist.tmp /source-dist.tmp1 +*.expmap *.exe *.dll *.dwo diff --git a/.mailmap b/.mailmap index 5335c88a63d7d..e91501651d065 100644 --- a/.mailmap +++ b/.mailmap @@ -283,3 +283,15 @@ Daniel Karrasch Roger Luo Roger Luo + +Frames Catherine White +Frames Catherine White +Frames Catherine White + +Claire Foster + +Jishnu Bhattacharya +Jishnu Bhattacharya + +Shuhei Kadowaki +Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 099ef6b03509b..0131dcbc4a278 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -185,7 +185,14 @@ At the moment, this should always be done with the following `compat` admonition *By contributing code to Julia, you are agreeing to release it under the [MIT License](https://github.com/JuliaLang/julia/tree/master/LICENSE.md).* -The Julia community uses [GitHub issues](https://github.com/JuliaLang/julia/issues) to track and discuss problems, feature requests, and pull requests (PR). +The Julia community uses [GitHub issues](https://github.com/JuliaLang/julia/issues) to track and discuss problems, feature requests, and pull requests (PR). + +Issues and pull requests should have self-explanatory titles such that they can be understood from the list of PRs and Issues. +e.g. `Add {feature}` and `Fix {bug}` are good, `Fix #12345. Corrects the bug.` is bad. + +You can make pull requests for incomplete features to get code review. 
The convention is to open these as draft PRs and prefix +the pull request title with "WIP:" for Work In Progress, or "RFC:" for Request for Comments when work is completed and ready +for merging. This will prevent accidental merging of work that is in progress. Note: These instructions are for adding to or improving functionality in the base library. Before getting started, it can be helpful to discuss the proposed changes or additions on the [Julia Discourse forum](https://discourse.julialang.org) or in a GitHub issue---it's possible your proposed change belongs in a package rather than the core language. Also, keep in mind that changing stuff in the base can potentially break a lot of things. Finally, because of the time required to build Julia, note that it's usually faster to develop your code in stand-alone files, get it working, and then migrate it into the base libraries. @@ -325,7 +332,7 @@ please remove the `backport-X.Y` tag from the originating pull request for the c - use lower case with underscores for method names - it is generally preferred to use ASCII operators and identifiers over Unicode equivalents whenever possible - - in docstring refer to the language as "Julia" and the executable as "`julia`" + - in docstrings refer to the language as "Julia" and the executable as "`julia`" #### General Formatting Guidelines For C code contributions diff --git a/LICENSE.md b/LICENSE.md index fdf24e7603d73..d4125f4fba221 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2009-2022: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors +Copyright (c) 2009-2023: Jeff Bezanson, Stefan Karpinski, Viral B. 
Shah, and other contributors: https://github.com/JuliaLang/julia/contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/Make.inc b/Make.inc index bef6d1747b7d7..916b64b189ca8 100644 --- a/Make.inc +++ b/Make.inc @@ -191,15 +191,17 @@ JULIA_MINOR_VERSION := $(shell echo $(JULIA_VERSION) | cut -d'-' -f 1 | cut -d'. JULIA_PATCH_VERSION := $(shell echo $(JULIA_VERSION) | cut -d'-' -f 1 | cut -d'+' -f 1 | cut -d'.' -f 3) # libjulia's SONAME will follow the format libjulia.so.$(SOMAJOR). Before v1.0.0, -# SOMAJOR will be a two-decimal value, e.g. libjulia.so.0.5, whereas at and beyond -# v1.0.0, SOMAJOR will be simply the major version number, e.g. libjulia.so.1 +# somajor was a two-decimal value (e.g. libjulia.so.0.5). During v1.0.x - v1.9.x, +# somajor was simply the major version number (e.g. libjulia.so.1). Starting in +# v1.10.0, somajor is major.minor again (e.g. libjulia.so.1.10) # The file itself will ultimately symlink to libjulia.so.$(SOMAJOR).$(SOMINOR) -ifeq ($(JULIA_MAJOR_VERSION),0) SOMAJOR := $(JULIA_MAJOR_VERSION).$(JULIA_MINOR_VERSION) SOMINOR := $(JULIA_PATCH_VERSION) -else -SOMAJOR := $(JULIA_MAJOR_VERSION) -SOMINOR := $(JULIA_MINOR_VERSION) + +# This suffix affects libjulia's SONAME and the symbol version associated with +# all of its exported symbols. 
+ifdef SYMBOL_VERSION_SUFFIX +SOMAJOR := $(SOMAJOR)_$(SYMBOL_VERSION_SUFFIX) endif ifneq ($(NO_GIT), 1) @@ -489,37 +491,61 @@ MACOSX_VERSION_MIN := 11.0 endif endif -ifeq ($(USEGCC),1) -CC := $(CROSS_COMPILE)gcc -CXX := $(CROSS_COMPILE)g++ -JCFLAGS := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 +JCFLAGS_COMMON := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 +JCFLAGS_CLANG := $(JCFLAGS_COMMON) +JCFLAGS_GCC := $(JCFLAGS_COMMON) -fno-gnu-unique + # AArch64 needs this flag to generate the .eh_frame used by libunwind -JCPPFLAGS := -fasynchronous-unwind-tables -JCXXFLAGS := -pipe $(fPIC) -fno-rtti -std=c++14 +JCPPFLAGS_COMMON := -fasynchronous-unwind-tables +JCPPFLAGS_CLANG := $(JCPPFLAGS_COMMON) +JCPPFLAGS_GCC := $(JCPPFLAGS_COMMON) + +JCXXFLAGS_COMMON := -pipe $(fPIC) -fno-rtti -std=c++14 +JCXXFLAGS_CLANG := $(JCXXFLAGS_COMMON) -pedantic +JCXXFLAGS_GCC := $(JCXXFLAGS_COMMON) -fno-gnu-unique + +DEBUGFLAGS_COMMON := -O0 -DJL_DEBUG_BUILD -fstack-protector +DEBUGFLAGS_CLANG := $(DEBUGFLAGS_COMMON) -g +DEBUGFLAGS_GCC := $(DEBUGFLAGS_COMMON) -ggdb2 + +SHIPFLAGS_COMMON := -O3 +SHIPFLAGS_CLANG := $(SHIPFLAGS_COMMON) -g +SHIPFLAGS_GCC := $(SHIPFLAGS_COMMON) -ggdb2 -falign-functions + +ifeq ($(OS), Darwin) +JCPPFLAGS_CLANG += -D_LARGEFILE_SOURCE -D_DARWIN_USE_64_BIT_INODE=1 +endif + ifneq ($(OS), WINNT) # Do not enable on windows to avoid warnings from libuv. 
-JCXXFLAGS += -pedantic +JCXXFLAGS_GCC += -pedantic endif -DEBUGFLAGS := -O0 -ggdb2 -DJL_DEBUG_BUILD -fstack-protector -SHIPFLAGS := -O3 -ggdb2 -falign-functions + +ifeq ($(USEGCC),1) +CC := $(CROSS_COMPILE)gcc +CXX := $(CROSS_COMPILE)g++ +JCFLAGS := $(JCFLAGS_GCC) +JCPPFLAGS := $(JCPPFLAGS_GCC) +JCXXFLAGS := $(JCXXFLAGS_GCC) +DEBUGFLAGS := $(DEBUGFLAGS_GCC) +SHIPFLAGS := $(SHIPFLAGS_GCC) endif ifeq ($(USECLANG),1) -CC := $(CROSS_COMPILE)clang -CXX := $(CROSS_COMPILE)clang++ -JCFLAGS := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 -# AArch64 needs this flag to generate the .eh_frame used by libunwind -JCPPFLAGS := -fasynchronous-unwind-tables -JCXXFLAGS := -pipe $(fPIC) -fno-rtti -pedantic -std=c++14 -DEBUGFLAGS := -O0 -g -DJL_DEBUG_BUILD -fstack-protector -SHIPFLAGS := -O3 -g +CC := $(CROSS_COMPILE)clang +CXX := $(CROSS_COMPILE)clang++ +JCFLAGS := $(JCFLAGS_CLANG) +JCPPFLAGS := $(JCPPFLAGS_CLANG) +JCXXFLAGS := $(JCXXFLAGS_CLANG) +DEBUGFLAGS := $(DEBUGFLAGS_CLANG) +SHIPFLAGS := $(SHIPFLAGS_CLANG) + ifeq ($(OS), Darwin) CC += -mmacosx-version-min=$(MACOSX_VERSION_MIN) CXX += -mmacosx-version-min=$(MACOSX_VERSION_MIN) FC += -mmacosx-version-min=$(MACOSX_VERSION_MIN) # export MACOSX_DEPLOYMENT_TARGET so that ld picks it up, especially for deps export MACOSX_DEPLOYMENT_TARGET=$(MACOSX_VERSION_MIN) -JCPPFLAGS += -D_LARGEFILE_SOURCE -D_DARWIN_USE_64_BIT_INODE=1 endif endif @@ -702,7 +728,7 @@ endif # OS Linux or FreeBSD endif # SANITIZE_MEMORY=1 ifeq ($(SANITIZE_ADDRESS),1) SANITIZE_OPTS += -fsanitize=address -SANITIZE_LDFLAGS += -fsanitize=address +SANITIZE_LDFLAGS += -fsanitize=address -shared-libasan endif ifeq ($(SANITIZE_THREAD),1) SANITIZE_OPTS += -fsanitize=thread @@ -1334,7 +1360,7 @@ ifeq (supported, $(shell echo $(IFUNC_DETECT_SRC) | $(CC) -Werror -x c - -S -o / JCPPFLAGS += -DJULIA_HAS_IFUNC_SUPPORT=1 endif JLDFLAGS += -Wl,-Bdynamic -OSLIBS += -Wl,--version-script=$(JULIAHOME)/src/julia.expmap +OSLIBS += 
-Wl,--version-script=$(BUILDROOT)/src/julia.expmap ifneq ($(SANITIZE),1) JLDFLAGS += -Wl,-no-undefined endif @@ -1359,7 +1385,7 @@ OSLIBS += -lelf -lkvm -lrt -lpthread -latomic # See #21788 OSLIBS += -lgcc_s -OSLIBS += -Wl,--export-dynamic -Wl,--version-script=$(JULIAHOME)/src/julia.expmap \ +OSLIBS += -Wl,--export-dynamic -Wl,--version-script=$(BUILDROOT)/src/julia.expmap \ $(NO_WHOLE_ARCHIVE) endif @@ -1374,7 +1400,7 @@ endif ifeq ($(OS), WINNT) HAVE_SSP := 1 -OSLIBS += -Wl,--export-all-symbols -Wl,--version-script=$(JULIAHOME)/src/julia.expmap \ +OSLIBS += -Wl,--export-all-symbols -Wl,--version-script=$(BUILDROOT)/src/julia.expmap \ $(NO_WHOLE_ARCHIVE) -lpsapi -lkernel32 -lws2_32 -liphlpapi -lwinmm -ldbghelp -luserenv -lsecur32 -latomic JLDFLAGS += -Wl,--stack,8388608 ifeq ($(ARCH),i686) @@ -1517,6 +1543,12 @@ endef # Overridable in Make.user WINE ?= wine +ifeq ($(BINARY),32) +HEAPLIM := --heap-size-hint=500M +else +HEAPLIM := +endif + # many of the following targets must be = not := because the expansion of the makefile functions (and $1) shouldn't happen until later ifeq ($(BUILD_OS), WINNT) # MSYS spawn = $(1) diff --git a/Makefile b/Makefile index cfa5af6052db9..d5cce165dc596 100644 --- a/Makefile +++ b/Makefile @@ -237,6 +237,14 @@ JL_PRIVATE_LIBS-$(USE_SYSTEM_CSL) += libwinpthread else JL_PRIVATE_LIBS-$(USE_SYSTEM_CSL) += libpthread endif +ifeq ($(SANITIZE),1) +ifeq ($(USECLANG),1) +JL_PRIVATE_LIBS-1 += libclang_rt.asan +else +JL_PRIVATE_LIBS-1 += libasan +endif +endif + ifeq ($(WITH_TRACY),1) JL_PRIVATE_LIBS-0 += libTracyClient endif @@ -365,6 +373,10 @@ endif # Remove various files which should not be installed -rm -f $(DESTDIR)$(datarootdir)/julia/base/version_git.sh -rm -f $(DESTDIR)$(datarootdir)/julia/test/Makefile + -rm -f $(DESTDIR)$(datarootdir)/julia/base/*/source-extracted + -rm -f $(DESTDIR)$(datarootdir)/julia/base/*/build-configured + -rm -f $(DESTDIR)$(datarootdir)/julia/base/*/build-compiled + -rm -f 
$(DESTDIR)$(datarootdir)/julia/base/*/build-checked -rm -f $(DESTDIR)$(datarootdir)/julia/stdlib/$(VERSDIR)/*/source-extracted -rm -f $(DESTDIR)$(datarootdir)/julia/stdlib/$(VERSDIR)/*/build-configured -rm -f $(DESTDIR)$(datarootdir)/julia/stdlib/$(VERSDIR)/*/build-compiled diff --git a/NEWS.md b/NEWS.md index 5c42c469e4051..e9ae12847ed29 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,9 @@ Julia v1.10 Release Notes New language features --------------------- +* JuliaSyntax.jl is now used as the default parser, providing better diagnostics and faster + parsing. Set environment variable `JULIA_USE_NEW_PARSER` to `0` to switch back to the old + parser if necessary (and if you find this necessary, please file an issue) ([#46372]). * `⥺` (U+297A, `\leftarrowsubset`) and `⥷` (U+2977, `\leftarrowless`) may now be used as binary operators with arrow precedence. ([#45962]) @@ -18,12 +21,21 @@ Language changes that significantly improves load and inference times for heavily overloaded methods that dispatch on Types (such as traits and constructors). * The "h bar" `ℏ` (`\hslash` U+210F) character is now treated as equivalent to `ħ` (`\hbar` U+0127). +* The `@simd` macro now has a more limited and clearer semantics, it only enables reordering and contraction + of floating-point operations, instead of turning on all "fastmath" optimizations. + If you observe performance regressions due to this change, you can recover previous behavior with `@fastmath @simd`, + if you are OK with all the optimizations enabled by the `@fastmath` macro. ([#49405]) +* When a method with keyword arguments is displayed in the stack trace view, the textual + representation of the keyword arguments' types is simplified using the new + `@Kwargs{key1::Type1, ...}` macro syntax ([#49959]). Compiler/Runtime improvements ----------------------------- * The `@pure` macro is now deprecated. Use `Base.@assume_effects :foldable` instead ([#48682]). 
* The mark phase of the Garbage Collector is now multi-threaded ([#48600]). +* [JITLink](https://llvm.org/docs/JITLink.html) is enabled by default on Linux aarch64 when Julia is linked to LLVM 15 or later versions ([#49745]). + This should resolve many segmentation faults previously observed on this platform. Command-line option changes --------------------------- @@ -44,6 +56,8 @@ New library functions * `tanpi` is now defined. It computes tan(πx) more accurately than `tan(pi*x)` ([#48575]). * `fourthroot(x)` is now defined in `Base.Math` and can be used to compute the fourth root of `x`. It can also be accessed using the unicode character `∜`, which can be typed by `\fourthroot` ([#48899]). +* `Libc.memmove`, `Libc.memset`, and `Libc.memcpy` are now defined, whose functionality matches that of their respective C calls. +* `Base.isprecompiled(pkg::PkgId)` to identify whether a package has already been precompiled ([#50218]). New library features -------------------- @@ -52,6 +66,7 @@ New library features * `binomial(x, k)` now supports non-integer `x` ([#48124]). * A `CartesianIndex` is now treated as a "scalar" for broadcasting ([#47044]). * `printstyled` now supports italic output ([#45164]). +* `parent` and `parentindices` support `SubString`s Standard library changes ------------------------ @@ -82,6 +97,11 @@ Standard library changes (real symmetric) part of a matrix ([#31836]). * The `norm` of the adjoint or transpose of an `AbstractMatrix` now returns the norm of the parent matrix by default, matching the current behaviour for `AbstractVector`s ([#49020]). +* `eigen(A, B)` and `eigvals(A, B)`, where one of `A` or `B` is symmetric or Hermitian, + are now fully supported ([#49533]) +* `eigvals/eigen(A, cholesky(B))` now computes the generalized eigenvalues (`eigen`: and eigenvectors) + of `A` and `B` via Cholesky decomposition for positive definite `B`. Note: The second argument is + the output of `cholesky`. 
#### Printf * Format specifiers now support dynamic width and precision, e.g. `%*s` and `%*.*g` ([#40105]). @@ -94,6 +114,8 @@ Standard library changes #### REPL +* When stack traces are printed, the printed depth of types in function signatures will be limited + to avoid overly verbose output ([#49795]). #### SuiteSparse diff --git a/README.md b/README.md index 26fbb21a8a6a7..a4480ecf482cd 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,8 @@ + diff --git a/base/.gitignore b/base/.gitignore index e572b8ea229d0..0fab5b41fda08 100644 --- a/base/.gitignore +++ b/base/.gitignore @@ -8,3 +8,4 @@ /version_git.jl /version_git.jl.phony /userimg.jl +/JuliaSyntax diff --git a/base/Base.jl b/base/Base.jl index 06df2edb276fd..1fc20293aa384 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -6,7 +6,7 @@ using Core.Intrinsics, Core.IR # to start, we're going to use a very simple definition of `include` # that doesn't require any function (except what we can get from the `Core` top-module) -const _included_files = Array{Tuple{Module,String},1}() +const _included_files = Array{Tuple{Module,String},1}(Core.undef, 1) function include(mod::Module, path::String) ccall(:jl_array_grow_end, Cvoid, (Any, UInt), _included_files, UInt(1)) Core.arrayset(true, _included_files, (mod, ccall(:jl_prepend_cwd, Any, (Any,), path)), arraylen(_included_files)) @@ -163,6 +163,7 @@ include("int.jl") include("operators.jl") include("pointer.jl") include("refvalue.jl") +include("cmem.jl") include("refpointer.jl") # now replace the Pair constructor (relevant for NamedTuples) with one that calls our Base.convert @@ -316,7 +317,7 @@ include("version.jl") # system & environment include("sysinfo.jl") include("libc.jl") -using .Libc: getpid, gethostname, time +using .Libc: getpid, gethostname, time, memcpy, memset, memmove, memcmp # These used to be in build_h.jl and are retained for backwards compatibility. # NOTE: keep in sync with `libblastrampoline_jll.libblastrampoline`. 
@@ -488,6 +489,10 @@ a_method_to_overwrite_in_test() = inferencebarrier(1) include(mod::Module, _path::AbstractString) = _include(identity, mod, _path) include(mapexpr::Function, mod::Module, _path::AbstractString) = _include(mapexpr, mod, _path) +# External libraries vendored into Base +Core.println("JuliaSyntax/src/JuliaSyntax.jl") +include(@__MODULE__, "JuliaSyntax/src/JuliaSyntax.jl") + end_base_include = time_ns() const _sysimage_modules = PkgId[] @@ -596,8 +601,12 @@ function __init__() ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), PROFILE_PRINT_COND[].handle) errormonitor(Threads.@spawn(profile_printing_listener())) end + _require_world_age[] = get_world_counter() # Prevent spawned Julia process from getting stuck waiting on Tracy to connect. delete!(ENV, "JULIA_WAIT_FOR_TRACY") + if get_bool_env("JULIA_USE_NEW_PARSER", true) === true + JuliaSyntax.enable_in_core!() + end nothing end @@ -606,5 +615,8 @@ end end +# Ensure this file is also tracked +@assert !isassigned(_included_files, 1) +_included_files[1] = (parentmodule(Base), abspath(@__FILE__)) end # baremodule Base diff --git a/base/Enums.jl b/base/Enums.jl index 027677b432f37..45a1b66753484 100644 --- a/base/Enums.jl +++ b/base/Enums.jl @@ -21,6 +21,14 @@ Base.cconvert(::Type{T}, x::Enum{T2}) where {T<:Integer,T2<:Integer} = T(x)::T Base.write(io::IO, x::Enum{T}) where {T<:Integer} = write(io, T(x)) Base.read(io::IO, ::Type{T}) where {T<:Enum} = T(read(io, basetype(T))) +""" + _enum_hash(x::Enum, h::UInt) + +Compute hash for an enum value `x`. This internal method will be specialized +for every enum type created through [`@enum`](@ref). +""" +_enum_hash(x::Enum, h::UInt) = invoke(hash, Tuple{Any, UInt}, x, h) +Base.hash(x::Enum, h::UInt) = _enum_hash(x, h) Base.isless(x::T, y::T) where {T<:Enum} = isless(basetype(T)(x), basetype(T)(y)) Base.Symbol(x::Enum) = namemap(typeof(x))[Integer(x)]::Symbol @@ -206,8 +214,12 @@ macro enum(T::Union{Symbol,Expr}, syms...) 
Enums.namemap(::Type{$(esc(typename))}) = $(esc(namemap)) Base.typemin(x::Type{$(esc(typename))}) = $(esc(typename))($lo) Base.typemax(x::Type{$(esc(typename))}) = $(esc(typename))($hi) - let enum_hash = hash($(esc(typename))) - Base.hash(x::$(esc(typename)), h::UInt) = hash(enum_hash, hash(Integer(x), h)) + let type_hash = hash($(esc(typename))) + # Use internal `_enum_hash` to allow users to specialize + # `Base.hash` for their own enum types without overwriting the + # method we would define here. This avoids a warning for + # precompilation. + Enums._enum_hash(x::$(esc(typename)), h::UInt) = hash(type_hash, hash(Integer(x), h)) end let insts = (Any[ $(esc(typename))(v) for v in $values ]...,) Base.instances(::Type{$(esc(typename))}) = insts diff --git a/base/abstractarray.jl b/base/abstractarray.jl index cb3956eb7c6d4..1417987847ec4 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -604,20 +604,6 @@ end size_to_strides(s, d) = (s,) size_to_strides(s) = () - -function isassigned(a::AbstractArray, i::Integer...) - try - a[i...] - true - catch e - if isa(e, BoundsError) || isa(e, UndefRefError) - return false - else - rethrow() - end - end -end - function isstored(A::AbstractArray{<:Any,N}, I::Vararg{Integer,N}) where {N} @boundscheck checkbounds(A, I...) return true @@ -1442,7 +1428,7 @@ end """ parent(A) -Return the underlying "parent array”. This parent array of objects of types `SubArray`, `ReshapedArray` +Return the underlying parent object of the view. This parent of objects of types `SubArray`, `SubString`, `ReshapedArray` or `LinearAlgebra.Transpose` is what was passed as an argument to `view`, `reshape`, `transpose`, etc. during object creation. If the input is not a wrapped object, return the input itself. If the input is wrapped multiple times, only the outermost wrapper will be removed. 
@@ -1465,6 +1451,8 @@ julia> parent(V) 3 4 ``` """ +function parent end + parent(a::AbstractArray) = a ## rudimentary aliasing detection ## @@ -1644,6 +1632,14 @@ end typed_hcat(::Type{T}, A::AbstractVecOrMat...) where {T} = _typed_hcat(T, A) +# Catch indexing errors like v[i +1] (instead of v[i+1] or v[i + 1]), where indexing is +# interpreted as a typed concatenation. (issue #49676) +typed_hcat(::AbstractArray, other...) = throw(ArgumentError("It is unclear whether you \ + intend to perform an indexing operation or typed concatenation. If you intend to \ + perform indexing (v[1 + 2]), adjust spacing or insert missing operator to clarify. \ + If you intend to perform typed concatenation (T[1 2]), ensure that T is a type.")) + + hcat(A::AbstractVecOrMat...) = typed_hcat(promote_eltype(A...), A...) hcat(A::AbstractVecOrMat{T}...) where {T} = typed_hcat(T, A...) @@ -2408,18 +2404,22 @@ function _typed_hvncat(::Type{T}, dims::NTuple{N, Int}, row_first::Bool, xs::Num end function hvncat_fill!(A::Array, row_first::Bool, xs::Tuple) + nr, nc = size(A, 1), size(A, 2) + na = prod(size(A)[3:end]) + len = length(xs) + nrc = nr * nc + if nrc * na != len + throw(ArgumentError("argument count $(len) does not match specified shape $(size(A))")) + end # putting these in separate functions leads to unnecessary allocations if row_first - nr, nc = size(A, 1), size(A, 2) - nrc = nr * nc - na = prod(size(A)[3:end]) k = 1 for d ∈ 1:na dd = nrc * (d - 1) for i ∈ 1:nr Ai = dd + i for j ∈ 1:nc - A[Ai] = xs[k] + @inbounds A[Ai] = xs[k] k += 1 Ai += nr end @@ -2427,7 +2427,7 @@ function hvncat_fill!(A::Array, row_first::Bool, xs::Tuple) end else for k ∈ eachindex(xs) - A[k] = xs[k] + @inbounds A[k] = xs[k] end end end @@ -2613,28 +2613,36 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as:: return A end -function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::Vector{Int}, d1::Int, d2::Int, as::Tuple{Vararg}) where {T, N} +function 
hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::Vector{Int}, + d1::Int, d2::Int, as::Tuple) where {T, N} + N > 1 || throw(ArgumentError("dimensions of the destination array must be at least 2")) + length(scratch1) == length(scratch2) == N || + throw(ArgumentError("scratch vectors must have as many elements as the destination array has dimensions")) + 0 < d1 < 3 && + 0 < d2 < 3 && + d1 != d2 || + throw(ArgumentError("d1 and d2 must be either 1 or 2, exclusive.")) outdims = size(A) offsets = scratch1 inneroffsets = scratch2 for a ∈ as if isa(a, AbstractArray) for ai ∈ a - Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N) + @inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N) A[Ai] = ai - for j ∈ 1:N + @inbounds for j ∈ 1:N inneroffsets[j] += 1 inneroffsets[j] < cat_size(a, j) && break inneroffsets[j] = 0 end end else - Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N) + @inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N) A[Ai] = a end - for j ∈ (d1, d2, 3:N...) + @inbounds for j ∈ (d1, d2, 3:N...) offsets[j] += cat_size(a, j) offsets[j] < outdims[j] && break offsets[j] = 0 @@ -3273,7 +3281,7 @@ mapany(f, itr) = Any[f(x) for x in itr] map(f, c...) -> collection Transform collection `c` by applying `f` to each element. For multiple collection arguments, -apply `f` elementwise, and stop when when any of them is exhausted. +apply `f` elementwise, and stop when any of them is exhausted. See also [`map!`](@ref), [`foreach`](@ref), [`mapreduce`](@ref), [`mapslices`](@ref), [`zip`](@ref), [`Iterators.map`](@ref). diff --git a/base/array.jl b/base/array.jl index 68e3e38992731..3a12b38c5bc26 100644 --- a/base/array.jl +++ b/base/array.jl @@ -9,7 +9,7 @@ The objects called do not have matching dimensionality. Optional argument `msg` descriptive error string. 
""" struct DimensionMismatch <: Exception - msg::String + msg::AbstractString end DimensionMismatch() = DimensionMismatch("") @@ -252,9 +252,14 @@ function bitsunionsize(u::Union) return sz end -# Deprecate this, as it seems to have no documented meaning and is unused here, -# but is frequently accessed in packages elsize(@nospecialize _::Type{A}) where {T,A<:Array{T}} = aligned_sizeof(T) +function elsize(::Type{Ptr{T}}) where T + # this only must return something valid for values which satisfy is_valid_intrinsic_elptr(T), + # which includes Any and most concrete datatypes + T === Any && return sizeof(Ptr{Any}) + T isa DataType || sizeof(Any) # throws + return LLT_ALIGN(Core.sizeof(T), datatype_alignment(T)) +end elsize(::Type{Union{}}, slurp...) = 0 sizeof(a::Array) = Core.sizeof(a) @@ -280,8 +285,7 @@ segfault your program, in the same manner as C. function unsafe_copyto!(dest::Ptr{T}, src::Ptr{T}, n) where T # Do not use this to copy data between pointer arrays. # It can't be made safe no matter how carefully you checked. 
- ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), - dest, src, n * aligned_sizeof(T)) + memmove(dest, src, n * aligned_sizeof(T)) return dest end @@ -328,13 +332,11 @@ function unsafe_copyto!(dest::Array{T}, doffs, src::Array{T}, soffs, n) where T ccall(:jl_array_ptr_copy, Cvoid, (Any, Ptr{Cvoid}, Any, Ptr{Cvoid}, Int), dest, destp, src, srcp, n) elseif isbitstype(T) - ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), - destp, srcp, n * aligned_sizeof(T)) + memmove(destp, srcp, n * aligned_sizeof(T)) elseif isbitsunion(T) - ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), - destp, srcp, n * aligned_sizeof(T)) + memmove(destp, srcp, n * aligned_sizeof(T)) # copy selector bytes - ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), + memmove( ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), dest) + doffs - 1, ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), src) + soffs - 1, n) @@ -467,7 +469,10 @@ end getindex(::Type{Any}) = Vector{Any}() function fill!(a::Union{Array{UInt8}, Array{Int8}}, x::Integer) - ccall(:memset, Ptr{Cvoid}, (Ptr{Cvoid}, Cint, Csize_t), a, x isa eltype(a) ? x : convert(eltype(a), x), length(a)) + t = @_gc_preserve_begin a + p = unsafe_convert(Ptr{Cvoid}, a) + memset(p, x isa eltype(a) ? x : convert(eltype(a), x), length(a)) + @_gc_preserve_end t return a end @@ -723,7 +728,7 @@ _array_for(::Type{T}, itr, isz) where {T} = _array_for(T, isz, _similar_shape(it collect(collection) Return an `Array` of all items in a collection or iterator. For dictionaries, returns -`Pair{KeyType, ValType}`. If the argument is array-like or is an iterator with the +`Vector{Pair{KeyType, ValType}}`. If the argument is array-like or is an iterator with the [`HasShape`](@ref IteratorSize) trait, the result will have the same shape and number of dimensions as the argument. 
@@ -1834,23 +1839,50 @@ function empty!(a::Vector) return a end -_memcmp(a, b, len) = ccall(:memcmp, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), a, b, len % Csize_t) % Int - # use memcmp for cmp on byte arrays function cmp(a::Array{UInt8,1}, b::Array{UInt8,1}) - c = _memcmp(a, b, min(length(a),length(b))) + ta = @_gc_preserve_begin a + tb = @_gc_preserve_begin b + pa = unsafe_convert(Ptr{Cvoid}, a) + pb = unsafe_convert(Ptr{Cvoid}, b) + c = memcmp(pa, pb, min(length(a),length(b))) + @_gc_preserve_end ta + @_gc_preserve_end tb return c < 0 ? -1 : c > 0 ? +1 : cmp(length(a),length(b)) end const BitIntegerArray{N} = Union{map(T->Array{T,N}, BitInteger_types)...} where N # use memcmp for == on bit integer types -==(a::Arr, b::Arr) where {Arr <: BitIntegerArray} = - size(a) == size(b) && 0 == _memcmp(a, b, sizeof(eltype(Arr)) * length(a)) +function ==(a::Arr, b::Arr) where {Arr <: BitIntegerArray} + if size(a) == size(b) + ta = @_gc_preserve_begin a + tb = @_gc_preserve_begin b + pa = unsafe_convert(Ptr{Cvoid}, a) + pb = unsafe_convert(Ptr{Cvoid}, b) + c = memcmp(pa, pb, sizeof(eltype(Arr)) * length(a)) + @_gc_preserve_end ta + @_gc_preserve_end tb + return c == 0 + else + return false + end +end -# this is ~20% faster than the generic implementation above for very small arrays function ==(a::Arr, b::Arr) where Arr <: BitIntegerArray{1} len = length(a) - len == length(b) && 0 == _memcmp(a, b, sizeof(eltype(Arr)) * len) + if len == length(b) + ta = @_gc_preserve_begin a + tb = @_gc_preserve_begin b + T = eltype(Arr) + pa = unsafe_convert(Ptr{T}, a) + pb = unsafe_convert(Ptr{T}, b) + c = memcmp(pa, pb, sizeof(T) * len) + @_gc_preserve_end ta + @_gc_preserve_end tb + return c == 0 + else + return false + end end """ diff --git a/base/asyncmap.jl b/base/asyncmap.jl index 0b3678f6b4b9b..be16ba1b27610 100644 --- a/base/asyncmap.jl +++ b/base/asyncmap.jl @@ -70,12 +70,6 @@ julia> asyncmap(batch_func, 1:5; ntasks=2, batch_size=2) "args_tuple: (4,), element_val: 4, task: 
4904288162898683522" "args_tuple: (5,), element_val: 5, task: 9118321258196414413" ``` - -!!! note - Currently, all tasks in Julia are executed in a single OS thread co-operatively. Consequently, - `asyncmap` is beneficial only when the mapping function involves any I/O - disk, network, remote - worker invocation, etc. - """ function asyncmap(f, c...; ntasks=0, batch_size=nothing) return async_usemap(f, c...; ntasks=ntasks, batch_size=batch_size) diff --git a/base/binaryplatforms.jl b/base/binaryplatforms.jl index a4935d060b74a..f96887d554af0 100644 --- a/base/binaryplatforms.jl +++ b/base/binaryplatforms.jl @@ -186,7 +186,7 @@ end function Base.show(io::IO, p::Platform) str = string(platform_name(p), " ", arch(p)) # Add on all the other tags not covered by os/arch: - other_tags = sort(collect(filter(kv -> kv[1] ∉ ("os", "arch"), tags(p)))) + other_tags = sort!(filter!(kv -> kv[1] ∉ ("os", "arch"), collect(tags(p)))) if !isempty(other_tags) str = string(str, " {", join([string(k, "=", v) for (k, v) in other_tags], ", "), "}") end @@ -835,7 +835,7 @@ Inspects the current Julia process to determine the libgfortran version this Jul linked against (if any). """ function detect_libgfortran_version() - libgfortran_paths = filter(x -> occursin("libgfortran", x), Libdl.dllist()) + libgfortran_paths = filter!(x -> occursin("libgfortran", x), Libdl.dllist()) if isempty(libgfortran_paths) # One day, I hope to not be linking against libgfortran in base Julia return nothing @@ -865,7 +865,7 @@ it is linked against (if any). `max_minor_version` is the latest version in the 3.4 series of GLIBCXX where the search is performed. """ function detect_libstdcxx_version(max_minor_version::Int=30) - libstdcxx_paths = filter(x -> occursin("libstdc++", x), Libdl.dllist()) + libstdcxx_paths = filter!(x -> occursin("libstdc++", x), Libdl.dllist()) if isempty(libstdcxx_paths) # This can happen if we were built by clang, so we don't link against # libstdc++ at all. 
@@ -897,7 +897,7 @@ between Julia and LLVM; they must match. """ function detect_cxxstring_abi() # First, if we're not linked against libstdc++, then early-exit because this doesn't matter. - libstdcxx_paths = filter(x -> occursin("libstdc++", x), Libdl.dllist()) + libstdcxx_paths = filter!(x -> occursin("libstdc++", x), Libdl.dllist()) if isempty(libstdcxx_paths) # We were probably built by `clang`; we don't link against `libstdc++`` at all. return nothing @@ -1080,7 +1080,7 @@ function select_platform(download_info::Dict, platform::AbstractPlatform = HostP # We prefer these better matches, and secondarily reverse-sort by triplet so # as to generally choose the latest release (e.g. a `libgfortran5` tarball # over a `libgfortran3` tarball). - ps = sort(ps, lt = (a, b) -> begin + sort!(ps, lt = (a, b) -> begin loss_a = match_loss(a, platform) loss_b = match_loss(b, platform) if loss_a != loss_b diff --git a/base/bitset.jl b/base/bitset.jl index 5ce07389c771e..240be822fa263 100644 --- a/base/bitset.jl +++ b/base/bitset.jl @@ -391,7 +391,7 @@ function ==(s1::BitSet, s2::BitSet) if overlap > 0 t1 = @_gc_preserve_begin a1 t2 = @_gc_preserve_begin a2 - _memcmp(pointer(a1, b2-b1+1), pointer(a2), overlap<<3) == 0 || return false + memcmp(pointer(a1, b2-b1+1), pointer(a2), overlap<<3) == 0 || return false @_gc_preserve_end t2 @_gc_preserve_end t1 end diff --git a/base/boot.jl b/base/boot.jl index 43ced22c043d5..78b7daaf47d64 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -510,9 +510,11 @@ end) function Symbol(s::String) @_foldable_meta + @noinline return _Symbol(ccall(:jl_string_ptr, Ptr{UInt8}, (Any,), s), sizeof(s), s) end function Symbol(a::Array{UInt8,1}) + @noinline return _Symbol(ccall(:jl_array_ptr, Ptr{UInt8}, (Any,), a), Intrinsics.arraylen(a), a) end Symbol(s::Symbol) = s @@ -533,11 +535,10 @@ import Core: CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, Return end # module IR # docsystem basics -const unescape = Symbol("hygienic-scope") macro 
doc(x...) docex = atdoc(__source__, __module__, x...) isa(docex, Expr) && docex.head === :escape && return docex - return Expr(:escape, Expr(unescape, docex, typeof(atdoc).name.module)) + return Expr(:escape, Expr(:var"hygienic-scope", docex, typeof(atdoc).name.module, __source__)) end macro __doc__(x) return Expr(:escape, Expr(:block, Expr(:meta, :doc), x)) @@ -826,9 +827,11 @@ Integer(x::Union{Float16, Float32, Float64}) = Int(x) # `_parse` must return an `svec` containing an `Expr` and the new offset as an # `Int`. # -# The internal jl_parse which will call into Core._parse if not `nothing`. +# The internal jl_parse will call into Core._parse if not `nothing`. _parse = nothing +_setparser!(parser) = setglobal!(Core, :_parse, parser) + # support for deprecated uses of internal _apply function _apply(x...) = Core._apply_iterate(Main.Base.iterate, x...) diff --git a/base/c.jl b/base/c.jl index d94447650b9fb..662986501d59d 100644 --- a/base/c.jl +++ b/base/c.jl @@ -640,13 +640,11 @@ end function ccall_macro_lower(convention, func, rettype, types, args, nreq) - lowering = [] - realargs = [] - gcroots = [] + statements = [] - # if interpolation was used, ensure variable is a function pointer at runtime. + # if interpolation was used, ensure the value is a function pointer at runtime. 
if Meta.isexpr(func, :$) - push!(lowering, Expr(:(=), :func, esc(func.args[1]))) + push!(statements, Expr(:(=), :func, esc(func.args[1]))) name = QuoteNode(func.args[1]) func = :func check = quote @@ -655,31 +653,14 @@ function ccall_macro_lower(convention, func, rettype, types, args, nreq) throw(ArgumentError("interpolated function `$name` was not a Ptr{Cvoid}, but $(typeof(func))")) end end - push!(lowering, check) + push!(statements, check) else func = esc(func) end - for (i, (arg, type)) in enumerate(zip(args, types)) - sym = Symbol(string("arg", i, "root")) - sym2 = Symbol(string("arg", i, )) - earg, etype = esc(arg), esc(type) - push!(lowering, :(local $sym = $(GlobalRef(Base, :cconvert))($etype, $earg))) - push!(lowering, :(local $sym2 = $(GlobalRef(Base, :unsafe_convert))($etype, $sym))) - push!(realargs, sym2) - push!(gcroots, sym) - end - etypes = Expr(:call, Expr(:core, :svec), types...) - exp = Expr(:foreigncall, - func, - esc(rettype), - esc(etypes), - nreq, - QuoteNode(convention), - realargs..., gcroots...) - push!(lowering, exp) - - return Expr(:block, lowering...) 
+ return Expr(:block, statements..., + Expr(:call, :ccall, func, Expr(:cconv, convention, nreq), esc(rettype), + Expr(:tuple, map(esc, types)...), map(esc, args)...)) end """ diff --git a/base/client.jl b/base/client.jl index dd529dad5281e..6e30c9991e45e 100644 --- a/base/client.jl +++ b/base/client.jl @@ -202,10 +202,7 @@ parse_input_line(s::AbstractString) = parse_input_line(String(s)) # detect the reason which caused an :incomplete expression # from the error message # NOTE: the error messages are defined in src/julia-parser.scm -incomplete_tag(ex) = :none -function incomplete_tag(ex::Expr) - Meta.isexpr(ex, :incomplete) || return :none - msg = ex.args[1] +function fl_incomplete_tag(msg::AbstractString) occursin("string", msg) && return :string occursin("comment", msg) && return :comment occursin("requires end", msg) && return :block @@ -214,6 +211,20 @@ function incomplete_tag(ex::Expr) return :other end +incomplete_tag(ex) = :none +function incomplete_tag(ex::Expr) + if ex.head !== :incomplete + return :none + elseif isempty(ex.args) + return :other + elseif ex.args[1] isa String + return fl_incomplete_tag(ex.args[1]) + else + return incomplete_tag(ex.args[1]) + end +end +incomplete_tag(exc::Meta.ParseError) = incomplete_tag(exc.detail) + function exec_options(opts) quiet = (opts.quiet != 0) startup = (opts.startupfile != 2) diff --git a/base/cmd.jl b/base/cmd.jl index 9e274b61b5e9e..475a62a82d4d7 100644 --- a/base/cmd.jl +++ b/base/cmd.jl @@ -41,6 +41,7 @@ has_nondefault_cmd_flags(c::Cmd) = """ Cmd(cmd::Cmd; ignorestatus, detach, windows_verbatim, windows_hide, env, dir) + Cmd(exec::Vector{String}) Construct a new `Cmd` object, representing an external program and arguments, from `cmd`, while changing the settings of the optional keyword arguments: @@ -70,8 +71,15 @@ while changing the settings of the optional keyword arguments: * `dir::AbstractString`: Specify a working directory for the command (instead of the current directory). 
-For any keywords that are not specified, the current settings from `cmd` are used. Normally, -to create a `Cmd` object in the first place, one uses backticks, e.g. +For any keywords that are not specified, the current settings from `cmd` are used. + +Note that the `Cmd(exec)` constructor does not create a copy of `exec`. Any subsequent changes to `exec` will be reflected in the `Cmd` object. + +The most common way to construct a `Cmd` object is with command literals (backticks), e.g. + + `ls -l` + +This can then be passed to the `Cmd` constructor to modify its settings, e.g. Cmd(`echo "Hello world"`, ignorestatus=true, detach=false) """ diff --git a/base/cmem.jl b/base/cmem.jl new file mode 100644 index 0000000000000..8b0b99b3a6ebd --- /dev/null +++ b/base/cmem.jl @@ -0,0 +1,53 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +""" + memcpy(dst::Ptr, src::Ptr, n::Integer) -> Ptr{Cvoid} + +Call `memcpy` from the C standard library. + +!!! compat "Julia 1.10" + Support for `memcpy` requires at least Julia 1.10. + +""" +function memcpy(dst::Ptr, src::Ptr, n::Integer) + ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), dst, src, n) +end + +""" + memmove(dst::Ptr, src::Ptr, n::Integer) -> Ptr{Cvoid} + +Call `memmove` from the C standard library. + +!!! compat "Julia 1.10" + Support for `memmove` requires at least Julia 1.10. + +""" +function memmove(dst::Ptr, src::Ptr, n::Integer) + ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), dst, src, n) +end + +""" + memset(dst::Ptr, val, n::Integer) -> Ptr{Cvoid} + +Call `memset` from the C standard library. + +!!! compat "Julia 1.10" + Support for `memset` requires at least Julia 1.10. + +""" +function memset(p::Ptr, val, n::Integer) + ccall(:memset, Ptr{Cvoid}, (Ptr{Cvoid}, Cint, Csize_t), p, val, n) +end + +""" + memcmp(a::Ptr, b::Ptr, n::Integer) -> Int + +Call `memcmp` from the C standard library. + +!!! 
compat "Julia 1.10" + Support for `memcmp` requires at least Julia 1.10. + +""" +function memcmp(a::Ptr, b::Ptr, n::Integer) + ccall(:memcmp, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), a, b, n % Csize_t) % Int +end diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 097eb7a5d098e..0cf3e6c00a1b7 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -156,7 +156,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f), all_effects = Effects(all_effects; nothrow=false) end - rettype = from_interprocedural!(𝕃ₚ, rettype, sv, arginfo, conditionals) + rettype = from_interprocedural!(interp, rettype, sv, arginfo, conditionals) # Also considering inferring the compilation signature for this method, so # it is available to the compiler in case it ends up needing it. @@ -303,7 +303,8 @@ function find_matching_methods(𝕃::AbstractLattice, end """ - from_interprocedural!(𝕃ₚ::AbstractLattice, rt, sv::AbsIntState, arginfo::ArgInfo, maybecondinfo) -> newrt + from_interprocedural!(interp::AbstractInterpreter, rt, sv::AbsIntState, + arginfo::ArgInfo, maybecondinfo) -> newrt Converts inter-procedural return type `rt` into a local lattice element `newrt`, that is appropriate in the context of current local analysis frame `sv`, especially: @@ -322,15 +323,16 @@ In such cases `maybecondinfo` should be either of: When we deal with multiple `MethodMatch`es, it's better to precompute `maybecondinfo` by `tmerge`ing argument signature type of each method call.
""" -function from_interprocedural!(𝕃ₚ::AbstractLattice, @nospecialize(rt), sv::AbsIntState, arginfo::ArgInfo, @nospecialize(maybecondinfo)) +function from_interprocedural!(interp::AbstractInterpreter, @nospecialize(rt), sv::AbsIntState, + arginfo::ArgInfo, @nospecialize(maybecondinfo)) rt = collect_limitations!(rt, sv) if isa(rt, InterMustAlias) rt = from_intermustalias(rt, arginfo) - elseif is_lattice_bool(𝕃ₚ, rt) + elseif is_lattice_bool(ipo_lattice(interp), rt) if maybecondinfo === nothing rt = widenconditional(rt) else - rt = from_interconditional(𝕃ₚ, rt, sv, arginfo, maybecondinfo) + rt = from_interconditional(typeinf_lattice(interp), rt, sv, arginfo, maybecondinfo) end end @assert !(rt isa InterConditional || rt isa InterMustAlias) "invalid lattice element returned from inter-procedural context" @@ -361,34 +363,32 @@ function from_intermustalias(rt::InterMustAlias, arginfo::ArgInfo) return widenmustalias(rt) end -function from_interconditional(𝕃ₚ::AbstractLattice, - typ, sv::AbsIntState, arginfo::ArgInfo, maybecondinfo) - @nospecialize typ maybecondinfo - has_conditional(𝕃ₚ, sv) || return widenconditional(typ) +function from_interconditional(𝕃ᵢ::AbstractLattice, @nospecialize(rt), sv::AbsIntState, + arginfo::ArgInfo, @nospecialize(maybecondinfo)) + has_conditional(𝕃ᵢ, sv) || return widenconditional(rt) (; fargs, argtypes) = arginfo - fargs === nothing && return widenconditional(typ) - 𝕃 = widenlattice(𝕃ₚ) + fargs === nothing && return widenconditional(rt) slot = 0 alias = nothing thentype = elsetype = Any - condval = maybe_extract_const_bool(typ) + condval = maybe_extract_const_bool(rt) for i in 1:length(fargs) # find the first argument which supports refinement, # and intersect all equivalent arguments with it argtyp = argtypes[i] if alias === nothing - if argtyp isa MustAlias - old = argtyp.fldtyp - id = argtyp.slot - elseif alias === nothing && argtyp isa Type - arg = ssa_def_slot(fargs[i], sv) - arg isa SlotNumber || continue # can't refine + arg = 
ssa_def_slot(fargs[i], sv) + if isa(arg, SlotNumber) && widenslotwrapper(argtyp) isa Type old = argtyp id = slot_id(arg) + elseif argtyp isa MustAlias + old = argtyp.fldtyp + id = argtyp.slot else continue # unlikely to refine end elseif argtyp isa MustAlias && issubalias(argtyp, alias) + arg = nothing old = alias.fldtyp id = alias.slot else @@ -401,32 +401,32 @@ function from_interconditional(𝕃ₚ::AbstractLattice, new_elsetype = maybecondinfo[2][i] else # otherwise compute it on the fly - cnd = conditional_argtype(typ, maybecondinfo, argtypes, i) + cnd = conditional_argtype(rt, maybecondinfo, argtypes, i) new_thentype = cnd.thentype new_elsetype = cnd.elsetype end if condval === false thentype = Bottom - elseif ⊑(𝕃, new_thentype, thentype) + elseif ⊑(𝕃ᵢ, new_thentype, thentype) thentype = new_thentype else - thentype = tmeet(𝕃, thentype, widenconst(new_thentype)) + thentype = tmeet(𝕃ᵢ, thentype, widenconst(new_thentype)) end if condval === true elsetype = Bottom - elseif ⊑(𝕃, new_elsetype, elsetype) + elseif ⊑(𝕃ᵢ, new_elsetype, elsetype) elsetype = new_elsetype else - elsetype = tmeet(𝕃, elsetype, widenconst(new_elsetype)) + elsetype = tmeet(𝕃ᵢ, elsetype, widenconst(new_elsetype)) end - if (slot > 0 || condval !== false) && ⋤(𝕃, thentype, old) + if (slot > 0 || condval !== false) && ⋤(𝕃ᵢ, thentype, old) slot = id - if argtyp isa MustAlias + if !(arg isa SlotNumber) && argtyp isa MustAlias alias = argtyp end - elseif (slot > 0 || condval !== true) && ⋤(𝕃, elsetype, old) + elseif (slot > 0 || condval !== true) && ⋤(𝕃ᵢ, elsetype, old) slot = id - if argtyp isa MustAlias + if !(arg isa SlotNumber) && argtyp isa MustAlias alias = argtyp end else # reset: no new useful information for this slot @@ -444,7 +444,7 @@ function from_interconditional(𝕃ₚ::AbstractLattice, end return Conditional(slot, thentype, elsetype) # record a Conditional improvement to this slot end - return widenconditional(typ) + return widenconditional(rt) end function 
conditional_argtype(@nospecialize(rt), @nospecialize(sig), argtypes::Vector{Any}, i::Int) @@ -508,6 +508,10 @@ function abstract_call_method(interp::AbstractInterpreter, sigtuple = unwrap_unionall(sig) sigtuple isa DataType || return MethodCallResult(Any, false, false, nothing, Effects()) + if is_nospecializeinfer(method) + sig = get_nospecializeinfer_sig(method, sig, sparams) + end + # Limit argument type tuple growth of functions: # look through the parents list to see if there's a call to the same method # and from the same method. @@ -758,32 +762,106 @@ struct MethodCallResult end end -# - true: eligible for concrete evaluation -# - false: eligible for semi-concrete evaluation -# - nothing: not eligible for either of it +struct InvokeCall + types # ::Type + lookupsig # ::Type + InvokeCall(@nospecialize(types), @nospecialize(lookupsig)) = new(types, lookupsig) +end + +struct ConstCallResults + rt::Any + const_result::ConstResult + effects::Effects + edge::MethodInstance + ConstCallResults(@nospecialize(rt), + const_result::ConstResult, + effects::Effects, + edge::MethodInstance) = + new(rt, const_result, effects, edge) +end + +function abstract_call_method_with_const_args(interp::AbstractInterpreter, + result::MethodCallResult, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo, + match::MethodMatch, sv::AbsIntState, invokecall::Union{Nothing,InvokeCall}=nothing) + if !const_prop_enabled(interp, sv, match) + return nothing + end + if bail_out_const_call(interp, result, si) + add_remark!(interp, sv, "[constprop] No more information to be gained") + return nothing + end + eligibility = concrete_eval_eligible(interp, f, result, arginfo, sv) + if eligibility === :concrete_eval + return concrete_eval_call(interp, f, result, arginfo, sv, invokecall) + end + mi = maybe_get_const_prop_profitable(interp, result, f, arginfo, si, match, sv) + mi === nothing && return nothing + if is_constprop_recursed(result, mi, sv) + add_remark!(interp, sv, "[constprop] Edge cycle 
encountered") + return nothing + end + # try semi-concrete evaluation + if eligibility === :semi_concrete_eval + res = semi_concrete_eval_call(interp, mi, result, arginfo, sv) + if res !== nothing + return res + end + end + # try constant prop' + return const_prop_call(interp, mi, result, arginfo, sv) +end + +function const_prop_enabled(interp::AbstractInterpreter, sv::AbsIntState, match::MethodMatch) + if !InferenceParams(interp).ipo_constant_propagation + add_remark!(interp, sv, "[constprop] Disabled by parameter") + return false + end + if is_no_constprop(match.method) + add_remark!(interp, sv, "[constprop] Disabled by method parameter") + return false + end + return true +end + +function bail_out_const_call(interp::AbstractInterpreter, result::MethodCallResult, si::StmtInfo) + if is_removable_if_unused(result.effects) + if isa(result.rt, Const) || call_result_unused(si) + return true + end + end + return false +end + function concrete_eval_eligible(interp::AbstractInterpreter, @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState) - # disable all concrete-evaluation if this function call is tainted by some overlayed - # method since currently there is no direct way to execute overlayed methods + (;effects) = result if inbounds_option() === :off - # Disable concrete evaluation in `--check-bounds=no` mode, since we cannot be sure - # that inferred effects are accurate. - return nothing - elseif !result.effects.noinbounds && stmt_taints_inbounds_consistency(sv) + if !is_nothrow(effects) + # Disable concrete evaluation in `--check-bounds=no` mode, + # unless it is known to not throw. + return :none + end + end + if !effects.noinbounds && stmt_taints_inbounds_consistency(sv) # If the current statement is @inbounds or we propagate inbounds, the call's consistency # is tainted and not consteval eligible. 
add_remark!(interp, sv, "[constprop] Concrete evel disabled for inbounds") - return nothing + return :none + end + if isoverlayed(method_table(interp)) && !is_nonoverlayed(effects) + # disable concrete-evaluation if this function call is tainted by some overlayed + # method since currently there is no direct way to execute overlayed methods + add_remark!(interp, sv, "[constprop] Concrete evel disabled for overlayed methods") + return :none end - isoverlayed(method_table(interp)) && !is_nonoverlayed(result.effects) && return nothing - if result.edge !== nothing && is_foldable(result.effects) + if result.edge !== nothing && is_foldable(effects) if f !== nothing && is_all_const_arg(arginfo, #=start=#2) - return true - else - return false + return :concrete_eval + elseif !any_conditional(arginfo) + return :semi_concrete_eval end end - return nothing + return :none end is_all_const_arg(arginfo::ArgInfo, start::Int) = is_all_const_arg(arginfo.argtypes, start::Int) @@ -795,6 +873,9 @@ function is_all_const_arg(argtypes::Vector{Any}, start::Int) return true end +any_conditional(argtypes::Vector{Any}) = any(@nospecialize(x)->isa(x, Conditional), argtypes) +any_conditional(arginfo::ArgInfo) = any_conditional(arginfo.argtypes) + collect_const_args(arginfo::ArgInfo, start::Int) = collect_const_args(arginfo.argtypes, start) function collect_const_args(argtypes::Vector{Any}, start::Int) return Any[ let a = widenslotwrapper(argtypes[i]) @@ -804,128 +885,24 @@ function collect_const_args(argtypes::Vector{Any}, start::Int) end for i = start:length(argtypes) ] end -struct InvokeCall - types # ::Type - lookupsig # ::Type - InvokeCall(@nospecialize(types), @nospecialize(lookupsig)) = new(types, lookupsig) -end - function concrete_eval_call(interp::AbstractInterpreter, - @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, si::StmtInfo, - sv::AbsIntState, invokecall::Union{Nothing,InvokeCall}=nothing) - eligible = concrete_eval_eligible(interp, f, result, arginfo, sv) - 
eligible === nothing && return false - if eligible - args = collect_const_args(arginfo, #=start=#2) - if invokecall !== nothing - # this call should be `invoke`d, rewrite `args` back now - pushfirst!(args, f, invokecall.types) - f = invoke - end - world = get_world_counter(interp) - edge = result.edge::MethodInstance - value = try - Core._call_in_world_total(world, f, args...) - catch - # The evaluation threw. By :consistent-cy, we're guaranteed this would have happened at runtime - return ConstCallResults(Union{}, ConcreteResult(edge, result.effects), result.effects, edge) - end - return ConstCallResults(Const(value), ConcreteResult(edge, EFFECTS_TOTAL, value), EFFECTS_TOTAL, edge) - else # eligible for semi-concrete evaluation - return true - end -end - -any_conditional(argtypes::Vector{Any}) = any(@nospecialize(x)->isa(x, Conditional), argtypes) -any_conditional(arginfo::ArgInfo) = any_conditional(arginfo.argtypes) - -function const_prop_enabled(interp::AbstractInterpreter, sv::AbsIntState, match::MethodMatch) - if !InferenceParams(interp).ipo_constant_propagation - add_remark!(interp, sv, "[constprop] Disabled by parameter") - return false - end - if is_no_constprop(match.method) - add_remark!(interp, sv, "[constprop] Disabled by method parameter") - return false + @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, + sv::AbsIntState, invokecall::Union{InvokeCall,Nothing}) + args = collect_const_args(arginfo, #=start=#2) + if invokecall !== nothing + # this call should be `invoke`d, rewrite `args` back now + pushfirst!(args, f, invokecall.types) + f = invoke end - return true -end - -struct ConstCallResults - rt::Any - const_result::ConstResult - effects::Effects - edge::MethodInstance - ConstCallResults(@nospecialize(rt), - const_result::ConstResult, - effects::Effects, - edge::MethodInstance) = - new(rt, const_result, effects, edge) -end - -# TODO implement MustAlias forwarding - -struct ConditionalArgtypes <: ForwardableArgtypes - arginfo::ArgInfo 
- sv::InferenceState -end - -""" - matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, argtypes::ConditionalArgtypes) - -The implementation is able to forward `Conditional` of `argtypes`, -as well as the other general extended lattice inforamtion. -""" -function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, argtypes::ConditionalArgtypes) - (; arginfo, sv) = argtypes - (; fargs, argtypes) = arginfo - given_argtypes = Vector{Any}(undef, length(argtypes)) - def = linfo.def::Method - nargs = Int(def.nargs) - cache_argtypes, overridden_by_const = matching_cache_argtypes(𝕃, linfo) - local condargs = nothing - for i in 1:length(argtypes) - argtype = argtypes[i] - # forward `Conditional` if it conveys a constraint on any other argument - if isa(argtype, Conditional) && fargs !== nothing - cnd = argtype - slotid = find_constrained_arg(cnd, fargs, sv) - if slotid !== nothing - # using union-split signature, we may be able to narrow down `Conditional` - sigt = widenconst(slotid > nargs ? argtypes[slotid] : cache_argtypes[slotid]) - thentype = tmeet(cnd.thentype, sigt) - elsetype = tmeet(cnd.elsetype, sigt) - if thentype === Bottom && elsetype === Bottom - # we accidentally proved this method match is impossible - # TODO bail out here immediately rather than just propagating Bottom ? 
- given_argtypes[i] = Bottom - else - if condargs === nothing - condargs = Tuple{Int,Int}[] - end - push!(condargs, (slotid, i)) - given_argtypes[i] = Conditional(slotid, thentype, elsetype) - end - continue - end - end - given_argtypes[i] = widenslotwrapper(argtype) - end - if condargs !== nothing - given_argtypes = let condargs=condargs - va_process_argtypes(𝕃, given_argtypes, linfo) do isva_given_argtypes::Vector{Any}, last::Int - # invalidate `Conditional` imposed on varargs - for (slotid, i) in condargs - if slotid ≥ last && (1 ≤ i ≤ length(isva_given_argtypes)) # `Conditional` is already widened to vararg-tuple otherwise - isva_given_argtypes[i] = widenconditional(isva_given_argtypes[i]) - end - end - end - end - else - given_argtypes = va_process_argtypes(𝕃, given_argtypes, linfo) + world = get_world_counter(interp) + edge = result.edge::MethodInstance + value = try + Core._call_in_world_total(world, f, args...) + catch + # The evaluation threw. By :consistent-cy, we're guaranteed this would have happened at runtime + return ConstCallResults(Union{}, ConcreteResult(edge, result.effects), result.effects, edge) end - return pick_const_args!(𝕃, cache_argtypes, overridden_by_const, given_argtypes) + return ConstCallResults(Const(value), ConcreteResult(edge, EFFECTS_TOTAL, value), EFFECTS_TOTAL, edge) end # check if there is a cycle and duplicated inference of `mi` @@ -942,82 +919,6 @@ function is_constprop_recursed(result::MethodCallResult, mi::MethodInstance, sv: end end -function abstract_call_method_with_const_args(interp::AbstractInterpreter, - result::MethodCallResult, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo, match::MethodMatch, - sv::AbsIntState, invokecall::Union{Nothing,InvokeCall}=nothing) - if !const_prop_enabled(interp, sv, match) - return nothing - end - if is_removable_if_unused(result.effects) - if isa(result.rt, Const) || call_result_unused(si) - add_remark!(interp, sv, "[constprop] No more information to be gained") - return nothing - 
end - end - res = concrete_eval_call(interp, f, result, arginfo, si, sv, invokecall) - isa(res, ConstCallResults) && return res - mi = maybe_get_const_prop_profitable(interp, result, f, arginfo, si, match, sv) - mi === nothing && return nothing - if is_constprop_recursed(result, mi, sv) - add_remark!(interp, sv, "[constprop] Edge cycle encountered") - return nothing - end - # try semi-concrete evaluation - if res::Bool && !any_conditional(arginfo) - world = frame_world(sv) - mi_cache = WorldView(code_cache(interp), world) - code = get(mi_cache, mi, nothing) - if code !== nothing - irsv = IRInterpretationState(interp, code, mi, arginfo.argtypes, world) - if irsv !== nothing - irsv.parent = sv - rt, nothrow = ir_abstract_constant_propagation(interp, irsv) - @assert !(rt isa Conditional || rt isa MustAlias) "invalid lattice element returned from irinterp" - if !(isa(rt, Type) && hasintersect(rt, Bool)) - ir = irsv.ir - # TODO (#48913) enable double inlining pass when there are any calls - # that are newly resovled by irinterp - # state = InliningState(interp) - # ir = ssa_inlining_pass!(irsv.ir, state, propagate_inbounds(irsv)) - new_effects = Effects(result.effects; nothrow) - return ConstCallResults(rt, SemiConcreteResult(mi, ir, new_effects), new_effects, mi) - end - end - end - end - # try constant prop' - inf_cache = get_inference_cache(interp) - 𝕃ᵢ = typeinf_lattice(interp) - inf_result = cache_lookup(𝕃ᵢ, mi, arginfo.argtypes, inf_cache) - if inf_result === nothing - # fresh constant prop' - argtypes = has_conditional(𝕃ᵢ, sv) ? 
ConditionalArgtypes(arginfo, sv) : SimpleArgtypes(arginfo.argtypes) - inf_result = InferenceResult(mi, argtypes, typeinf_lattice(interp)) - if !any(inf_result.overridden_by_const) - add_remark!(interp, sv, "[constprop] Could not handle constant info in matching_cache_argtypes") - return nothing - end - frame = InferenceState(inf_result, #=cache=#:local, interp) - if frame === nothing - add_remark!(interp, sv, "[constprop] Could not retrieve the source") - return nothing # this is probably a bad generated function (unsound), but just ignore it - end - frame.parent = sv - if !typeinf(interp, frame) - add_remark!(interp, sv, "[constprop] Fresh constant inference hit a cycle") - return nothing - end - @assert inf_result.result !== nothing - else - # found the cache for this constant prop' - if inf_result.result === nothing - add_remark!(interp, sv, "[constprop] Found cached constant inference in a cycle") - return nothing - end - end - return ConstCallResults(inf_result.result, ConstPropResult(inf_result), inf_result.ipo_effects, mi) -end - # if there's a possibility we could get a better result with these constant arguments # (hopefully without doing too much work), returns `MethodInstance`, or nothing otherwise function maybe_get_const_prop_profitable(interp::AbstractInterpreter, @@ -1240,6 +1141,132 @@ function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, return false # the cache isn't inlineable, so this constant-prop' will most likely be unfruitful end +function semi_concrete_eval_call(interp::AbstractInterpreter, + mi::MethodInstance, result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState) + world = frame_world(sv) + mi_cache = WorldView(code_cache(interp), world) + code = get(mi_cache, mi, nothing) + if code !== nothing + irsv = IRInterpretationState(interp, code, mi, arginfo.argtypes, world) + if irsv !== nothing + irsv.parent = sv + rt, nothrow = ir_abstract_constant_propagation(interp, irsv) + @assert !(rt isa Conditional || rt isa 
MustAlias) "invalid lattice element returned from irinterp" + if !(isa(rt, Type) && hasintersect(rt, Bool)) + ir = irsv.ir + # TODO (#48913) enable double inlining pass when there are any calls + # that are newly resolved by irinterp + # state = InliningState(interp) + # ir = ssa_inlining_pass!(irsv.ir, state, propagate_inbounds(irsv)) + new_effects = Effects(result.effects; nothrow) + return ConstCallResults(rt, SemiConcreteResult(mi, ir, new_effects), new_effects, mi) + end + end + end + return nothing +end + +function const_prop_call(interp::AbstractInterpreter, + mi::MethodInstance, result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState) + inf_cache = get_inference_cache(interp) + 𝕃ᵢ = typeinf_lattice(interp) + inf_result = cache_lookup(𝕃ᵢ, mi, arginfo.argtypes, inf_cache) + if inf_result === nothing + # fresh constant prop' + argtypes = has_conditional(𝕃ᵢ, sv) ? ConditionalArgtypes(arginfo, sv) : SimpleArgtypes(arginfo.argtypes) + inf_result = InferenceResult(mi, argtypes, typeinf_lattice(interp)) + if !any(inf_result.overridden_by_const) + add_remark!(interp, sv, "[constprop] Could not handle constant info in matching_cache_argtypes") + return nothing + end + frame = InferenceState(inf_result, #=cache=#:local, interp) + if frame === nothing + add_remark!(interp, sv, "[constprop] Could not retrieve the source") + return nothing # this is probably a bad generated function (unsound), but just ignore it + end + frame.parent = sv + if !typeinf(interp, frame) + add_remark!(interp, sv, "[constprop] Fresh constant inference hit a cycle") + return nothing + end + @assert inf_result.result !== nothing + else + # found the cache for this constant prop' + if inf_result.result === nothing + add_remark!(interp, sv, "[constprop] Found cached constant inference in a cycle") + return nothing + end + end + return ConstCallResults(inf_result.result, ConstPropResult(inf_result), inf_result.ipo_effects, mi) +end + +# TODO implement MustAlias forwarding + +struct
ConditionalArgtypes <: ForwardableArgtypes + arginfo::ArgInfo + sv::InferenceState +end + +""" + matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, + conditional_argtypes::ConditionalArgtypes) + +The implementation is able to forward `Conditional` of `conditional_argtypes`, +as well as the other general extended lattice information. +""" +function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, + conditional_argtypes::ConditionalArgtypes) + (; arginfo, sv) = conditional_argtypes + (; fargs, argtypes) = arginfo + given_argtypes = Vector{Any}(undef, length(argtypes)) + def = linfo.def::Method + nargs = Int(def.nargs) + cache_argtypes, overridden_by_const = matching_cache_argtypes(𝕃, linfo) + local condargs = nothing + for i in 1:length(argtypes) + argtype = argtypes[i] + # forward `Conditional` if it conveys a constraint on any other argument + if isa(argtype, Conditional) && fargs !== nothing + cnd = argtype + slotid = find_constrained_arg(cnd, fargs, sv) + if slotid !== nothing + # using union-split signature, we may be able to narrow down `Conditional` + sigt = widenconst(slotid > nargs ? argtypes[slotid] : cache_argtypes[slotid]) + thentype = tmeet(cnd.thentype, sigt) + elsetype = tmeet(cnd.elsetype, sigt) + if thentype === Bottom && elsetype === Bottom + # we accidentally proved this method match is impossible + # TODO bail out here immediately rather than just propagating Bottom ?
+ given_argtypes[i] = Bottom + else + if condargs === nothing + condargs = Tuple{Int,Int}[] + end + push!(condargs, (slotid, i)) + given_argtypes[i] = Conditional(slotid, thentype, elsetype) + end + continue + end + end + given_argtypes[i] = widenslotwrapper(argtype) + end + if condargs !== nothing + given_argtypes = let condargs=condargs + va_process_argtypes(𝕃, given_argtypes, linfo) do isva_given_argtypes::Vector{Any}, last::Int + # invalidate `Conditional` imposed on varargs + for (slotid, i) in condargs + if slotid ≥ last && (1 ≤ i ≤ length(isva_given_argtypes)) # `Conditional` is already widened to vararg-tuple otherwise + isva_given_argtypes[i] = widenconditional(isva_given_argtypes[i]) + end + end + end + end + else + given_argtypes = va_process_argtypes(𝕃, given_argtypes, linfo) + end + return pick_const_args!(𝕃, cache_argtypes, overridden_by_const, given_argtypes) +end + # This is only for use with `Conditional`. # In general, usage of this is wrong. function ssa_def_slot(@nospecialize(arg), sv::InferenceState) @@ -1561,7 +1588,7 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si:: call = abstract_call(interp, ArgInfo(nothing, ct), si, sv, max_methods) seen += 1 push!(retinfos, ApplyCallInfo(call.info, arginfo)) - res = tmerge(res, call.rt) + res = tmerge(typeinf_lattice(interp), res, call.rt) effects = merge_effects(effects, call.effects) if bail_out_apply(interp, InferenceLoopState(ct, res, effects), sv) add_remark!(interp, sv, "_apply_iterate inference reached maximally imprecise information. Bailing on.") @@ -1657,7 +1684,7 @@ end end function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs, argtypes)::ArgInfo, - sv::AbsIntState, max_methods::Int) + sv::AbsIntState) @nospecialize f la = length(argtypes) 𝕃ᵢ = typeinf_lattice(interp) @@ -1854,7 +1881,7 @@ function abstract_call_unionall(interp::AbstractInterpreter, argtypes::Vector{An ret = canconst ? 
Const(body) : Type{body} return CallMeta(ret, Effects(EFFECTS_TOTAL; nothrow), NoCallInfo()) end - return CallMeta(Any, EFFECTS_UNKNOWN, NoCallInfo()) + return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) end function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, si::StmtInfo, sv::AbsIntState) @@ -1906,7 +1933,7 @@ function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgIn (; rt, effects, const_result, edge) = const_call_result end end - rt = from_interprocedural!(𝕃ₚ, rt, sv, arginfo, sig) + rt = from_interprocedural!(interp, rt, sv, arginfo, sig) effects = Effects(effects; nonoverlayed=!overlayed) info = InvokeCallInfo(match, const_result) edge !== nothing && add_invoke_backedge!(sv, lookupsig, edge) @@ -1949,7 +1976,7 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), elseif f === applicable return abstract_applicable(interp, argtypes, sv, max_methods) end - rt = abstract_call_builtin(interp, f, arginfo, sv, max_methods) + rt = abstract_call_builtin(interp, f, arginfo, sv) effects = builtin_effects(𝕃ᵢ, f, arginfo, rt) if f === getfield && (fargs !== nothing && isexpr(fargs[end], :boundscheck)) && !is_nothrow(effects) && isa(sv, InferenceState) # As a special case, we delayed tainting `noinbounds` for getfield calls in case we can prove @@ -1966,7 +1993,7 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), elseif f === TypeVar # Manually look through the definition of TypeVar to # make sure to be able to get `PartialTypeVar`s out. 
- (la < 2 || la > 4) && return CallMeta(Union{}, EFFECTS_UNKNOWN, NoCallInfo()) + (la < 2 || la > 4) && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) n = argtypes[2] ub_var = Const(Any) lb_var = Const(Union{}) @@ -2053,7 +2080,7 @@ function abstract_call_opaque_closure(interp::AbstractInterpreter, effects = Effects(effects; nothrow=false) end end - rt = from_interprocedural!(𝕃ₚ, rt, sv, arginfo, match.spec_types) + rt = from_interprocedural!(interp, rt, sv, arginfo, match.spec_types) info = OpaqueClosureCallInfo(match, const_result) edge !== nothing && add_backedge!(sv, edge) return CallMeta(rt, effects, info) @@ -2069,35 +2096,41 @@ function most_general_argtypes(closure::PartialOpaque) return Any[argt.parameters...] end +function abstract_call_unknown(interp::AbstractInterpreter, @nospecialize(ft), + arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState, + max_methods::Int) + if isa(ft, PartialOpaque) + newargtypes = copy(arginfo.argtypes) + newargtypes[1] = ft.env + return abstract_call_opaque_closure(interp, + ft, ArgInfo(arginfo.fargs, newargtypes), si, sv, #=check=#true) + end + wft = widenconst(ft) + if hasintersect(wft, Builtin) + add_remark!(interp, sv, "Could not identify method table for call") + return CallMeta(Any, Effects(), NoCallInfo()) + elseif hasintersect(wft, Core.OpaqueClosure) + uft = unwrap_unionall(wft) + if isa(uft, DataType) + return CallMeta(rewrap_unionall(uft.parameters[2], wft), Effects(), NoCallInfo()) + end + return CallMeta(Any, Effects(), NoCallInfo()) + end + # non-constant function, but the number of arguments is known and the `f` is not a builtin or intrinsic + atype = argtypes_to_type(arginfo.argtypes) + return abstract_call_gf_by_type(interp, nothing, arginfo, si, atype, sv, max_methods) +end + # call where the function is any lattice element function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, si::StmtInfo, - sv::AbsIntState, max_methods::Union{Int, Nothing} = nothing) - argtypes = arginfo.argtypes - ft = 
widenslotwrapper(argtypes[1]) + sv::AbsIntState, max_methods::Int=typemin(Int)) + ft = widenslotwrapper(arginfo.argtypes[1]) f = singleton_type(ft) if f === nothing - if isa(ft, PartialOpaque) - newargtypes = copy(argtypes) - newargtypes[1] = ft.env - return abstract_call_opaque_closure(interp, - ft, ArgInfo(arginfo.fargs, newargtypes), si, sv, #=check=#true) - end - wft = widenconst(ft) - if hasintersect(wft, Builtin) - add_remark!(interp, sv, "Could not identify method table for call") - return CallMeta(Any, Effects(), NoCallInfo()) - elseif hasintersect(wft, Core.OpaqueClosure) - uft = unwrap_unionall(wft) - if isa(uft, DataType) - return CallMeta(rewrap_unionall(uft.parameters[2], wft), Effects(), NoCallInfo()) - end - return CallMeta(Any, Effects(), NoCallInfo()) - end - # non-constant function, but the number of arguments is known and the `f` is not a builtin or intrinsic - max_methods = max_methods === nothing ? get_max_methods(interp, sv) : max_methods - return abstract_call_gf_by_type(interp, nothing, arginfo, si, argtypes_to_type(argtypes), sv, max_methods) + max_methods = max_methods == typemin(Int) ? get_max_methods(interp, sv) : max_methods + return abstract_call_unknown(interp, ft, arginfo, si, sv, max_methods) end - max_methods = max_methods === nothing ? get_max_methods(interp, f, sv) : max_methods + max_methods = max_methods == typemin(Int) ? 
get_max_methods(interp, f, sv) : max_methods return abstract_call_known(interp, f, arginfo, si, sv, max_methods) end @@ -2253,17 +2286,33 @@ struct RTEffects RTEffects(@nospecialize(rt), effects::Effects) = new(rt, effects) end +function mark_curr_effect_flags!(sv::AbsIntState, effects::Effects) + if isa(sv, InferenceState) + if is_effect_free(effects) + add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE) + else + sub_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE) + end + if is_nothrow(effects) + add_curr_ssaflag!(sv, IR_FLAG_NOTHROW) + else + sub_curr_ssaflag!(sv, IR_FLAG_NOTHROW) + end + if is_consistent(effects) + add_curr_ssaflag!(sv, IR_FLAG_CONSISTENT) + else + sub_curr_ssaflag!(sv, IR_FLAG_CONSISTENT) + end + end +end + function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, sv::InferenceState) si = StmtInfo(!call_result_unused(sv, sv.currpc)) (; rt, effects, info) = abstract_call(interp, arginfo, si, sv) sv.stmt_info[sv.currpc] = info # mark this call statement as DCE-elgible # TODO better to do this in a single pass based on the `info` object at the end of abstractinterpret? 
- if is_removable_if_unused(effects) - add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE) - else - sub_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE) - end + mark_curr_effect_flags!(sv, effects) return RTEffects(rt, effects) end @@ -2280,7 +2329,7 @@ end function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, sv::AbsIntState) - effects = EFFECTS_UNKNOWN + effects = Effects() ehead = e.head 𝕃ᵢ = typeinf_lattice(interp) ⊑ᵢ = ⊑(𝕃ᵢ) @@ -2402,14 +2451,7 @@ function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtyp elseif ehead === :foreigncall (; rt, effects) = abstract_eval_foreigncall(interp, e, vtypes, sv) t = rt - if isa(sv, InferenceState) - # mark this call statement as DCE-elgible - if is_removable_if_unused(effects) - add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE) - else - sub_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE) - end - end + mark_curr_effect_flags!(sv, effects) elseif ehead === :cfunction effects = EFFECTS_UNKNOWN t = e.args[1] @@ -2531,7 +2573,7 @@ end function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState) if !isa(e, Expr) if isa(e, PhiNode) - add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE) + add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW) return abstract_eval_phi(interp, e, vtypes, sv) end return abstract_eval_special_value(interp, e, vtypes, sv) @@ -2582,11 +2624,9 @@ function abstract_eval_globalref(interp::AbstractInterpreter, g::GlobalRef, sv:: nothrow = false if isa(rt, Const) consistent = ALWAYS_TRUE + nothrow = true if is_mutation_free_argtype(rt) inaccessiblememonly = ALWAYS_TRUE - nothrow = true - else - nothrow = true end elseif isdefined_globalref(g) nothrow = true @@ -2628,18 +2668,18 @@ struct BestguessInfo{Interp<:AbstractInterpreter} end end -function widenreturn(@nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function widenreturn(@nospecialize(rt), info::BestguessInfo) return 
widenreturn(typeinf_lattice(info.interp), rt, info) end -function widenreturn(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function widenreturn(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo) return widenreturn(widenlattice(𝕃ᵢ), rt, info) end -function widenreturn_noslotwrapper(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function widenreturn_noslotwrapper(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo) return widenreturn_noslotwrapper(widenlattice(𝕃ᵢ), rt, info) end -function widenreturn(𝕃ᵢ::MustAliasesLattice, @nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function widenreturn(𝕃ᵢ::MustAliasesLattice, @nospecialize(rt), info::BestguessInfo) if isa(rt, MustAlias) if 1 ≤ rt.slot ≤ info.nargs rt = InterMustAlias(rt) @@ -2651,7 +2691,7 @@ function widenreturn(𝕃ᵢ::MustAliasesLattice, @nospecialize(rt), info::Bestg return widenreturn(widenlattice(𝕃ᵢ), rt, info) end -function widenreturn(𝕃ᵢ::ConditionalsLattice, @nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function widenreturn(𝕃ᵢ::ConditionalsLattice, @nospecialize(rt), info::BestguessInfo) ⊑ᵢ = ⊑(𝕃ᵢ) if !(⊑(ipo_lattice(info.interp), info.bestguess, Bool)) || info.bestguess === Bool # give up inter-procedural constraint back-propagation @@ -2688,7 +2728,7 @@ function widenreturn(𝕃ᵢ::ConditionalsLattice, @nospecialize(rt), info::Best isa(rt, InterConditional) && return rt return widenreturn(widenlattice(𝕃ᵢ), rt, info) end -function bool_rt_to_conditional(@nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function bool_rt_to_conditional(@nospecialize(rt), info::BestguessInfo) bestguess = info.bestguess if isa(bestguess, InterConditional) # if the bestguess so far is already `Conditional`, try to convert @@ -2706,7 +2746,7 @@ function bool_rt_to_conditional(@nospecialize(rt), info::BestguessInfo) end return rt end -function bool_rt_to_conditional(@nospecialize(rt), slot_id::Int, 
info::BestguessInfo) +@nospecializeinfer function bool_rt_to_conditional(@nospecialize(rt), slot_id::Int, info::BestguessInfo) ⊑ᵢ = ⊑(typeinf_lattice(info.interp)) old = info.slottypes[slot_id] new = widenslotwrapper(info.changes[slot_id].typ) # avoid nested conditional @@ -2725,13 +2765,13 @@ function bool_rt_to_conditional(@nospecialize(rt), slot_id::Int, info::Bestguess return rt end -function widenreturn(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function widenreturn(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) return widenreturn_partials(𝕃ᵢ, rt, info) end -function widenreturn_noslotwrapper(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function widenreturn_noslotwrapper(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) return widenreturn_partials(𝕃ᵢ, rt, info) end -function widenreturn_partials(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function widenreturn_partials(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) if isa(rt, PartialStruct) fields = copy(rt.fields) local anyrefine = false @@ -2754,21 +2794,21 @@ function widenreturn_partials(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info: return widenreturn(widenlattice(𝕃ᵢ), rt, info) end -function widenreturn(::ConstsLattice, @nospecialize(rt), ::BestguessInfo) +@nospecializeinfer function widenreturn(::ConstsLattice, @nospecialize(rt), ::BestguessInfo) return widenreturn_consts(rt) end -function widenreturn_noslotwrapper(::ConstsLattice, @nospecialize(rt), ::BestguessInfo) +@nospecializeinfer function widenreturn_noslotwrapper(::ConstsLattice, @nospecialize(rt), ::BestguessInfo) return widenreturn_consts(rt) end -function widenreturn_consts(@nospecialize(rt)) +@nospecializeinfer function widenreturn_consts(@nospecialize(rt)) isa(rt, Const) && return rt return widenconst(rt) end -function widenreturn(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo) 
+@nospecializeinfer function widenreturn(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo) return widenconst(rt) end -function widenreturn_noslotwrapper(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo) +@nospecializeinfer function widenreturn_noslotwrapper(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo) return widenconst(rt) end diff --git a/base/compiler/abstractlattice.jl b/base/compiler/abstractlattice.jl index a84050816cb21..3c6c874a9a09c 100644 --- a/base/compiler/abstractlattice.jl +++ b/base/compiler/abstractlattice.jl @@ -95,19 +95,6 @@ end widenlattice(𝕃::InferenceLattice) = 𝕃.parent is_valid_lattice_norec(::InferenceLattice, @nospecialize(elem)) = isa(elem, LimitedAccuracy) -""" - struct OptimizerLattice{𝕃<:AbstractLattice} <: AbstractLattice - -The lattice used by the optimizer. -Extends a base lattice `𝕃` and adjoins `MaybeUndef`. -""" -struct OptimizerLattice{𝕃<:AbstractLattice} <: AbstractLattice - parent::𝕃 -end -OptimizerLattice() = OptimizerLattice(SimpleInferenceLattice.instance) -widenlattice(𝕃::OptimizerLattice) = 𝕃.parent -is_valid_lattice_norec(::OptimizerLattice, @nospecialize(elem)) = isa(elem, MaybeUndef) - """ tmeet(𝕃::AbstractLattice, a, b::Type) @@ -161,7 +148,7 @@ If `𝕃` is `JLTypeLattice`, this is equivalent to subtyping. """ function ⊑ end -⊑(::JLTypeLattice, @nospecialize(a::Type), @nospecialize(b::Type)) = a <: b +@nospecializeinfer ⊑(::JLTypeLattice, @nospecialize(a::Type), @nospecialize(b::Type)) = a <: b """ ⊏(𝕃::AbstractLattice, a, b) -> Bool @@ -169,7 +156,7 @@ function ⊑ end The strict partial order over the type inference lattice. This is defined as the irreflexive kernel of `⊑`. """ -⊏(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) = ⊑(𝕃, a, b) && !⊑(𝕃, b, a) +@nospecializeinfer ⊏(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) = ⊑(𝕃, a, b) && !⊑(𝕃, b, a) """ ⋤(𝕃::AbstractLattice, a, b) -> Bool @@ -177,7 +164,7 @@ This is defined as the irreflexive kernel of `⊑`. 
This order could be used as a slightly more efficient version of the strict order `⊏`, where we can safely assume `a ⊑ b` holds. """ -⋤(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) = !⊑(𝕃, b, a) +@nospecializeinfer ⋤(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) = !⊑(𝕃, b, a) """ is_lattice_equal(𝕃::AbstractLattice, a, b) -> Bool @@ -186,7 +173,7 @@ Check if two lattice elements are partial order equivalent. This is basically `a ⊑ b && b ⊑ a` in the lattice of `𝕃` but (optionally) with extra performance optimizations. """ -function is_lattice_equal(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function is_lattice_equal(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) a === b && return true return ⊑(𝕃, a, b) && ⊑(𝕃, b, a) end @@ -197,14 +184,14 @@ end Determines whether the given lattice element `t` of `𝕃` has non-trivial extended lattice information that would not be available from the type itself. """ -has_nontrivial_extended_info(𝕃::AbstractLattice, @nospecialize t) = +@nospecializeinfer has_nontrivial_extended_info(𝕃::AbstractLattice, @nospecialize t) = has_nontrivial_extended_info(widenlattice(𝕃), t) -function has_nontrivial_extended_info(𝕃::PartialsLattice, @nospecialize t) +@nospecializeinfer function has_nontrivial_extended_info(𝕃::PartialsLattice, @nospecialize t) isa(t, PartialStruct) && return true isa(t, PartialOpaque) && return true return has_nontrivial_extended_info(widenlattice(𝕃), t) end -function has_nontrivial_extended_info(𝕃::ConstsLattice, @nospecialize t) +@nospecializeinfer function has_nontrivial_extended_info(𝕃::ConstsLattice, @nospecialize t) isa(t, PartialTypeVar) && return true if isa(t, Const) val = t.val @@ -212,7 +199,7 @@ function has_nontrivial_extended_info(𝕃::ConstsLattice, @nospecialize t) end return has_nontrivial_extended_info(widenlattice(𝕃), t) end -has_nontrivial_extended_info(::JLTypeLattice, @nospecialize(t)) = false +@nospecializeinfer 
has_nontrivial_extended_info(::JLTypeLattice, @nospecialize(t)) = false """ is_const_prop_profitable_arg(𝕃::AbstractLattice, t) -> Bool @@ -220,9 +207,9 @@ has_nontrivial_extended_info(::JLTypeLattice, @nospecialize(t)) = false Determines whether the given lattice element `t` of `𝕃` has new extended lattice information that should be forwarded along with constant propagation. """ -is_const_prop_profitable_arg(𝕃::AbstractLattice, @nospecialize t) = +@nospecializeinfer is_const_prop_profitable_arg(𝕃::AbstractLattice, @nospecialize t) = is_const_prop_profitable_arg(widenlattice(𝕃), t) -function is_const_prop_profitable_arg(𝕃::PartialsLattice, @nospecialize t) +@nospecializeinfer function is_const_prop_profitable_arg(𝕃::PartialsLattice, @nospecialize t) if isa(t, PartialStruct) return true # might be a bit aggressive, may want to enable some check like follows: # for i = 1:length(t.fields) @@ -236,7 +223,7 @@ function is_const_prop_profitable_arg(𝕃::PartialsLattice, @nospecialize t) isa(t, PartialOpaque) && return true return is_const_prop_profitable_arg(widenlattice(𝕃), t) end -function is_const_prop_profitable_arg(𝕃::ConstsLattice, @nospecialize t) +@nospecializeinfer function is_const_prop_profitable_arg(𝕃::ConstsLattice, @nospecialize t) if isa(t, Const) # don't consider mutable values useful constants val = t.val @@ -245,24 +232,24 @@ function is_const_prop_profitable_arg(𝕃::ConstsLattice, @nospecialize t) isa(t, PartialTypeVar) && return false # this isn't forwardable return is_const_prop_profitable_arg(widenlattice(𝕃), t) end -is_const_prop_profitable_arg(::JLTypeLattice, @nospecialize t) = false +@nospecializeinfer is_const_prop_profitable_arg(::JLTypeLattice, @nospecialize t) = false -is_forwardable_argtype(𝕃::AbstractLattice, @nospecialize(x)) = +@nospecializeinfer is_forwardable_argtype(𝕃::AbstractLattice, @nospecialize(x)) = is_forwardable_argtype(widenlattice(𝕃), x) -function is_forwardable_argtype(𝕃::ConditionalsLattice, @nospecialize x) 
+@nospecializeinfer function is_forwardable_argtype(𝕃::ConditionalsLattice, @nospecialize x) isa(x, Conditional) && return true return is_forwardable_argtype(widenlattice(𝕃), x) end -function is_forwardable_argtype(𝕃::PartialsLattice, @nospecialize x) +@nospecializeinfer function is_forwardable_argtype(𝕃::PartialsLattice, @nospecialize x) isa(x, PartialStruct) && return true isa(x, PartialOpaque) && return true return is_forwardable_argtype(widenlattice(𝕃), x) end -function is_forwardable_argtype(𝕃::ConstsLattice, @nospecialize x) +@nospecializeinfer function is_forwardable_argtype(𝕃::ConstsLattice, @nospecialize x) isa(x, Const) && return true return is_forwardable_argtype(widenlattice(𝕃), x) end -function is_forwardable_argtype(::JLTypeLattice, @nospecialize x) +@nospecializeinfer function is_forwardable_argtype(::JLTypeLattice, @nospecialize x) return false end @@ -281,9 +268,9 @@ External lattice `𝕃ᵢ::ExternalLattice` may overload: """ function widenreturn end, function widenreturn_noslotwrapper end -is_valid_lattice(𝕃::AbstractLattice, @nospecialize(elem)) = +@nospecializeinfer is_valid_lattice(𝕃::AbstractLattice, @nospecialize(elem)) = is_valid_lattice_norec(𝕃, elem) && is_valid_lattice(widenlattice(𝕃), elem) -is_valid_lattice(𝕃::JLTypeLattice, @nospecialize(elem)) = is_valid_lattice_norec(𝕃, elem) +@nospecializeinfer is_valid_lattice(𝕃::JLTypeLattice, @nospecialize(elem)) = is_valid_lattice_norec(𝕃, elem) has_conditional(𝕃::AbstractLattice) = has_conditional(widenlattice(𝕃)) has_conditional(::AnyConditionalsLattice) = true @@ -306,12 +293,12 @@ has_extended_unionsplit(::JLTypeLattice) = false const fallback_lattice = InferenceLattice(BaseInferenceLattice.instance) const fallback_ipo_lattice = InferenceLattice(IPOResultLattice.instance) -⊑(@nospecialize(a), @nospecialize(b)) = ⊑(fallback_lattice, a, b) -tmeet(@nospecialize(a), @nospecialize(b)) = tmeet(fallback_lattice, a, b) -tmerge(@nospecialize(a), @nospecialize(b)) = tmerge(fallback_lattice, a, b) 
-⊏(@nospecialize(a), @nospecialize(b)) = ⊏(fallback_lattice, a, b) -⋤(@nospecialize(a), @nospecialize(b)) = ⋤(fallback_lattice, a, b) -is_lattice_equal(@nospecialize(a), @nospecialize(b)) = is_lattice_equal(fallback_lattice, a, b) +@nospecializeinfer @nospecialize(a) ⊑ @nospecialize(b) = ⊑(fallback_lattice, a, b) +@nospecializeinfer @nospecialize(a) ⊏ @nospecialize(b) = ⊏(fallback_lattice, a, b) +@nospecializeinfer @nospecialize(a) ⋤ @nospecialize(b) = ⋤(fallback_lattice, a, b) +@nospecializeinfer tmeet(@nospecialize(a), @nospecialize(b)) = tmeet(fallback_lattice, a, b) +@nospecializeinfer tmerge(@nospecialize(a), @nospecialize(b)) = tmerge(fallback_lattice, a, b) +@nospecializeinfer is_lattice_equal(@nospecialize(a), @nospecialize(b)) = is_lattice_equal(fallback_lattice, a, b) # Widenlattice with argument widenlattice(::JLTypeLattice, @nospecialize(t)) = widenconst(t) diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl index 0a1b852b052f9..04b0791d9a79e 100644 --- a/base/compiler/compiler.jl +++ b/base/compiler/compiler.jl @@ -33,6 +33,7 @@ convert(::Type{T}, x::T) where {T} = x # mostly used by compiler/methodtable.jl, but also by reflection.jl abstract type MethodTableView end +abstract type AbstractInterpreter end # essential files and libraries include("essentials.jl") @@ -99,6 +100,7 @@ add_with_overflow(x::T, y::T) where {T<:SignedInt} = checked_sadd_int(x, y) add_with_overflow(x::T, y::T) where {T<:UnsignedInt} = checked_uadd_int(x, y) add_with_overflow(x::Bool, y::Bool) = (x+y, false) +include("cmem.jl") include("strings/lazy.jl") # core array operations @@ -169,7 +171,7 @@ include("compiler/bootstrap.jl") ccall(:jl_set_typeinf_func, Cvoid, (Any,), typeinf_ext_toplevel) include("compiler/parsing.jl") -Core.eval(Core, :(_parse = Compiler.fl_parse)) +Core._setparser!(fl_parse) end # baremodule Compiler )) diff --git a/base/compiler/effects.jl b/base/compiler/effects.jl index ec64b7601bc76..7d09769e5b31b 100644 --- a/base/compiler/effects.jl 
+++ b/base/compiler/effects.jl @@ -131,7 +131,7 @@ const INACCESSIBLEMEM_OR_ARGMEMONLY = 0x01 << 1 const EFFECTS_TOTAL = Effects(ALWAYS_TRUE, ALWAYS_TRUE, true, true, true, ALWAYS_TRUE, true, true) const EFFECTS_THROWS = Effects(ALWAYS_TRUE, ALWAYS_TRUE, false, true, true, ALWAYS_TRUE, true, true) -const EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, true, false) # unknown mostly, but it's not overlayed at least (e.g. it's not a call) +const EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, true, true) # unknown mostly, but it's not overlayed and noinbounds at least (e.g. it's not a call) const _EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, false, false) # unknown really function Effects(e::Effects = _EFFECTS_UNKNOWN; diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl index 97a7ed66ab9b5..c4608dd5781e1 100644 --- a/base/compiler/inferencestate.jl +++ b/base/compiler/inferencestate.jl @@ -148,8 +148,9 @@ function kill_def_use!(tpdum::TwoPhaseDefUseMap, def::Int, use::Int) ndata = tpdum.data[idx+1] ndata == 0 && break tpdum.data[idx] = ndata + idx += 1 end - tpdum.data[idx + 1] = 0 + tpdum.data[idx] = 0 end end kill_def_use!(tpdum::TwoPhaseDefUseMap, def::SSAValue, use::Int) = diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 71eeb15d53eb0..3a8de06811cc2 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -66,26 +66,6 @@ is_declared_noinline(@nospecialize src::MaybeCompressed) = # OptimizationState # ##################### -struct EdgeTracker - edges::Vector{Any} - valid_worlds::RefValue{WorldRange} - EdgeTracker(edges::Vector{Any}, range::WorldRange) = - new(edges, RefValue{WorldRange}(range)) -end -EdgeTracker() = EdgeTracker(Any[], 0:typemax(UInt)) - -intersect!(et::EdgeTracker, range::WorldRange) = - et.valid_worlds[] = intersect(et.valid_worlds[], range) - -function 
add_backedge!(et::EdgeTracker, mi::MethodInstance) - push!(et.edges, mi) - return nothing -end -function add_invoke_backedge!(et::EdgeTracker, @nospecialize(invokesig), mi::MethodInstance) - push!(et.edges, invokesig, mi) - return nothing -end - is_source_inferred(@nospecialize src::MaybeCompressed) = ccall(:jl_ir_flag_inferred, Bool, (Any,), src) @@ -125,16 +105,16 @@ function inlining_policy(interp::AbstractInterpreter, end struct InliningState{Interp<:AbstractInterpreter} - et::Union{EdgeTracker,Nothing} + edges::Vector{Any} world::UInt interp::Interp end function InliningState(sv::InferenceState, interp::AbstractInterpreter) - et = EdgeTracker(sv.stmt_edges[1]::Vector{Any}, sv.valid_worlds) - return InliningState(et, sv.world, interp) + edges = sv.stmt_edges[1]::Vector{Any} + return InliningState(edges, sv.world, interp) end function InliningState(interp::AbstractInterpreter) - return InliningState(nothing, get_world_counter(interp), interp) + return InliningState(Any[], get_world_counter(interp), interp) end # get `code_cache(::AbstractInterpreter)` from `state::InliningState` @@ -372,7 +352,9 @@ function argextype( elseif x.head === :copyast return argextype(x.args[1], src, sptypes, slottypes) end - @assert false "argextype only works on argument-position values" + Core.println("argextype called on Expr with head ", x.head, + " which is not valid for IR in argument-position.") + @assert false elseif isa(x, SlotNumber) return slottypes[x.id] elseif isa(x, TypedSlot) @@ -530,7 +512,6 @@ function run_passes( @pass "compact 2" ir = compact!(ir) @pass "SROA" ir = sroa_pass!(ir, sv.inlining) @pass "ADCE" ir = adce_pass!(ir, sv.inlining) - @pass "type lift" ir = type_lift_pass!(ir) @pass "compact 3" ir = compact!(ir) if JLOptions().debug_level == 2 @timeit "verify 3" (verify_ir(ir); verify_linetable(ir.linetable)) diff --git a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl index 
8bc173add6eaa..2469507fd3699 100644 --- a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl +++ b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl @@ -27,7 +27,7 @@ import ._TOP_MOD: # Base definitions pop!, push!, pushfirst!, empty!, delete!, max, min, enumerate, unwrap_unionall, ismutabletype import Core.Compiler: # Core.Compiler specific definitions - Bottom, OptimizerLattice, InferenceResult, IRCode, IR_FLAG_NOTHROW, + Bottom, InferenceResult, IRCode, IR_FLAG_NOTHROW, SimpleInferenceLattice, isbitstype, isexpr, is_meta_expr_head, println, widenconst, argextype, singleton_type, fieldcount_noerror, try_compute_field, try_compute_fieldidx, hasintersect, ⊑, intrinsic_nothrow, array_builtin_common_typecheck, arrayset_typecheck, @@ -42,7 +42,7 @@ end const AInfo = IdSet{Any} const LivenessSet = BitSet -const 𝕃ₒ = OptimizerLattice() +const 𝕃ₒ = SimpleInferenceLattice.instance """ x::EscapeInfo @@ -707,7 +707,6 @@ function analyze_escapes(ir::IRCode, nargs::Int, call_resolved::Bool, get_escape continue elseif head === :static_parameter || # this exists statically, not interested in its escape head === :copyast || # XXX can this account for some escapes? - head === :undefcheck || # XXX can this account for some escapes? 
head === :isdefined || # just returns `Bool`, nothing accounts for any escapes head === :gc_preserve_begin || # `GC.@preserve` expressions themselves won't be used anywhere head === :gc_preserve_end # `GC.@preserve` expressions themselves won't be used anywhere diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 3c444894dd4b6..170725f231761 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -57,18 +57,17 @@ struct UnionSplit end struct InliningEdgeTracker - et::Union{Nothing,EdgeTracker} + edges::Vector{Any} invokesig::Union{Nothing,Vector{Any}} + InliningEdgeTracker(state::InliningState, invokesig::Union{Nothing,Vector{Any}}=nothing) = + new(state.edges, invokesig) end -InliningEdgeTracker(et::Union{Nothing,EdgeTracker}) = InliningEdgeTracker(et, nothing) -function add_inlining_backedge!((; et, invokesig)::InliningEdgeTracker, mi::MethodInstance) - if et !== nothing - if invokesig === nothing - add_backedge!(et, mi) - else - add_invoke_backedge!(et, invoke_signature(invokesig), mi) - end +function add_inlining_backedge!((; edges, invokesig)::InliningEdgeTracker, mi::MethodInstance) + if invokesig === nothing + push!(edges, mi) + else # invoke backedge + push!(edges, invoke_signature(invokesig), mi) end return nothing end @@ -371,7 +370,7 @@ function ir_prepare_inlining!(insert_node!::Inserter, inline_target::Union{IRCod if !validate_sparams(sparam_vals) # N.B. This works on the caller-side argexprs, (i.e. 
before the va fixup below) sp_ssa = insert_node!( - effect_free(NewInstruction(Expr(:call, Core._compute_sparams, def, argexprs...), SimpleVector, topline))) + effect_free_and_nothrow(NewInstruction(Expr(:call, Core._compute_sparams, def, argexprs...), SimpleVector, topline))) end if def.isva nargs_def = Int(def.nargs::Int32) @@ -427,7 +426,7 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector inline_compact.result[idx′][:type] = argextype(val, isa(val, Argument) || isa(val, Expr) ? compact : inline_compact) # Everything legal in value position is guaranteed to be effect free in stmt position - inline_compact.result[idx′][:flag] = IR_FLAG_EFFECT_FREE + inline_compact.result[idx′][:flag] = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW break end inline_compact[idx′] = stmt′ @@ -489,7 +488,7 @@ function fix_va_argexprs!(insert_node!::Inserter, inline_target::Union{IRCode, I push!(tuple_call.args, arg) push!(tuple_typs, argextype(arg, inline_target)) end - tuple_typ = tuple_tfunc(OptimizerLattice(), tuple_typs) + tuple_typ = tuple_tfunc(SimpleInferenceLattice.instance, tuple_typs) tuple_inst = NewInstruction(tuple_call, tuple_typ, line_idx) push!(newargexprs, insert_node!(tuple_inst)) return newargexprs @@ -703,7 +702,7 @@ function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inboun for aidx in 1:length(argexprs) aexpr = argexprs[aidx] if isa(aexpr, Expr) || isa(aexpr, GlobalRef) - ninst = effect_free(NewInstruction(aexpr, argextype(aexpr, compact), compact.result[idx][:line])) + ninst = effect_free_and_nothrow(NewInstruction(aexpr, argextype(aexpr, compact), compact.result[idx][:line])) argexprs[aidx] = insert_node_here!(compact, ninst) end end @@ -871,9 +870,8 @@ end function resolve_todo(mi::MethodInstance, result::Union{MethodMatch,InferenceResult}, argtypes::Vector{Any}, @nospecialize(info::CallInfo), flag::UInt8, state::InliningState; invokesig::Union{Nothing,Vector{Any}}=nothing) - et = InliningEdgeTracker(state.et, 
invokesig) + et = InliningEdgeTracker(state, invokesig) - #XXX: update_valid_age!(min_valid[1], max_valid[1], sv) if isa(result, InferenceResult) src = result.src effects = result.ipo_effects @@ -916,7 +914,7 @@ function resolve_todo(mi::MethodInstance, argtypes::Vector{Any}, return nothing end - et = InliningEdgeTracker(state.et, nothing) + et = InliningEdgeTracker(state) cached_result = get_cached_result(state, mi) if cached_result isa ConstantCase @@ -994,16 +992,17 @@ function flags_for_effects(effects::Effects) if is_consistent(effects) flags |= IR_FLAG_CONSISTENT end - if is_removable_if_unused(effects) - flags |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW - elseif is_nothrow(effects) + if is_effect_free(effects) + flags |= IR_FLAG_EFFECT_FREE + end + if is_nothrow(effects) flags |= IR_FLAG_NOTHROW end return flags end function handle_single_case!(todo::Vector{Pair{Int,Any}}, - ir::IRCode, idx::Int, stmt::Expr, @nospecialize(case), params::OptimizationParams, + ir::IRCode, idx::Int, stmt::Expr, @nospecialize(case), isinvoke::Bool = false) if isa(case, ConstantCase) ir[SSAValue(idx)][:inst] = case.val @@ -1191,13 +1190,13 @@ function handle_invoke_call!(todo::Vector{Pair{Int,Any}}, validate_sparams(mi.sparam_vals) || return nothing if Union{} !== argtypes_to_type(argtypes) <: mi.def.sig item = resolve_todo(mi, result.result, argtypes, info, flag, state; invokesig) - handle_single_case!(todo, ir, idx, stmt, item, OptimizationParams(state.interp), true) + handle_single_case!(todo, ir, idx, stmt, item, true) return nothing end end item = analyze_method!(match, argtypes, info, flag, state; allow_typevars=false, invokesig) end - handle_single_case!(todo, ir, idx, stmt, item, OptimizationParams(state.interp), true) + handle_single_case!(todo, ir, idx, stmt, item, true) return nothing end @@ -1408,7 +1407,7 @@ function compute_inlining_cases(@nospecialize(info::CallInfo), flag::UInt8, sig: fully_covered &= split_fully_covered end - joint_effects = Effects(joint_effects; 
nothrow=fully_covered) + fully_covered || (joint_effects = Effects(joint_effects; nothrow=false)) if handled_all_cases && revisit_idx !== nothing # we handled everything except one match with unmatched sparams, @@ -1451,7 +1450,7 @@ function handle_call!(todo::Vector{Pair{Int,Any}}, cases === nothing && return nothing cases, all_covered, joint_effects = cases handle_cases!(todo, ir, idx, stmt, argtypes_to_type(sig.argtypes), cases, - all_covered, joint_effects, OptimizationParams(state.interp)) + all_covered, joint_effects) end function handle_match!(cases::Vector{InliningCase}, @@ -1490,7 +1489,7 @@ function semiconcrete_result_item(result::SemiConcreteResult, @nospecialize(info::CallInfo), flag::UInt8, state::InliningState) mi = result.mi if !OptimizationParams(state.interp).inlining || is_stmt_noinline(flag) - et = InliningEdgeTracker(state.et, nothing) + et = InliningEdgeTracker(state) return compileable_specialization(mi, result.effects, et, info; compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) else @@ -1524,7 +1523,7 @@ may_inline_concrete_result(result::ConcreteResult) = function concrete_result_item(result::ConcreteResult, @nospecialize(info::CallInfo), state::InliningState; invokesig::Union{Nothing,Vector{Any}}=nothing) if !may_inline_concrete_result(result) - et = InliningEdgeTracker(state.et, invokesig) + et = InliningEdgeTracker(state, invokesig) return compileable_specialization(result.mi, result.effects, et, info; compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) end @@ -1534,12 +1533,12 @@ end function handle_cases!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, stmt::Expr, @nospecialize(atype), cases::Vector{InliningCase}, fully_covered::Bool, - joint_effects::Effects, params::OptimizationParams) + joint_effects::Effects) # If we only have one case and that case is fully covered, we may either # be able to do the inlining now (for constant cases), or push it directly # onto the todo list if 
fully_covered && length(cases) == 1 - handle_single_case!(todo, ir, idx, stmt, cases[1].item, params) + handle_single_case!(todo, ir, idx, stmt, cases[1].item) elseif length(cases) > 0 isa(atype, DataType) || return nothing for case in cases @@ -1572,7 +1571,7 @@ function handle_opaque_closure_call!(todo::Vector{Pair{Int,Any}}, item = analyze_method!(info.match, sig.argtypes, info, flag, state; allow_typevars=false) end end - handle_single_case!(todo, ir, idx, stmt, item, OptimizationParams(state.interp)) + handle_single_case!(todo, ir, idx, stmt, item) return nothing end @@ -1584,7 +1583,7 @@ function handle_modifyfield!_call!(ir::IRCode, idx::Int, stmt::Expr, info::Modif length(info.results) == 1 || return nothing match = info.results[1]::MethodMatch match.fully_covers || return nothing - case = compileable_specialization(match, Effects(), InliningEdgeTracker(state.et), info; + case = compileable_specialization(match, Effects(), InliningEdgeTracker(state), info; compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) case === nothing && return nothing stmt.head = :invoke_modify @@ -1652,7 +1651,7 @@ function inline_const_if_inlineable!(inst::Instruction) inst[:inst] = quoted(rt.val) return true end - inst[:flag] |= IR_FLAG_EFFECT_FREE + inst[:flag] |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW return false end @@ -1775,7 +1774,7 @@ function late_inline_special_case!( return SomeCase(quoted(type.val)) end cmp_call = Expr(:call, GlobalRef(Core, :(===)), stmt.args[2], stmt.args[3]) - cmp_call_ssa = insert_node!(ir, idx, effect_free(NewInstruction(cmp_call, Bool))) + cmp_call_ssa = insert_node!(ir, idx, effect_free_and_nothrow(NewInstruction(cmp_call, Bool))) not_call = Expr(:call, GlobalRef(Core.Intrinsics, :not_int), cmp_call_ssa) return SomeCase(not_call) elseif length(argtypes) == 3 && istopfunction(f, :(>:)) @@ -1818,13 +1817,13 @@ end function insert_spval!(insert_node!::Inserter, spvals_ssa::SSAValue, spidx::Int, do_isdefined::Bool) ret = 
insert_node!( - effect_free(NewInstruction(Expr(:call, Core._svec_ref, false, spvals_ssa, spidx), Any))) + effect_free_and_nothrow(NewInstruction(Expr(:call, Core._svec_ref, false, spvals_ssa, spidx), Any))) tcheck_not = nothing if do_isdefined tcheck = insert_node!( - effect_free(NewInstruction(Expr(:call, Core.isa, ret, Core.TypeVar), Bool))) + effect_free_and_nothrow(NewInstruction(Expr(:call, Core.isa, ret, Core.TypeVar), Bool))) tcheck_not = insert_node!( - effect_free(NewInstruction(Expr(:call, not_int, tcheck), Bool))) + effect_free_and_nothrow(NewInstruction(Expr(:call, not_int, tcheck), Bool))) end return (ret, tcheck_not) end @@ -1851,7 +1850,7 @@ function ssa_substitute_op!(insert_node!::Inserter, subst_inst::Instruction, (ret, tcheck_not) = insert_spval!(insert_node!, spvals_ssa::SSAValue, spidx, maybe_undef) if maybe_undef insert_node!( - non_effect_free(NewInstruction(Expr(:throw_undef_if_not, val.name, tcheck_not), Nothing))) + NewInstruction(Expr(:throw_undef_if_not, val.name, tcheck_not), Nothing)) end return ret end diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index c5415add51cc5..5c6751c1e1dda 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -23,14 +23,14 @@ function cfg_delete_edge!(cfg::CFG, from::Int, to::Int) preds = cfg.blocks[to].preds succs = cfg.blocks[from].succs # Assumes that blocks appear at most once in preds and succs - deleteat!(preds, findfirst(x->x === from, preds)::Int) - deleteat!(succs, findfirst(x->x === to, succs)::Int) + deleteat!(preds, findfirst(x::Int->x==from, preds)::Int) + deleteat!(succs, findfirst(x::Int->x==to, succs)::Int) nothing end function bb_ordering() - lt=(<=) - by=x->first(x.stmts) + lt = (<=) + by = x::BasicBlock -> first(x.stmts) ord(lt, by, nothing, Forward) end @@ -316,8 +316,7 @@ function NewInstruction(inst::Instruction; return NewInstruction(stmt, type, info, line, flag) end @specialize -effect_free(newinst::NewInstruction) = NewInstruction(newinst; 
flag=add_flag(newinst, IR_FLAG_EFFECT_FREE)) -non_effect_free(newinst::NewInstruction) = NewInstruction(newinst; flag=sub_flag(newinst, IR_FLAG_EFFECT_FREE)) +effect_free_and_nothrow(newinst::NewInstruction) = NewInstruction(newinst; flag=add_flag(newinst, IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW)) with_flags(newinst::NewInstruction, flags::UInt8) = NewInstruction(newinst; flag=add_flag(newinst, flags)) without_flags(newinst::NewInstruction, flags::UInt8) = NewInstruction(newinst; flag=sub_flag(newinst, flags)) function add_flag(newinst::NewInstruction, newflag::UInt8) @@ -458,7 +457,7 @@ function is_relevant_expr(e::Expr) :new, :splatnew, :(=), :(&), :gc_preserve_begin, :gc_preserve_end, :foreigncall, :isdefined, :copyast, - :undefcheck, :throw_undef_if_not, + :throw_undef_if_not, :cfunction, :method, :pop_exception, :new_opaque_closure) end @@ -594,7 +593,7 @@ function CFGTransformState!(blocks::Vector{BasicBlock}, allow_cfg_transforms::Bo end end # Dead blocks get removed from the predecessor list - filter!(x->x !== -1, preds) + filter!(x::Int->x≠-1, preds) # Rename succs for j = 1:length(succs) succs[j] = bb_rename[succs[j]] @@ -637,7 +636,7 @@ mutable struct IncrementalCompact function IncrementalCompact(code::IRCode, cfg_transform::CFGTransformState) # Sort by position with attach after nodes after regular ones info = code.new_nodes.info - perm = sort!(collect(eachindex(info)); by=i->(2info[i].pos+info[i].attach_after, i)) + perm = sort!(collect(eachindex(info)); by=i::Int->(2info[i].pos+info[i].attach_after, i)) new_len = length(code.stmts) + length(info) result = InstructionStream(new_len) used_ssas = fill(0, new_len) @@ -656,7 +655,7 @@ mutable struct IncrementalCompact # For inlining function IncrementalCompact(parent::IncrementalCompact, code::IRCode, result_offset) info = code.new_nodes.info - perm = sort!(collect(eachindex(info)); by=i->(info[i].pos, i)) + perm = sort!(collect(eachindex(info)); by=i::Int->(info[i].pos, i)) new_len = length(code.stmts) + 
length(info) ssa_rename = Any[SSAValue(i) for i = 1:new_len] bb_rename = Vector{Int}() @@ -783,7 +782,7 @@ function dominates_ssa(compact::IncrementalCompact, domtree::DomTree, x::AnySSAV return dominates(domtree, xb, yb) end -function _count_added_node!(compact, @nospecialize(val)) +function _count_added_node!(compact::IncrementalCompact, @nospecialize(val)) if isa(val, SSAValue) compact.used_ssas[val.id] += 1 return false @@ -805,7 +804,7 @@ end function add_pending!(compact::IncrementalCompact, pos::Int, attach_after::Bool) node = add_inst!(compact.pending_nodes, pos, attach_after) - heappush!(compact.pending_perm, length(compact.pending_nodes), By(x -> compact.pending_nodes.info[x].pos)) + heappush!(compact.pending_perm, length(compact.pending_nodes), By(x::Int->compact.pending_nodes.info[x].pos)) return node end @@ -997,7 +996,7 @@ const __check_ssa_counts__ = fill(false) should_check_ssa_counts() = __check_ssa_counts__[] # specifically meant to be used with body1 = compact.result and body2 = compact.new_new_nodes, with nvals == length(compact.used_ssas) -function find_ssavalue_uses1(compact) +function find_ssavalue_uses1(compact::IncrementalCompact) body1, body2 = compact.result.inst, compact.new_new_nodes.stmts.inst nvals = length(compact.used_ssas) nvalsnew = length(compact.new_new_used_ssas) @@ -1078,11 +1077,19 @@ function getindex(view::TypesView, idx::NewSSAValue) return view.ir[idx][:type] end +# N.B.: Don't make this <: Function to avoid ::Function deopt +struct Refiner + result_flags::Vector{UInt8} + result_idx::Int +end +(this::Refiner)() = (this.result_flags[this.result_idx] |= IR_FLAG_REFINED; nothing) + function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int}, processed_idx::Int, result_idx::Int, ssa_rename::Vector{Any}, used_ssas::Vector{Int}, new_new_used_ssas::Vector{Int}, - do_rename_ssa::Bool) + do_rename_ssa::Bool, + mark_refined!::Union{Refiner, Nothing}) values = Vector{Any}(undef, length(old_values)) for i = 
1:length(old_values) isassigned(old_values, i) || continue @@ -1093,7 +1100,7 @@ function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int} push!(late_fixup, result_idx) val = OldSSAValue(val.id) else - val = renumber_ssa2(val, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa) + val = renumber_ssa2(val, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!) end else used_ssas[val.id] += 1 @@ -1103,7 +1110,7 @@ function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int} push!(late_fixup, result_idx) else # Always renumber these. do_rename_ssa applies only to actual SSAValues - val = renumber_ssa2(SSAValue(val.id), ssa_rename, used_ssas, new_new_used_ssas, true) + val = renumber_ssa2(SSAValue(val.id), ssa_rename, used_ssas, new_new_used_ssas, true, mark_refined!) end elseif isa(val, NewSSAValue) if val.id < 0 @@ -1120,14 +1127,18 @@ function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int} end function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssas::Vector{Int}, - new_new_used_ssas::Vector{Int}, do_rename_ssa::Bool) + new_new_used_ssas::Vector{Int}, do_rename_ssa::Bool, mark_refined!::Union{Refiner, Nothing}) id = val.id - if id > length(ssanums) - return val - end if do_rename_ssa + if id > length(ssanums) + return val + end val = ssanums[id] end + if isa(val, Refined) + val = val.val + mark_refined! 
!== nothing && mark_refined!() + end if isa(val, SSAValue) used_ssas[val.id] += 1 end @@ -1135,7 +1146,7 @@ function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssas::Vector{In end function renumber_ssa2(val::NewSSAValue, ssanums::Vector{Any}, used_ssas::Vector{Int}, - new_new_used_ssas::Vector{Int}, do_rename_ssa::Bool) + new_new_used_ssas::Vector{Int}, do_rename_ssa::Bool, mark_refined!::Union{Refiner, Nothing}) if val.id < 0 new_new_used_ssas[-val.id] += 1 return val @@ -1145,7 +1156,7 @@ function renumber_ssa2(val::NewSSAValue, ssanums::Vector{Any}, used_ssas::Vector end end -function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssas::Vector{Int}, new_new_used_ssas::Vector{Int}, late_fixup::Vector{Int}, result_idx::Int, do_rename_ssa::Bool) +function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssas::Vector{Int}, new_new_used_ssas::Vector{Int}, late_fixup::Vector{Int}, result_idx::Int, do_rename_ssa::Bool, mark_refined!::Union{Refiner, Nothing}) urs = userefs(stmt) for op in urs val = op[] @@ -1153,7 +1164,7 @@ function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssas::Ve push!(late_fixup, result_idx) end if isa(val, Union{SSAValue, NewSSAValue}) - val = renumber_ssa2(val, ssanums, used_ssas, new_new_used_ssas, do_rename_ssa) + val = renumber_ssa2(val, ssanums, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!) 
end if isa(val, OldSSAValue) || isa(val, NewSSAValue) push!(late_fixup, result_idx) @@ -1166,8 +1177,8 @@ end # Used in inlining before we start compacting - Only works at the CFG level function kill_edge!(bbs::Vector{BasicBlock}, from::Int, to::Int, callback=nothing) preds, succs = bbs[to].preds, bbs[from].succs - deleteat!(preds, findfirst(x->x === from, preds)::Int) - deleteat!(succs, findfirst(x->x === to, succs)::Int) + deleteat!(preds, findfirst(x::Int->x==from, preds)::Int) + deleteat!(succs, findfirst(x::Int->x==to, succs)::Int) if length(preds) == 0 for succ in copy(bbs[to].succs) kill_edge!(bbs, to, succ, callback) @@ -1190,12 +1201,12 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: (; bb_rename_pred, bb_rename_succ, result_bbs) = compact.cfg_transform preds = result_bbs[bb_rename_succ[to]].preds succs = result_bbs[bb_rename_pred[from]].succs - deleteat!(preds, findfirst(x->x === bb_rename_pred[from], preds)::Int) - deleteat!(succs, findfirst(x->x === bb_rename_succ[to], succs)::Int) + deleteat!(preds, findfirst(x::Int->x==bb_rename_pred[from], preds)::Int) + deleteat!(succs, findfirst(x::Int->x==bb_rename_succ[to], succs)::Int) # Check if the block is now dead if length(preds) == 0 for succ in copy(result_bbs[bb_rename_succ[to]].succs) - kill_edge!(compact, active_bb, to, findfirst(x->x === succ, bb_rename_pred)::Int) + kill_edge!(compact, active_bb, to, findfirst(x::Int->x==succ, bb_rename_pred)::Int) end if to < active_bb # Kill all statements in the block @@ -1222,7 +1233,7 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: stmt = compact.result[idx][:inst] stmt === nothing && continue isa(stmt, PhiNode) || break - i = findfirst(x-> x == bb_rename_pred[from], stmt.edges) + i = findfirst(x::Int32->x==bb_rename_pred[from], stmt.edges) if i !== nothing deleteat!(stmt.edges, i) deleteat!(stmt.values, i) @@ -1234,7 +1245,7 @@ function kill_edge!(compact::IncrementalCompact, 
active_bb::Int, from::Int, to:: for stmt in CompactPeekIterator(compact, first(stmts), last(stmts)) stmt === nothing && continue isa(stmt, PhiNode) || break - i = findfirst(x-> x == from, stmt.edges) + i = findfirst(x::Int32->x==from, stmt.edges) if i !== nothing deleteat!(stmt.edges, i) deleteat!(stmt.values, i) @@ -1245,11 +1256,16 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: nothing end +struct Refined + val::Any + Refined(@nospecialize(val)) = new(val) +end + function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instruction, idx::Int, processed_idx::Int, active_bb::Int, do_rename_ssa::Bool) stmt = inst[:inst] (; result, ssa_rename, late_fixup, used_ssas, new_new_used_ssas) = compact (; cfg_transforms_enabled, fold_constant_branches, bb_rename_succ, bb_rename_pred, result_bbs) = compact.cfg_transform - ssa_rename[idx] = SSAValue(result_idx) + mark_refined! = Refiner(result.flag, result_idx) if stmt === nothing ssa_rename[idx] = stmt elseif isa(stmt, OldSSAValue) @@ -1257,6 +1273,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr elseif isa(stmt, GotoNode) && cfg_transforms_enabled label = bb_rename_succ[stmt.label] @assert label > 0 + ssa_rename[idx] = SSAValue(result_idx) result[result_idx][:inst] = GotoNode(label) result_idx += 1 elseif isa(stmt, GlobalRef) @@ -1265,14 +1282,16 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr if (flag & total_flags) == total_flags ssa_rename[idx] = stmt else + ssa_rename[idx] = SSAValue(result_idx) result[result_idx][:inst] = stmt result_idx += 1 end elseif isa(stmt, GotoNode) + ssa_rename[idx] = SSAValue(result_idx) result[result_idx][:inst] = stmt result_idx += 1 elseif isa(stmt, GotoIfNot) && cfg_transforms_enabled - stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa)::GotoIfNot + stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, 
new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)::GotoIfNot result[result_idx][:inst] = stmt cond = stmt.cond if fold_constant_branches @@ -1284,12 +1303,14 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr isa(cond, Bool) || @goto bail end if cond + ssa_rename[idx] = nothing result[result_idx][:inst] = nothing kill_edge!(compact, active_bb, active_bb, stmt.dest) # Don't increment result_idx => Drop this statement else label = bb_rename_succ[stmt.dest] @assert label > 0 + ssa_rename[idx] = SSAValue(result_idx) result[result_idx][:inst] = GotoNode(label) kill_edge!(compact, active_bb, active_bb, active_bb+1) result_idx += 1 @@ -1298,11 +1319,12 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr @label bail label = bb_rename_succ[stmt.dest] @assert label > 0 + ssa_rename[idx] = SSAValue(result_idx) result[result_idx][:inst] = GotoIfNot(cond, label) result_idx += 1 end elseif isa(stmt, Expr) - stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa)::Expr + stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)::Expr if cfg_transforms_enabled && isexpr(stmt, :enter) label = bb_rename_succ[stmt.args[1]::Int] @assert label > 0 @@ -1312,16 +1334,18 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr if isa(cond, Bool) && cond === true # cond was folded to true - this statement # is dead. + ssa_rename[idx] = nothing return result_idx end end + ssa_rename[idx] = SSAValue(result_idx) result[result_idx][:inst] = stmt result_idx += 1 elseif isa(stmt, PiNode) # As an optimization, we eliminate any trivial pinodes. For performance, we use === # type equality. 
We may want to consider using == in either a separate pass or if # performance turns out ok - stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa)::PiNode + stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)::PiNode pi_val = stmt.val if isa(pi_val, SSAValue) if stmt.typ === result[pi_val.id][:type] @@ -1341,16 +1365,21 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr return result_idx end end + ssa_rename[idx] = SSAValue(result_idx) result[result_idx][:inst] = stmt result_idx += 1 elseif isa(stmt, ReturnNode) || isa(stmt, UpsilonNode) || isa(stmt, GotoIfNot) - result[result_idx][:inst] = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa) + ssa_rename[idx] = SSAValue(result_idx) + result[result_idx][:inst] = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!) result_idx += 1 elseif isa(stmt, PhiNode) + # N.B.: For PhiNodes, this needs to be at the top, since PhiNodes + # can self-reference. + ssa_rename[idx] = SSAValue(result_idx) if cfg_transforms_enabled # Rename phi node edges let bb_rename_pred=bb_rename_pred - map!(i::Int32 -> bb_rename_pred[i], stmt.edges, stmt.edges) + map!(i::Int32->bb_rename_pred[i], stmt.edges, stmt.edges) end # Remove edges and values associated with dead blocks. Entries in @@ -1364,6 +1393,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr # not a value we can copy), we copy only the edges and (defined) # values we want to keep to new arrays initialized with undefined # elements. 
+ edges = Vector{Int32}(undef, length(stmt.edges)) values = Vector{Any}(undef, length(stmt.values)) new_index = 1 @@ -1383,7 +1413,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr values = stmt.values end - values = process_phinode_values(values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa) + values = process_phinode_values(values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!) # Don't remove the phi node if it is before the definition of its value # because doing so can create forward references. This should only # happen with dead loops, but can cause problems when optimization @@ -1407,24 +1437,31 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr result_idx += 1 end elseif isa(stmt, PhiCNode) - result[result_idx][:inst] = PhiCNode(process_phinode_values(stmt.values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa)) + ssa_rename[idx] = SSAValue(result_idx) + result[result_idx][:inst] = PhiCNode(process_phinode_values(stmt.values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!)) result_idx += 1 - elseif isa(stmt, SSAValue) - # identity assign, replace uses of this ssa value with its result - if do_rename_ssa - stmt = ssa_rename[stmt.id] - end - ssa_rename[idx] = stmt - elseif isa(stmt, NewSSAValue) - ssa_rename[idx] = SSAValue(stmt.id) else - # Constant assign, replace uses of this ssa value with its result + if isa(stmt, SSAValue) + # identity assign, replace uses of this ssa value with its result + if do_rename_ssa + stmt = ssa_rename[stmt.id] + end + elseif isa(stmt, NewSSAValue) + stmt = SSAValue(stmt.id) + else + # Constant assign, replace uses of this ssa value with its result + end + if (inst[:flag] & IR_FLAG_REFINED) != 0 && !isa(stmt, Refined) + # If we're 
compacting away an instruction that was marked as refined, + # leave a marker in the ssa_rename, so we can taint any users. + stmt = Refined(stmt) + end ssa_rename[idx] = stmt end return result_idx end -function resize!(compact::IncrementalCompact, nnewnodes) +function resize!(compact::IncrementalCompact, nnewnodes::Int) old_length = length(compact.result) resize!(compact.result, nnewnodes) resize!(compact.used_ssas, nnewnodes) @@ -1434,7 +1471,8 @@ function resize!(compact::IncrementalCompact, nnewnodes) return compact end -function finish_current_bb!(compact::IncrementalCompact, active_bb, old_result_idx=compact.result_idx, unreachable=false) +function finish_current_bb!(compact::IncrementalCompact, active_bb::Int, + old_result_idx::Int=compact.result_idx, unreachable::Bool=false) (;result_bbs, cfg_transforms_enabled, bb_rename_succ) = compact.cfg_transform if compact.active_result_bb > length(result_bbs) #@assert compact.bb_rename[active_bb] == -1 @@ -1573,7 +1611,7 @@ function iterate_compact(compact::IncrementalCompact) if !(info.attach_after ? info.pos <= compact.idx - 1 : info.pos <= compact.idx) break end - heappop!(compact.pending_perm, By(x -> compact.pending_nodes.info[x].pos)) + heappop!(compact.pending_perm, By(x::Int -> compact.pending_nodes.info[x].pos)) end # Move to next block compact.idx += 1 @@ -1600,7 +1638,7 @@ function iterate_compact(compact::IncrementalCompact) elseif !isempty(compact.pending_perm) && (info = compact.pending_nodes.info[compact.pending_perm[1]]; info.attach_after ? 
info.pos == idx - 1 : info.pos == idx) - new_idx = heappop!(compact.pending_perm, By(x -> compact.pending_nodes.info[x].pos)) + new_idx = heappop!(compact.pending_perm, By(x::Int -> compact.pending_nodes.info[x].pos)) new_node_entry = compact.pending_nodes.stmts[new_idx] new_node_info = compact.pending_nodes.info[new_idx] new_idx += length(compact.ir.stmts) + length(compact.ir.new_nodes) @@ -1638,7 +1676,7 @@ function maybe_erase_unused!(callback::Function, compact::IncrementalCompact, id stmt = inst[:inst] stmt === nothing && return false inst[:type] === Bottom && return false - effect_free = (inst[:flag] & IR_FLAG_EFFECT_FREE) ≠ 0 + effect_free = (inst[:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW)) == IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW effect_free || return false foreachssa(stmt) do val::SSAValue if compact.used_ssas[val.id] == 1 @@ -1688,6 +1726,9 @@ function fixup_node(compact::IncrementalCompact, @nospecialize(stmt), reify_new_ end elseif isa(stmt, OldSSAValue) val = compact.ssa_rename[stmt.id] + if isa(val, Refined) + val = val.val + end if isa(val, SSAValue) compact.used_ssas[val.id] += 1 end diff --git a/base/compiler/ssair/irinterp.jl b/base/compiler/ssair/irinterp.jl index 8d75ad3948ee2..f4c04ea4e1380 100644 --- a/base/compiler/ssair/irinterp.jl +++ b/base/compiler/ssair/irinterp.jl @@ -43,14 +43,6 @@ function abstract_eval_phi_stmt(interp::AbstractInterpreter, phi::PhiNode, ::Int return abstract_eval_phi(interp, phi, nothing, irsv) end -function propagate_control_effects!(interp::AbstractInterpreter, idx::Int, stmt::GotoIfNot, - irsv::IRInterpretationState, extra_reprocess::Union{Nothing,BitSet,BitSetBoundedMinPrioritySet}) - # Nothing to do for most abstract interpreters, but if the abstract - # interpreter has control-dependent lattice effects, it can override - # this method. - return false -end - function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, irsv::IRInterpretationState) si = StmtInfo(true) # TODO better job here? 
(; rt, effects, info) = abstract_call(interp, arginfo, si, irsv) @@ -102,8 +94,7 @@ function kill_terminator_edges!(irsv::IRInterpretationState, term_idx::Int, bb:: end function reprocess_instruction!(interp::AbstractInterpreter, idx::Int, bb::Union{Int,Nothing}, - @nospecialize(inst), @nospecialize(typ), irsv::IRInterpretationState, - extra_reprocess::Union{Nothing,BitSet,BitSetBoundedMinPrioritySet}) + @nospecialize(inst), @nospecialize(typ), irsv::IRInterpretationState) ir = irsv.ir if isa(inst, GotoIfNot) cond = inst.cond @@ -126,12 +117,12 @@ function reprocess_instruction!(interp::AbstractInterpreter, idx::Int, bb::Union end return true end - return propagate_control_effects!(interp, idx, inst, irsv, extra_reprocess) + return false end rt = nothing if isa(inst, Expr) head = inst.head - if head === :call || head === :foreigncall || head === :new || head === :splatnew + if head === :call || head === :foreigncall || head === :new || head === :splatnew || head === :static_parameter || head === :isdefined (; rt, effects) = abstract_eval_statement_expr(interp, inst, nothing, irsv) ir.stmts[idx][:flag] |= flags_for_effects(effects) elseif head === :invoke @@ -139,8 +130,16 @@ function reprocess_instruction!(interp::AbstractInterpreter, idx::Int, bb::Union if nothrow ir.stmts[idx][:flag] |= IR_FLAG_NOTHROW end - elseif head === :throw_undef_if_not || # TODO: Terminate interpretation early if known false? 
- head === :gc_preserve_begin || + elseif head === :throw_undef_if_not + condval = maybe_extract_const_bool(argextype(inst.args[2], ir)) + condval isa Bool || return false + if condval + ir.stmts[idx][:inst] = nothing + # We simplified the IR, but we did not update the type + return false + end + rt = Union{} + elseif head === :gc_preserve_begin || head === :gc_preserve_end return false else @@ -158,12 +157,12 @@ function reprocess_instruction!(interp::AbstractInterpreter, idx::Int, bb::Union elseif isa(inst, GlobalRef) # GlobalRef is not refinable else - error("reprocess_instruction!: unhandled instruction found") + rt = argextype(inst, irsv.ir) end if rt !== nothing if isa(rt, Const) ir.stmts[idx][:type] = rt - if is_inlineable_constant(rt.val) && (ir.stmts[idx][:flag] & IR_FLAG_EFFECT_FREE) != 0 + if is_inlineable_constant(rt.val) && (ir.stmts[idx][:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW)) == IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW ir.stmts[idx][:inst] = quoted(rt.val) end return true @@ -204,9 +203,8 @@ function process_terminator!(ir::IRCode, @nospecialize(inst), idx::Int, bb::Int, end end -default_reprocess(::AbstractInterpreter, ::IRInterpretationState) = nothing function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState; - extra_reprocess::Union{Nothing,BitSet} = default_reprocess(interp, irsv)) + externally_refined::Union{Nothing,BitSet} = nothing) interp = switch_to_irinterp(interp) (; ir, tpdum, ssa_refined) = irsv @@ -227,12 +225,11 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR irsv.curridx = idx inst = ir.stmts[idx][:inst] typ = ir.stmts[idx][:type] + flag = ir.stmts[idx][:flag] any_refined = false - if extra_reprocess !== nothing - if idx in extra_reprocess - pop!(extra_reprocess, idx) - any_refined = true - end + if (flag & IR_FLAG_REFINED) != 0 + any_refined = true + ir.stmts[idx][:flag] &= ~IR_FLAG_REFINED end for ur in userefs(inst) val = ur[] @@ -251,8 +248,9 @@ 
function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR if typ === Bottom && (idx != lstmt || !is_terminator_or_phi) continue end - if any_refined && reprocess_instruction!(interp, - idx, bb, inst, typ, irsv, extra_reprocess) + if (any_refined && reprocess_instruction!(interp, + idx, bb, inst, typ, irsv)) || + (externally_refined !== nothing && idx in externally_refined) push!(ssa_refined, idx) inst = ir.stmts[idx][:inst] typ = ir.stmts[idx][:type] @@ -277,9 +275,6 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR # Slow path begin @label residual_scan stmt_ip = BitSetBoundedMinPrioritySet(length(ir.stmts)) - if extra_reprocess !== nothing - append!(stmt_ip, extra_reprocess) - end # Slow Path Phase 1.A: Complete use scanning while !isempty(bb_ip) @@ -289,6 +284,11 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR for idx = stmts irsv.curridx = idx inst = ir.stmts[idx][:inst] + flag = ir.stmts[idx][:flag] + if (flag & IR_FLAG_REFINED) != 0 + ir.stmts[idx][:flag] &= ~IR_FLAG_REFINED + push!(stmt_ip, idx) + end for ur in userefs(inst) val = ur[] if isa(val, Argument) @@ -335,7 +335,7 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR inst = ir.stmts[idx][:inst] typ = ir.stmts[idx][:type] if reprocess_instruction!(interp, - idx, nothing, inst, typ, irsv, stmt_ip) + idx, nothing, inst, typ, irsv) append!(stmt_ip, tpdum[idx]) end end diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 4bfb5f3fcde56..9a312bec8f202 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -176,11 +176,12 @@ function find_def_for_use( return def, useblock, curblock end -function collect_leaves(compact::IncrementalCompact, @nospecialize(val), @nospecialize(typeconstraint), 𝕃ₒ::AbstractLattice) +function collect_leaves(compact::IncrementalCompact, @nospecialize(val), @nospecialize(typeconstraint), 
𝕃ₒ::AbstractLattice, + predecessors = ((@nospecialize(def), compact::IncrementalCompact) -> isa(def, PhiNode) ? def.values : nothing)) if isa(val, Union{OldSSAValue, SSAValue}) val, typeconstraint = simple_walk_constraint(compact, val, typeconstraint) end - return walk_to_defs(compact, val, typeconstraint, 𝕃ₒ) + return walk_to_defs(compact, val, typeconstraint, predecessors, 𝕃ₒ) end function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#), @@ -189,6 +190,9 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA if isa(defssa, OldSSAValue) if already_inserted(compact, defssa) rename = compact.ssa_rename[defssa.id] + if isa(rename, Refined) + rename = rename.val + end if isa(rename, AnySSAValue) defssa = rename continue @@ -235,16 +239,21 @@ function simple_walk_constraint(compact::IncrementalCompact, @nospecialize(defss end """ - walk_to_defs(compact, val, typeconstraint) + walk_to_defs(compact, val, typeconstraint, predecessors) Starting at `val` walk use-def chains to get all the leaves feeding into this `val` -(pruning those leaves rules out by path conditions). +(pruning those leaves ruled out by path conditions). + +`predecessors(def, compact)` is a callback which should return the set of possible +predecessors for a "phi-like" node (PhiNode or Core.ifelse) or `nothing` otherwise. 
""" -function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint), 𝕃ₒ::AbstractLattice) - visited_phinodes = AnySSAValue[] - isa(defssa, AnySSAValue) || return Any[defssa], visited_phinodes +function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint), predecessors, 𝕃ₒ::AbstractLattice) + visited_philikes = AnySSAValue[] + isa(defssa, AnySSAValue) || return Any[defssa], visited_philikes def = compact[defssa][:inst] - isa(def, PhiNode) || return Any[defssa], visited_phinodes + if predecessors(def, compact) === nothing + return Any[defssa], visited_philikes + end visited_constraints = IdDict{AnySSAValue, Any}() worklist_defs = AnySSAValue[] worklist_constraints = Any[] @@ -256,12 +265,14 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe typeconstraint = pop!(worklist_constraints) visited_constraints[defssa] = typeconstraint def = compact[defssa][:inst] - if isa(def, PhiNode) - push!(visited_phinodes, defssa) + values = predecessors(def, compact) + if values !== nothing + push!(visited_philikes, defssa) possible_predecessors = Int[] - for n in 1:length(def.edges) - isassigned(def.values, n) || continue - val = def.values[n] + + for n in 1:length(values) + isassigned(values, n) || continue + val = values[n] if is_old(compact, defssa) && isa(val, SSAValue) val = OldSSAValue(val.id) end @@ -270,8 +281,7 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe push!(possible_predecessors, n) end for n in possible_predecessors - pred = def.edges[n] - val = def.values[n] + val = values[n] if is_old(compact, defssa) && isa(val, SSAValue) val = OldSSAValue(val.id) end @@ -306,7 +316,7 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe push!(leaves, defssa) end end - return leaves, visited_phinodes + return leaves, visited_philikes end function record_immutable_preserve!(new_preserves::Vector{Any}, 
def::Expr, compact::IncrementalCompact) @@ -353,10 +363,11 @@ struct LiftedValue LiftedValue(@nospecialize val) = new(val) end const LiftedLeaves = IdDict{Any, Union{Nothing,LiftedValue}} +const LiftedDefs = IdDict{Any, Bool} # try to compute lifted values that can replace `getfield(x, field)` call # where `x` is an immutable struct that are defined at any of `leaves` -function lift_leaves(compact::IncrementalCompact, @nospecialize(result_t), field::Int, +function lift_leaves(compact::IncrementalCompact, field::Int, leaves::Vector{Any}, 𝕃ₒ::AbstractLattice) # For every leaf, the lifted value lifted_leaves = LiftedLeaves() @@ -386,15 +397,6 @@ function lift_leaves(compact::IncrementalCompact, @nospecialize(result_t), field continue end return nothing - # Expand the Expr(:new) to include it's element Expr(:new) nodes up until the one we want - compact[leaf] = nothing - for i = (length(def.args) + 1):(1+field) - ftyp = fieldtype(typ, i - 1) - isbitstype(ftyp) || return nothing - ninst = effect_free(NewInstruction(Expr(:new, ftyp), result_t)) - push!(def.args, insert_node!(compact, leaf, ninst)) - end - compact[leaf] = def end lift_arg!(compact, leaf, cache_key, def, 1+field, lifted_leaves) continue @@ -464,10 +466,13 @@ function lift_arg!( lifted = OldSSAValue(lifted.id) if already_inserted(compact, lifted) lifted = compact.ssa_rename[lifted.id] + if isa(lifted, Refined) + lifted = lifted.val + end end end if isa(lifted, GlobalRef) || isa(lifted, Expr) - lifted = insert_node!(compact, leaf, effect_free(NewInstruction(lifted, argextype(lifted, compact)))) + lifted = insert_node!(compact, leaf, effect_free_and_nothrow(NewInstruction(lifted, argextype(lifted, compact)))) compact[leaf] = nothing stmt.args[argidx] = lifted compact[leaf] = stmt @@ -482,6 +487,9 @@ end function walk_to_def(compact::IncrementalCompact, @nospecialize(leaf)) if isa(leaf, OldSSAValue) && already_inserted(compact, leaf) leaf = compact.ssa_rename[leaf.id] + if isa(leaf, Refined) + leaf = leaf.val 
+ end if isa(leaf, AnySSAValue) leaf = simple_walk(compact, leaf) end @@ -498,8 +506,6 @@ function walk_to_def(compact::IncrementalCompact, @nospecialize(leaf)) return Pair{Any, Any}(def, leaf) end -make_MaybeUndef(@nospecialize(typ)) = isa(typ, MaybeUndef) ? typ : MaybeUndef(typ) - """ lift_comparison!(cmp, compact::IncrementalCompact, idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice) @@ -566,7 +572,13 @@ function lift_comparison_leaves!(@specialize(tfunc), val, typeconstraint = simple_walk_constraint(compact, val, typeconstraint) end isa(typeconstraint, Union) || return # bail out if there won't be a good chance for lifting - leaves, visited_phinodes = collect_leaves(compact, val, typeconstraint, 𝕃ₒ) + + predecessors = function (@nospecialize(def), compact::IncrementalCompact) + isa(def, PhiNode) && return def.values + is_known_call(def, Core.ifelse, compact) && return def.args[3:4] + return nothing + end + leaves, visited_philikes = collect_leaves(compact, val, typeconstraint, 𝕃ₒ, predecessors) length(leaves) ≤ 1 && return # bail out if we don't have multiple leaves # check if we can evaluate the comparison for each one of the leaves @@ -586,18 +598,54 @@ function lift_comparison_leaves!(@specialize(tfunc), # perform lifting lifted_val = perform_lifting!(compact, - visited_phinodes, cmp, lifting_cache, Bool, + visited_philikes, cmp, lifting_cache, Bool, lifted_leaves::LiftedLeaves, val, nothing)::LiftedValue compact[idx] = lifted_val.val end -struct LiftedPhi +struct IfElseCall + call::Expr +end + +# An intermediate data structure used for lifting expressions through a +# "phi-like" instruction (either a PhiNode or a call to Core.ifelse) +struct LiftedPhilike ssa::AnySSAValue - node::PhiNode + node::Union{PhiNode,IfElseCall} need_argupdate::Bool end +struct SkipToken end; const SKIP_TOKEN = SkipToken() + +function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa#=::AnySSAValue=#), @nospecialize(old_value), + lifted_philikes::Vector{LiftedPhilike}, 
lifted_leaves::Union{LiftedLeaves, LiftedDefs}, reverse_mapping::IdDict{AnySSAValue, Int}) + val = old_value + if is_old(compact, old_node_ssa) && isa(val, SSAValue) + val = OldSSAValue(val.id) + end + if isa(val, AnySSAValue) + val = simple_walk(compact, val) + end + if val in keys(lifted_leaves) + lifted_val = lifted_leaves[val] + if isa(lifted_leaves, LiftedDefs) + return lifted_val + end + lifted_val === nothing && return UNDEF_TOKEN + val = lifted_val.val + if isa(val, AnySSAValue) + callback = (@nospecialize(pi), @nospecialize(idx)) -> true + val = simple_walk(compact, val, callback) + end + return val + elseif isa(val, AnySSAValue) && val in keys(reverse_mapping) + return lifted_philikes[reverse_mapping[val]].ssa + else + return SKIP_TOKEN # Probably ignored by path condition, skip this + end +end + function is_old(compact, @nospecialize(old_node_ssa)) isa(old_node_ssa, OldSSAValue) && !is_pending(compact, old_node_ssa) && @@ -605,13 +653,13 @@ function is_old(compact, @nospecialize(old_node_ssa)) end function perform_lifting!(compact::IncrementalCompact, - visited_phinodes::Vector{AnySSAValue}, @nospecialize(cache_key), + visited_philikes::Vector{AnySSAValue}, @nospecialize(cache_key), lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}, - @nospecialize(result_t), lifted_leaves::LiftedLeaves, @nospecialize(stmt_val), + @nospecialize(result_t), lifted_leaves::Union{LiftedLeaves, LiftedDefs}, @nospecialize(stmt_val), lazydomtree::Union{LazyDomtree,Nothing}) reverse_mapping = IdDict{AnySSAValue, Int}() - for id in 1:length(visited_phinodes) - reverse_mapping[visited_phinodes[id]] = id + for id in 1:length(visited_philikes) + reverse_mapping[visited_philikes[id]] = id end # Check if all the lifted leaves are the same @@ -636,7 +684,7 @@ function perform_lifting!(compact::IncrementalCompact, dominates_all = true if lazydomtree !== nothing domtree = get!(lazydomtree) - for item in visited_phinodes + for item in visited_philikes if !dominates_ssa(compact, 
domtree, the_leaf_val, item) dominates_all = false break @@ -649,64 +697,101 @@ function perform_lifting!(compact::IncrementalCompact, end # Insert PhiNodes - nphis = length(visited_phinodes) - lifted_phis = Vector{LiftedPhi}(undef, nphis) - for i = 1:nphis - item = visited_phinodes[i] + nphilikes = length(visited_philikes) + lifted_philikes = Vector{LiftedPhilike}(undef, nphilikes) + for i = 1:nphilikes + old_ssa = visited_philikes[i] + old_inst = compact[old_ssa] + old_node = old_inst[:inst]::Union{PhiNode,Expr} # FIXME this cache is broken somehow - # ckey = Pair{AnySSAValue, Any}(item, cache_key) + # ckey = Pair{AnySSAValue, Any}(old_ssa, cache_key) # cached = ckey in keys(lifting_cache) cached = false if cached ssa = lifting_cache[ckey] - lifted_phis[i] = LiftedPhi(ssa, compact[ssa][:inst]::PhiNode, false) + if isa(old_node, PhiNode) + lifted_philikes[i] = LiftedPhilike(ssa, old_node, false) + else + lifted_philikes[i] = LiftedPhilike(ssa, IfElseCall(old_node), false) + end continue end - n = PhiNode() - ssa = insert_node!(compact, item, effect_free(NewInstruction(n, result_t))) + if isa(old_node, PhiNode) + new_node = PhiNode() + ssa = insert_node!(compact, old_ssa, effect_free_and_nothrow(NewInstruction(new_node, result_t))) + lifted_philikes[i] = LiftedPhilike(ssa, new_node, true) + else + @assert is_known_call(old_node, Core.ifelse, compact) + ifelse_func, condition = old_node.args + if is_old(compact, old_ssa) && isa(condition, SSAValue) + condition = OldSSAValue(condition.id) + end + + new_node = Expr(:call, ifelse_func, condition) # Renamed then_result, else_result added below + new_inst = NewInstruction(new_node, result_t, NoCallInfo(), old_inst[:line], old_inst[:flag]) + + ssa = insert_node!(compact, old_ssa, new_inst, #= attach_after =# true) + lifted_philikes[i] = LiftedPhilike(ssa, IfElseCall(new_node), true) + end # lifting_cache[ckey] = ssa - lifted_phis[i] = LiftedPhi(ssa, n, true) end # Fix up arguments - for i = 1:nphis - (old_node_ssa, lf) = 
visited_phinodes[i], lifted_phis[i] - old_node = compact[old_node_ssa][:inst]::PhiNode - new_node = lf.node - should_count = !isa(lf.ssa, OldSSAValue) || already_inserted(compact, lf.ssa) + for i = 1:nphilikes + (old_node_ssa, lf) = visited_philikes[i], lifted_philikes[i] lf.need_argupdate || continue - for i = 1:length(old_node.edges) - edge = old_node.edges[i] - isassigned(old_node.values, i) || continue - val = old_node.values[i] - if is_old(compact, old_node_ssa) && isa(val, SSAValue) - val = OldSSAValue(val.id) - end - if isa(val, AnySSAValue) - val = simple_walk(compact, val) - end - if val in keys(lifted_leaves) - push!(new_node.edges, edge) - lifted_val = lifted_leaves[val] - if lifted_val === nothing + should_count = !isa(lf.ssa, OldSSAValue) || already_inserted(compact, lf.ssa) + + lfnode = lf.node + if isa(lfnode, PhiNode) + old_node = compact[old_node_ssa][:inst]::PhiNode + new_node = lfnode + for i = 1:length(old_node.values) + isassigned(old_node.values, i) || continue + val = lifted_value(compact, old_node_ssa, old_node.values[i], + lifted_philikes, lifted_leaves, reverse_mapping) + val !== SKIP_TOKEN && push!(new_node.edges, old_node.edges[i]) + if val === UNDEF_TOKEN resize!(new_node.values, length(new_node.values)+1) - continue - end - val = lifted_val.val - if isa(val, AnySSAValue) - callback = (@nospecialize(pi), @nospecialize(idx)) -> true - val = simple_walk(compact, val, callback) + elseif val !== SKIP_TOKEN + should_count && _count_added_node!(compact, val) + push!(new_node.values, val) end - should_count && _count_added_node!(compact, val) - push!(new_node.values, val) - elseif isa(val, AnySSAValue) && val in keys(reverse_mapping) - push!(new_node.edges, edge) - newval = lifted_phis[reverse_mapping[val]].ssa - should_count && _count_added_node!(compact, newval) - push!(new_node.values, newval) - else - # Probably ignored by path condition, skip this end + elseif isa(lfnode, IfElseCall) + old_node = compact[old_node_ssa][:inst]::Expr + 
then_result, else_result = old_node.args[3], old_node.args[4] + + then_result = lifted_value(compact, old_node_ssa, then_result, + lifted_philikes, lifted_leaves, reverse_mapping) + else_result = lifted_value(compact, old_node_ssa, else_result, + lifted_philikes, lifted_leaves, reverse_mapping) + + # In cases where the Core.ifelse condition is statically-known, e.g., thanks + # to a PiNode from a guarding conditional, replace with the remaining branch. + if then_result === SKIP_TOKEN || else_result === SKIP_TOKEN + only_result = (then_result === SKIP_TOKEN) ? else_result : then_result + + # Replace Core.ifelse(%cond, %a, %b) with %a + compact[lf.ssa][:inst] = only_result + should_count && _count_added_node!(compact, only_result) + + # Note: Core.ifelse(%cond, %a, %b) has observable effects (!nothrow), but since + # we have not deleted the preceding statement that this was derived from, this + # replacement is safe, i.e. it will not affect the effects observed. + continue + end + + @assert then_result !== SKIP_TOKEN && then_result !== UNDEF_TOKEN + @assert else_result !== SKIP_TOKEN && else_result !== UNDEF_TOKEN + + if should_count + _count_added_node!(compact, then_result) + _count_added_node!(compact, else_result) + end + + push!(lfnode.call.args, then_result) + push!(lfnode.call.args, else_result) end end @@ -718,7 +803,7 @@ function perform_lifting!(compact::IncrementalCompact, if stmt_val in keys(lifted_leaves) return lifted_leaves[stmt_val] elseif isa(stmt_val, AnySSAValue) && stmt_val in keys(reverse_mapping) - return LiftedValue(lifted_phis[reverse_mapping[stmt_val]].ssa) + return LiftedValue(lifted_philikes[reverse_mapping[stmt_val]].ssa) end return stmt_val # N.B. 
should never happen @@ -773,11 +858,11 @@ end length(sig.parameters) >= 1 || return nothing i = let sig=sig - findfirst(j->has_typevar(sig.parameters[j], tvar), 1:length(sig.parameters)) + findfirst(j::Int->has_typevar(sig.parameters[j], tvar), 1:length(sig.parameters)) end i === nothing && return nothing let sig=sig - any(j->has_typevar(sig.parameters[j], tvar), i+1:length(sig.parameters)) + any(j::Int->has_typevar(sig.parameters[j], tvar), i+1:length(sig.parameters)) end && return nothing arg = sig.parameters[i] @@ -825,6 +910,14 @@ end # which can be very large sometimes, and program counters in question are often very sparse const SPCSet = IdSet{Int} +struct IntermediaryCollector + intermediaries::SPCSet +end +function (this::IntermediaryCollector)(@nospecialize(pi), @nospecialize(ssa)) + push!(this.intermediaries, ssa.id) + return false +end + """ sroa_pass!(ir::IRCode) -> newir::IRCode @@ -844,10 +937,11 @@ In a case when all usages are fully eliminated, `struct` allocation may also be a result of succeeding dead code elimination. """ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) - 𝕃ₒ = inlining === nothing ? OptimizerLattice() : optimizer_lattice(inlining.interp) + 𝕃ₒ = inlining === nothing ? 
SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp) compact = IncrementalCompact(ir) defuses = nothing # will be initialized once we encounter mutability in order to reduce dynamic allocations lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}() + def_lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}() # initialization of domtree is delayed to avoid the expensive computation in many cases lazydomtree = LazyDomtree(ir) for ((_, idx), stmt) in compact @@ -951,7 +1045,6 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) # analyze `getfield` / `isdefined` / `setfield!` call val = stmt.args[2] end - struct_typ = unwrap_unionall(widenconst(argextype(val, compact))) if isa(struct_typ, Union) && struct_typ <: Tuple struct_typ = unswitchtupleunion(struct_typ) @@ -968,14 +1061,12 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) continue end + # analyze this mutable struct here for the later pass if ismutabletype(struct_typ) isa(val, SSAValue) || continue let intermediaries = SPCSet() - callback = function (@nospecialize(pi), @nospecialize(ssa)) - push!(intermediaries, ssa.id) - return false - end + callback = IntermediaryCollector(intermediaries) def = simple_walk(compact, val, callback) # Mutable stuff here isa(def, SSAValue) || continue @@ -1006,32 +1097,48 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) field = try_compute_fieldidx_stmt(compact, stmt, struct_typ) field === nothing && continue - leaves, visited_phinodes = collect_leaves(compact, val, struct_typ, 𝕃ₒ) + leaves, visited_philikes = collect_leaves(compact, val, struct_typ, 𝕃ₒ) isempty(leaves) && continue - result_t = argextype(SSAValue(idx), compact) - lifted_result = lift_leaves(compact, result_t, field, leaves, 𝕃ₒ) + lifted_result = lift_leaves(compact, field, leaves, 𝕃ₒ) lifted_result === nothing && continue lifted_leaves, any_undef = lifted_result - if any_undef - result_t 
= make_MaybeUndef(result_t) + result_t = Union{} + for v in values(lifted_leaves) + v === nothing && continue + result_t = tmerge(𝕃ₒ, result_t, argextype(v.val, compact)) end - val = perform_lifting!(compact, - visited_phinodes, field, lifting_cache, result_t, lifted_leaves, val, lazydomtree) + lifted_val = perform_lifting!(compact, + visited_philikes, field, lifting_cache, result_t, lifted_leaves, val, lazydomtree) # Insert the undef check if necessary - if any_undef && val === nothing - insert_node!(compact, SSAValue(idx), non_effect_free(NewInstruction( - Expr(:throw_undef_if_not, Symbol("##getfield##"), false), Nothing))) + if any_undef + if lifted_val === nothing + def_val = false + else + lifted_leaves_def = LiftedDefs() + for (k, v) in pairs(lifted_leaves) + lifted_leaves_def[k] = v === nothing ? false : true + end + def_val = perform_lifting!(compact, + visited_philikes, field, def_lifting_cache, Bool, lifted_leaves_def, val, lazydomtree).val + end + insert_node!(compact, SSAValue(idx), NewInstruction( + Expr(:throw_undef_if_not, Symbol("##getfield##"), def_val), Nothing)) + else # val must be defined - @assert val !== nothing + @assert lifted_val !== nothing end - compact[idx] = val === nothing ? nothing : val.val - compact[SSAValue(idx)][:flag] |= IR_FLAG_REFINED + compact[idx] = lifted_val === nothing ? 
nothing : lifted_val.val + if lifted_val !== nothing + if !⊑(𝕃ₒ, compact[SSAValue(idx)][:type], result_t) + compact[SSAValue(idx)][:flag] |= IR_FLAG_REFINED + end + end end non_dce_finish!(compact) @@ -1060,7 +1167,7 @@ function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, mi::MethodInstance, @nospecialize(info::CallInfo), inlining::InliningState, attach_after::Bool) code = get(code_cache(inlining), mi, nothing) - et = InliningEdgeTracker(inlining.et) + et = InliningEdgeTracker(inlining) if code isa CodeInstance if use_const_api(code) # No code in the function - Nothing to do @@ -1244,6 +1351,7 @@ function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse end function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing, InliningState}) + 𝕃ₒ = inlining === nothing ? SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp) lazypostdomtree = LazyPostDomtree(ir) for (idx, (intermediaries, defuse)) in defuses intermediaries = collect(intermediaries) @@ -1399,11 +1507,14 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse end for b in phiblocks n = ir[phinodes[b]][:inst]::PhiNode + result_t = Bottom for p in ir.cfg.blocks[b].preds push!(n.edges, p) - push!(n.values, compute_value_for_block(ir, domtree, - allblocks, du, phinodes, fidx, p)) + v = compute_value_for_block(ir, domtree, allblocks, du, phinodes, fidx, p) + push!(n.values, v) + result_t = tmerge(𝕃ₒ, result_t, argextype(v, ir)) end + ir[phinodes[b]][:type] = result_t end end all_eliminated || continue @@ -1414,8 +1525,16 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse idx == newidx && continue # this is allocation # verify this statement won't throw, otherwise it can't be eliminated safely ssa = SSAValue(idx) - is_nothrow(ir, ssa) || continue - ir[ssa][:inst] = nothing + if is_nothrow(ir, ssa) + 
ir[ssa][:inst] = nothing + else + # We can't eliminate this statement, because it might still + # throw an error, but we can mark it as effect-free since we + # know we have removed all uses of the mutable allocation. + # As a result, if we ever do prove nothrow, we can delete + # this statement then. + ir[ssa][:flag] |= IR_FLAG_EFFECT_FREE + end end end preserve_uses === nothing && continue @@ -1504,7 +1623,6 @@ function mark_phi_cycles!(compact::IncrementalCompact, safe_phis::SPCSet, phi::I end function is_some_union(@nospecialize(t)) - isa(t, MaybeUndef) && (t = t.typ) return isa(t, Union) end @@ -1558,7 +1676,7 @@ the `typeassert` elimination depends on the transformation by `canonicalize_type within `sroa_pass!` which redirects references of `typeassert`ed value to the corresponding `PiNode`. """ function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) - 𝕃ₒ = inlining === nothing ? OptimizerLattice() : optimizer_lattice(inlining.interp) + 𝕃ₒ = inlining === nothing ? SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp) phi_uses = fill(0, length(ir.stmts) + length(ir.new_nodes)) all_phis = Int[] unionphis = Pair{Int,Any}[] # sorted @@ -1683,153 +1801,6 @@ function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) return complete(compact) end -function type_lift_pass!(ir::IRCode) - lifted_undef = IdDict{Int, Any}() - insts = ir.stmts - for idx in 1:length(insts) - stmt = insts[idx][:inst] - stmt isa Expr || continue - if (stmt.head === :isdefined || stmt.head === :undefcheck) - # after optimization, undef can only show up by being introduced in - # a phi node (or an UpsilonNode() argument to a PhiC node), so lift - # all these nodes that have maybe undef values - val = stmt.args[(stmt.head === :isdefined) ? 
1 : 2] - if stmt.head === :isdefined && (val isa GlobalRef || isexpr(val, :static_parameter) || - val isa Argument || val isa Symbol) - # this is a legal node, so assume it was not introduced by - # slot2ssa (at worst, we might leave in a runtime check that - # shouldn't have been there) - continue - end - # otherwise, we definitely have a corrupt node from slot2ssa, and - # must fix or delete that now - processed = IdDict{Int, Union{SSAValue, Bool}}() - def = val - while true - # peek through PiNodes - isa(val, SSAValue) || break - def = insts[val.id][:inst] - isa(def, PiNode) || break - val = def.val - end - if !isa(val, SSAValue) || (!isa(def, PhiNode) && !isa(def, PhiCNode)) - # in most cases, reaching this statement implies we had a value - if stmt.head === :undefcheck - insts[idx][:inst] = nothing - else - insts[idx][:inst] = true - end - continue - end - stmt_id = val.id - worklist = Tuple{Int, Int, SSAValue, Int}[(stmt_id, 0, SSAValue(0), 0)] - if !haskey(lifted_undef, stmt_id) - first = true - while !isempty(worklist) - item, w_up_id, which, use = pop!(worklist) - def = insts[item][:inst] - if isa(def, PhiNode) - edges = copy(def.edges) - values = Vector{Any}(undef, length(edges)) - new_phi = if length(values) == 0 - false - else - insert_node!(ir, item, NewInstruction(PhiNode(edges, values), Bool)) - end - else - def = def::PhiCNode - values = Vector{Any}(undef, length(def.values)) - new_phi = if length(values) == 0 - false - else - insert_node!(ir, item, NewInstruction(PhiCNode(values), Bool)) - end - end - processed[item] = new_phi - if first - lifted_undef[stmt_id] = new_phi - first = false - end - local id::Int = 0 - all_same = true - local last_val - for i = 1:length(values) - if !isassigned(def.values, i) - val = false - elseif !isa(def.values[i], SSAValue) - val = true - else - up_id = id = (def.values[i]::SSAValue).id - @label restart - if !isa(ir.stmts[id][:type], MaybeUndef) - val = true - else - node = insts[id][:inst] - if isa(node, 
UpsilonNode) - if !isdefined(node, :val) - val = false - elseif !isa(node.val, SSAValue) - val = true - else - id = (node.val::SSAValue).id - @goto restart - end - else - while isa(node, PiNode) - id = (node.val::SSAValue).id - node = insts[id][:inst] - end - if isa(node, Union{PhiNode, PhiCNode}) - if haskey(processed, id) - val = processed[id] - else - # TODO: Re-check after convergence whether all the values are the same - all_same = false - push!(worklist, (id, up_id, new_phi::SSAValue, i)) - continue - end - else - val = true - end - end - end - end - if isa(def, PhiNode) - if !@isdefined(last_val) - last_val = val - elseif all_same - all_same &= last_val === val - end - values[i] = val - else - values[i] = insert_node!(ir, up_id, NewInstruction(UpsilonNode(val), Bool)) - end - end - if all_same && @isdefined(last_val) - # Decay the PhiNode back to the single value - ir[new_phi][:inst] = last_val - isa(last_val, Bool) && (processed[item] = last_val) - end - if which !== SSAValue(0) - phi = ir[which][:inst] - if isa(phi, PhiNode) - phi.values[use] = new_phi - elseif isa(phi, PhiCNode) - phi.values[use] = insert_node!(ir, w_up_id, NewInstruction(UpsilonNode(new_phi), Bool)) - end - end - end - end - inst = lifted_undef[stmt_id] - if stmt.head === :undefcheck - inst = Expr(:throw_undef_if_not, stmt.args[1], inst) - end - insts[idx][:inst] = inst - end - end - ir -end - function is_bb_empty(ir::IRCode, bb::BasicBlock) isempty(bb.stmts) && return true if length(bb.stmts) == 1 @@ -2114,15 +2085,15 @@ function cfg_simplify!(ir::IRCode) bb_rename_succ = bb_rename_succ # Compute (renamed) successors and predecessors given (renamed) block - function compute_succs(i) + function compute_succs(i::Int) orig_bb = follow_merged_succ(result_bbs[i]) return Int[bb_rename_succ[i] for i in bbs[orig_bb].succs] end - function compute_preds(i) + function compute_preds(i::Int) orig_bb = result_bbs[i] preds = bbs[orig_bb].preds res = Int[] - function scan_preds!(preds) + function 
scan_preds!(preds::Vector{Int}) for pred in preds if pred == 0 push!(res, 0) @@ -2155,7 +2126,7 @@ function cfg_simplify!(ir::IRCode) @assert length(new_bb.succs) <= 2 length(new_bb.succs) <= 1 && continue if new_bb.succs[1] == new_bb.succs[2] - old_bb2 = findfirst(x->x==bbidx, bb_rename_pred) + old_bb2 = findfirst(x::Int->x==bbidx, bb_rename_pred) terminator = ir[SSAValue(last(bbs[old_bb2].stmts))] @assert terminator[:inst] isa GotoIfNot # N.B.: The dest will be renamed in process_node! below @@ -2178,6 +2149,7 @@ function cfg_simplify!(ir::IRCode) result_idx = 1 for (idx, orig_bb) in enumerate(result_bbs) ms = orig_bb + bb_start = true while ms != 0 for i in bbs[ms].stmts node = ir.stmts[i] @@ -2191,7 +2163,7 @@ function cfg_simplify!(ir::IRCode) (; ssa_rename, late_fixup, used_ssas, new_new_used_ssas) = compact ssa_rename[i] = SSAValue(compact.result_idx) processed_idx = i - renamed_values = process_phinode_values(values, late_fixup, processed_idx, compact.result_idx, ssa_rename, used_ssas, new_new_used_ssas, true) + renamed_values = process_phinode_values(values, late_fixup, processed_idx, compact.result_idx, ssa_rename, used_ssas, new_new_used_ssas, true, nothing) edges = Int32[] values = Any[] sizehint!(edges, length(phi.edges)); sizehint!(values, length(renamed_values)) @@ -2236,7 +2208,14 @@ function cfg_simplify!(ir::IRCode) isassigned(renamed_values, old_index) && kill_current_use!(compact, renamed_values[old_index]) end end - compact.result[compact.result_idx][:inst] = PhiNode(edges, values) + if length(edges) == 0 || (length(edges) == 1 && !isassigned(values, 1)) + compact.result[compact.result_idx][:inst] = nothing + elseif length(edges) == 1 && !bb_start + compact.result[compact.result_idx][:inst] = values[1] + else + @assert bb_start + compact.result[compact.result_idx][:inst] = PhiNode(edges, values) + end else ri = process_node!(compact, compact.result_idx, node, i, i, ms, true) if ri == compact.result_idx @@ -2250,6 +2229,7 @@ function 
cfg_simplify!(ir::IRCode) compact.result_idx += 1 end ms = merged_succ[ms] + bb_start = false end end compact.idx = length(ir.stmts) diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl index 757fa1b98bedc..73bdb51702ded 100644 --- a/base/compiler/ssair/slot2ssa.jl +++ b/base/compiler/ssair/slot2ssa.jl @@ -89,18 +89,16 @@ function new_to_regular(@nospecialize(stmt), new_offset::Int) return urs[] end -function fixup_slot!(ir::IRCode, ci::CodeInfo, idx::Int, slot::Int, stmt::UnoptSlot, @nospecialize(ssa)) +function fixup_slot!(ir::IRCode, ci::CodeInfo, idx::Int, slot::Int, stmt::UnoptSlot, @nospecialize(ssa), @nospecialize(def_ssa)) # We don't really have the information here to get rid of these. # We'll do so later if ssa === UNDEF_TOKEN insert_node!(ir, idx, NewInstruction( Expr(:throw_undef_if_not, ci.slotnames[slot], false), Any)) return UNDEF_TOKEN - end - if !isa(ssa, Argument) && !(ssa === nothing) && ((ci.slotflags[slot] & SLOT_USEDUNDEF) != 0) - # insert a temporary node. type_lift_pass! will remove it + elseif def_ssa !== true insert_node!(ir, idx, NewInstruction( - Expr(:undefcheck, ci.slotnames[slot], ssa), Any)) + Expr(:throw_undef_if_not, ci.slotnames[slot], def_ssa), Any)) end if isa(stmt, SlotNumber) return ssa @@ -112,7 +110,7 @@ end function fixemup!(@specialize(slot_filter), @specialize(rename_slot), ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt)) if isa(stmt, UnoptSlot) && slot_filter(stmt) - return fixup_slot!(ir, ci, idx, slot_id(stmt), stmt, rename_slot(stmt)) + return fixup_slot!(ir, ci, idx, slot_id(stmt), stmt, rename_slot(stmt)...) 
end if isexpr(stmt, :(=)) stmt.args[2] = fixemup!(slot_filter, rename_slot, ir, ci, idx, stmt.args[2]) @@ -126,7 +124,7 @@ function fixemup!(@specialize(slot_filter), @specialize(rename_slot), ir::IRCode slot_filter(val) || continue bb_idx = block_for_inst(ir.cfg, Int(stmt.edges[i])) from_bb_terminator = last(ir.cfg.blocks[bb_idx].stmts) - stmt.values[i] = fixup_slot!(ir, ci, from_bb_terminator, slot_id(val), val, rename_slot(val)) + stmt.values[i] = fixup_slot!(ir, ci, from_bb_terminator, slot_id(val), val, rename_slot(val)...) end return stmt end @@ -137,15 +135,14 @@ function fixemup!(@specialize(slot_filter), @specialize(rename_slot), ir::IRCode if (ci.slotflags[slot] & SLOT_USEDUNDEF) == 0 return true else - ssa = rename_slot(val) + ssa, undef_ssa = rename_slot(val) if ssa === UNDEF_TOKEN return false elseif !isa(ssa, SSAValue) && !isa(ssa, NewSSAValue) return true end + return undef_ssa end - # temporarily corrupt the isdefined node. type_lift_pass! will fix it - stmt.args[1] = ssa end return stmt end @@ -153,7 +150,7 @@ function fixemup!(@specialize(slot_filter), @specialize(rename_slot), ir::IRCode for op in urs val = op[] if isa(val, UnoptSlot) && slot_filter(val) - x = fixup_slot!(ir, ci, idx, slot_id(val), val, rename_slot(val)) + x = fixup_slot!(ir, ci, idx, slot_id(val), val, rename_slot(val)...) # We inserted an undef error node. 
Delete subsequent statement # to avoid confusing the optimizer if x === UNDEF_TOKEN @@ -178,11 +175,11 @@ end function fixup_uses!(ir::IRCode, ci::CodeInfo, code::Vector{Any}, uses::Vector{Int}, slot::Int, @nospecialize(ssa)) for use in uses - code[use] = fixemup!(x::UnoptSlot->slot_id(x)==slot, stmt::UnoptSlot->ssa, ir, ci, use, code[use]) + code[use] = fixemup!(x::UnoptSlot->slot_id(x)==slot, stmt::UnoptSlot->(ssa, true), ir, ci, use, code[use]) end end -function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), renames::Vector{Any}) +function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), renames::Vector{Pair{Any, Any}}) return fixemup!(stmt::UnoptSlot->true, stmt::UnoptSlot->renames[slot_id(stmt)], ir, ci, idx, stmt) end @@ -577,22 +574,13 @@ function recompute_type(node::Union{PhiNode, PhiCNode}, ci::CodeInfo, ir::IRCode new_typ = Union{} for i = 1:length(node.values) if isa(node, PhiNode) && !isassigned(node.values, i) - if !isa(new_typ, MaybeUndef) - new_typ = MaybeUndef(new_typ) - end continue end typ = typ_for_val(node.values[i], ci, sptypes, -1, slottypes) - was_maybe_undef = false - if isa(typ, MaybeUndef) - typ = typ.typ - was_maybe_undef = true - end - @assert !isa(typ, MaybeUndef) while isa(typ, DelayedTyp) typ = types(ir)[new_to_regular(typ.phi::NewSSAValue, nstmts)] end - new_typ = tmerge(𝕃ₒ, new_typ, was_maybe_undef ? 
MaybeUndef(typ) : typ) + new_typ = tmerge(𝕃ₒ, new_typ, typ) end return new_typ end @@ -601,14 +589,18 @@ struct TryCatchRegion enter_block::Int leave_block::Int end -struct NewPhiNode +struct NewSlotPhi{Phi} ssaval::NewSSAValue - node::PhiNode + node::Phi + undef_ssaval::Union{NewSSAValue, Nothing} + undef_node::Union{Phi, Nothing} end -struct NewPhiCNode + +const NewPhiNode2 = NewSlotPhi{PhiNode} + +struct NewPhiCNode2 slot::SlotNumber - ssaval::NewSSAValue - node::PhiCNode + insert::NewSlotPhi{PhiCNode} end function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, @@ -638,10 +630,10 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, end phi_slots = Vector{Int}[Int[] for _ = 1:length(ir.cfg.blocks)] - new_phi_nodes = Vector{NewPhiNode}[NewPhiNode[] for _ = 1:length(cfg.blocks)] - new_phic_nodes = IdDict{Int, Vector{NewPhiCNode}}() + new_phi_nodes = Vector{NewPhiNode2}[NewPhiNode2[] for _ = 1:length(cfg.blocks)] + new_phic_nodes = IdDict{Int, Vector{NewPhiCNode2}}() for (; leave_block) in catch_entry_blocks - new_phic_nodes[leave_block] = NewPhiCNode[] + new_phic_nodes[leave_block] = NewPhiCNode2[] end @timeit "idf" for (idx, slot) in Iterators.enumerate(defuses) # No uses => no need for phi nodes @@ -653,7 +645,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, ssaval = Argument(idx) fixup_uses!(ir, ci, code, slot.uses, idx, ssaval) elseif isa(code[slot.defs[]], NewvarNode) - typ = MaybeUndef(Union{}) + typ = Union{} ssaval = nothing for use in slot.uses[] insert_node!(ir, use, @@ -676,10 +668,18 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, # Create a PhiC node in the catch entry block and # an upsilon node in the corresponding enter block node = PhiCNode(Any[]) + insertpoint = first_insert_for_bb(code, cfg, li) phic_ssa = NewSSAValue( - insert_node!(ir, first_insert_for_bb(code, cfg, li), + insert_node!(ir, insertpoint, NewInstruction(node, Union{})).id - length(ir.stmts)) - 
push!(new_phic_nodes[li], NewPhiCNode(SlotNumber(idx), phic_ssa, node)) + undef_node = undef_ssaval = nothing + if (ci.slotflags[idx] & SLOT_USEDUNDEF) != 0 + undef_node = PhiCNode(Any[]) + undef_ssaval = NewSSAValue(insert_node!(ir, + insertpoint, NewInstruction(undef_node, Bool)).id - length(ir.stmts)) + end + push!(new_phic_nodes[li], NewPhiCNode2(SlotNumber(idx), + NewSlotPhi{PhiCNode}(phic_ssa, node, undef_ssaval, undef_node))) # Inform IDF that we now have a def in the catch block if !(li in live.def_bbs) push!(live.def_bbs, li) @@ -692,20 +692,26 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, node = PhiNode() ssaval = NewSSAValue(insert_node!(ir, first_insert_for_bb(code, cfg, block), NewInstruction(node, Union{})).id - length(ir.stmts)) - push!(new_phi_nodes[block], NewPhiNode(ssaval, node)) + undef_node = undef_ssaval = nothing + if (ci.slotflags[idx] & SLOT_USEDUNDEF) != 0 + undef_node = PhiNode() + undef_ssaval = NewSSAValue(insert_node!(ir, + first_insert_for_bb(code, cfg, block), NewInstruction(undef_node, Bool)).id - length(ir.stmts)) + end + push!(new_phi_nodes[block], NewPhiNode2(ssaval, node, undef_ssaval, undef_node)) end end # Perform SSA renaming - initial_incoming_vals = Any[ + initial_incoming_vals = Pair{Any, Any}[ if 0 in defuses[x].defs - Argument(x) + Pair{Any, Any}(Argument(x), true) elseif !defuses[x].any_newvar - UNDEF_TOKEN + Pair{Any, Any}(UNDEF_TOKEN, false) else - SSAValue(-2) + Pair{Any, Any}(SSAValue(-2), false) end for x in 1:length(ci.slotflags) ] - worklist = Tuple{Int, Int, Vector{Any}}[(1, 0, initial_incoming_vals)] + worklist = Tuple{Int, Int, Vector{Pair{Any, Any}}}[(1, 0, initial_incoming_vals)] visited = BitSet() type_refine_phi = BitSet() new_nodes = ir.new_nodes @@ -729,8 +735,8 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, end # Insert phi nodes if necessary for (idx, slot) in Iterators.enumerate(phi_slots[item]) - (; ssaval, node) = new_phi_nodes[item][idx] - 
incoming_val = incoming_vals[slot] + (; ssaval, node, undef_ssaval, undef_node) = new_phi_nodes[item][idx] + (incoming_val, incoming_def) = incoming_vals[slot] if incoming_val === SSAValue(-1) # Optimistically omit this path. # Liveness analysis would probably have prevented us from inserting this phi node @@ -742,12 +748,18 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, else push!(node.values, incoming_val) end + outgoing_def = true + if (ci.slotflags[slot] & SLOT_USEDUNDEF) != 0 + push!(undef_node.edges, pred) + push!(undef_node.values, incoming_def) + outgoing_def = undef_ssaval + end # TODO: Remove the next line, it shouldn't be necessary push!(type_refine_phi, ssaval.id) if isa(incoming_val, NewSSAValue) push!(type_refine_phi, ssaval.id) end - typ = incoming_val === UNDEF_TOKEN ? MaybeUndef(Union{}) : typ_for_val(incoming_val, ci, ir.sptypes, -1, slottypes) + typ = incoming_val === UNDEF_TOKEN ? Union{} : typ_for_val(incoming_val, ci, ir.sptypes, -1, slottypes) old_entry = new_nodes.stmts[ssaval.id] if isa(typ, DelayedTyp) push!(type_refine_phi, ssaval.id) @@ -755,26 +767,34 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, new_typ = isa(typ, DelayedTyp) ? Union{} : tmerge(𝕃ₒ, old_entry[:type], typ) old_entry[:type] = new_typ old_entry[:inst] = node - incoming_vals[slot] = ssaval + incoming_vals[slot] = Pair{Any, Any}(ssaval, outgoing_def) end (item in visited) && continue # Record phi_C nodes if necessary if haskey(new_phic_nodes, item) - for (; slot, ssaval) in new_phic_nodes[item] - incoming_vals[slot_id(slot)] = ssaval + for (; slot, insert) in new_phic_nodes[item] + (; ssaval, undef_ssaval) = insert + incoming_vals[slot_id(slot)] = Pair{Any, Any}(ssaval, undef_ssaval === nothing ? 
true : undef_ssaval) end end # Record initial upsilon nodes if necessary eidx = findfirst((; enter_block)::TryCatchRegion->enter_block==item, catch_entry_blocks) if eidx !== nothing - for (; slot, node) in new_phic_nodes[catch_entry_blocks[eidx].leave_block] - ival = incoming_vals[slot_id(slot)] + for (; slot, insert) in new_phic_nodes[catch_entry_blocks[eidx].leave_block] + (; node, undef_node) = insert + (ival, idef) = incoming_vals[slot_id(slot)] ivalundef = ival === UNDEF_TOKEN Υ = NewInstruction(ivalundef ? UpsilonNode() : UpsilonNode(ival), - ivalundef ? MaybeUndef(Union{}) : typ_for_val(ival, ci, ir.sptypes, -1, slottypes)) + ivalundef ? Union{} : typ_for_val(ival, ci, ir.sptypes, -1, slottypes)) + insertpos = first_insert_for_bb(code, cfg, item) # insert `UpsilonNode` immediately before the `:enter` expression - Υssa = insert_node!(ir, first_insert_for_bb(code, cfg, item), Υ) + Υssa = insert_node!(ir, insertpos, Υ) push!(node.values, NewSSAValue(Υssa.id - length(ir.stmts))) + if undef_node !== nothing + Υundef = NewInstruction(UpsilonNode(idef), Bool) + Υssaundef = insert_node!(ir, insertpos, Υundef) + push!(undef_node.values, NewSSAValue(Υssaundef.id - length(ir.stmts))) + end end end push!(visited, item) @@ -782,7 +802,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, stmt = code[idx] (isa(stmt, PhiNode) || (isexpr(stmt, :(=)) && isa(stmt.args[2], PhiNode))) && continue if isa(stmt, NewvarNode) - incoming_vals[slot_id(stmt.slot)] = UNDEF_TOKEN + incoming_vals[slot_id(stmt.slot)] = Pair{Any, Any}(UNDEF_TOKEN, false) code[idx] = nothing else stmt = rename_uses!(ir, ci, idx, stmt, incoming_vals) @@ -801,23 +821,31 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, # Having UNDEF_TOKEN appear on the RHS is possible if we're on a dead branch. # Do something reasonable here, by marking the LHS as undef as well. 
if val !== UNDEF_TOKEN - incoming_vals[id] = make_ssa!(ci, code, idx, typ) + thisdef = true + thisval = make_ssa!(ci, code, idx, typ) else code[idx] = nothing - incoming_vals[id] = UNDEF_TOKEN + thisval = UNDEF_TOKEN + thisdef = false end + incoming_vals[id] = Pair{Any, Any}(thisval, thisdef) enter_block = item while haskey(exc_handlers, enter_block) (; enter_block, leave_block) = exc_handlers[enter_block] - cidx = findfirst((; slot)::NewPhiCNode->slot_id(slot)==id, new_phic_nodes[leave_block]) + cidx = findfirst((; slot)::NewPhiCNode2->slot_id(slot)==id, new_phic_nodes[leave_block]) if cidx !== nothing - node = UpsilonNode(incoming_vals[id]) + node = thisdef ? UpsilonNode(thisval) : UpsilonNode() if incoming_vals[id] === UNDEF_TOKEN node = UpsilonNode() - typ = MaybeUndef(Union{}) + typ = Union{} end - push!(new_phic_nodes[leave_block][cidx].node.values, + insert = new_phic_nodes[leave_block][cidx].insert + push!(insert.node.values, NewSSAValue(insert_node!(ir, idx, NewInstruction(node, typ), true).id - length(ir.stmts))) + if insert.undef_node !== nothing + push!(insert.undef_node.values, + NewSSAValue(insert_node!(ir, idx, NewInstruction(UpsilonNode(thisdef), Bool), true).id - length(ir.stmts))) + end end end end @@ -876,7 +904,8 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, end end for (_, nodes) in new_phic_nodes - for (; ssaval, node) in nodes + for (; insert) in nodes + (; node, ssaval) = insert new_typ = Union{} # TODO: This could just be the ones that depend on other phis push!(type_refine_phi, ssaval.id) @@ -885,7 +914,6 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, phic_values = (node[:inst]::PhiCNode).values for i = 1:length(phic_values) orig_typ = typ = typ_for_val(phic_values[i], ci, ir.sptypes, -1, slottypes) - @assert !isa(typ, MaybeUndef) while isa(typ, DelayedTyp) typ = types(ir)[new_to_regular(typ.phi::NewSSAValue, nstmts)] end diff --git a/base/compiler/ssair/verify.jl 
b/base/compiler/ssair/verify.jl index bf06d6bb3e523..39f56a47e1908 100644 --- a/base/compiler/ssair/verify.jl +++ b/base/compiler/ssair/verify.jl @@ -20,6 +20,7 @@ if !isdefined(@__MODULE__, Symbol("@verify_error")) end end +is_value_pos_expr_head(head::Symbol) = head === :boundscheck function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, printed_use_idx::Int, print::Bool, isforeigncall::Bool, arg_idx::Int, allow_frontend_forms::Bool) if isa(op, SSAValue) if op.id > length(ir.stmts) @@ -60,7 +61,7 @@ function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, # Allow a tuple in symbol position for foreigncall - this isn't actually # a real call - it's interpreted in global scope by codegen. However, # we do need to keep this a real use, because it could also be a pointer. - elseif op.head !== :boundscheck + elseif !is_value_pos_expr_head(op.head) if !allow_frontend_forms || op.head !== :opaque_closure_method @verify_error "Expr not allowed in value position" error("") @@ -87,7 +88,7 @@ end function verify_ir(ir::IRCode, print::Bool=true, allow_frontend_forms::Bool=false, - 𝕃ₒ::AbstractLattice = OptimizerLattice()) + 𝕃ₒ::AbstractLattice = SimpleInferenceLattice.instance) # For now require compact IR # @assert isempty(ir.new_nodes) # Verify CFG @@ -187,20 +188,34 @@ function verify_ir(ir::IRCode, print::Bool=true, end end end + lastbb = 0 + is_phinode_block = false + firstidx = 1 + lastphi = 1 for (bb, idx) in bbidxiter(ir) + if bb != lastbb + is_phinode_block = true + lastphi = firstidx = idx + lastbb = bb + end # We allow invalid IR in dead code to avoid passes having to detect when # they're generating dead code. 
bb_unreachable(domtree, bb) && continue stmt = ir.stmts[idx][:inst] stmt === nothing && continue if isa(stmt, PhiNode) + if !is_phinode_block + @verify_error "φ node $idx is not at the beginning of the basic block $bb" + error("") + end + lastphi = idx @assert length(stmt.edges) == length(stmt.values) for i = 1:length(stmt.edges) edge = stmt.edges[i] for j = (i+1):length(stmt.edges) edge′ = stmt.edges[j] if edge == edge′ - # TODO: Move `unique` to Core.Compiler. For now we assume the predecessor list is + # TODO: Move `unique` to Core.Compiler. For now we assume the predecessor list is always unique. @verify_error "Edge list φ node $idx in bb $bb not unique (double edge?)" error("") end @@ -233,7 +248,21 @@ function verify_ir(ir::IRCode, print::Bool=true, end check_op(ir, domtree, val, Int(edge), last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, idx, print, false, i, allow_frontend_forms) end - elseif isa(stmt, PhiCNode) + continue + end + + if is_phinode_block && isa(stmt, Union{Expr, UpsilonNode, PhiCNode, SSAValue}) + if !isa(stmt, Expr) || !is_value_pos_expr_head(stmt.head) + # Go back and check that all non-PhiNodes are valid value-position + for validate_idx in firstidx:(lastphi-1) + validate_stmt = ir.stmts[validate_idx][:inst] + isa(validate_stmt, PhiNode) && continue + check_op(ir, domtree, validate_stmt, bb, idx, idx, print, false, 0, allow_frontend_forms) + end + is_phinode_block = false + end + end + if isa(stmt, PhiCNode) for i = 1:length(stmt.values) val = stmt.values[i] if !isa(val, SSAValue) diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index f894d4ab3f4a5..f5690f4e5b8d6 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -1086,13 +1086,6 @@ end return _getfield_tfunc(widenlattice(𝕃), s00, name, setfield) end -@nospecs function _getfield_tfunc(𝕃::OptimizerLattice, s00, name, setfield::Bool) - # If undef, that's a Union, but that doesn't affect the rt when tmerged - # into the unwrapped result. 
- isa(s00, MaybeUndef) && (s00 = s00.typ) - return _getfield_tfunc(widenlattice(𝕃), s00, name, setfield) -end - @nospecs function _getfield_tfunc(𝕃::AnyConditionalsLattice, s00, name, setfield::Bool) if isa(s00, AnyConditional) return Bottom # Bool has no fields @@ -1374,7 +1367,7 @@ function abstract_modifyfield!(interp::AbstractInterpreter, argtypes::Vector{Any nargs = length(argtypes) if !isempty(argtypes) && isvarargtype(argtypes[nargs]) nargs - 1 <= 6 || return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) - nargs > 3 || return CallMeta(Any, EFFECTS_UNKNOWN, NoCallInfo()) + nargs > 3 || return CallMeta(Any, Effects(), NoCallInfo()) else 5 <= nargs <= 6 || return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) end @@ -1389,7 +1382,7 @@ function abstract_modifyfield!(interp::AbstractInterpreter, argtypes::Vector{Any op = unwrapva(argtypes[4]) v = unwrapva(argtypes[5]) TF = getfield_tfunc(𝕃ᵢ, o, f) - callinfo = abstract_call(interp, ArgInfo(nothing, Any[op, TF, v]), StmtInfo(true), sv, #=max_methods=# 1) + callinfo = abstract_call(interp, ArgInfo(nothing, Any[op, TF, v]), StmtInfo(true), sv, #=max_methods=#1) TF2 = tmeet(callinfo.rt, widenconst(TF)) if TF2 === Bottom RT = Bottom @@ -1665,7 +1658,7 @@ function apply_type_nothrow(𝕃::AbstractLattice, argtypes::Vector{Any}, @nospe end else istype || return false - if !(T <: u.var.ub) + if isa(u.var.ub, TypeVar) || !(T <: u.var.ub) return false end if exact ? !(u.var.lb <: T) : !(u.var.lb === Bottom) @@ -2041,8 +2034,9 @@ function array_type_undefable(@nospecialize(arytype)) end end -function array_builtin_common_nothrow(argtypes::Vector{Any}, first_idx_idx::Int, isarrayref::Bool) - length(argtypes) >= 4 || return false +function array_builtin_common_nothrow(argtypes::Vector{Any}, isarrayref::Bool) + first_idx_idx = isarrayref ? 
3 : 4 + length(argtypes) ≥ first_idx_idx || return false boundscheck = argtypes[1] arytype = argtypes[2] array_builtin_common_typecheck(boundscheck, arytype, argtypes, first_idx_idx) || return false @@ -2086,11 +2080,11 @@ end @nospecs function _builtin_nothrow(𝕃::AbstractLattice, f, argtypes::Vector{Any}, rt) ⊑ = Core.Compiler.:⊑(𝕃) if f === arrayset - array_builtin_common_nothrow(argtypes, 4, #=isarrayref=#false) || return false + array_builtin_common_nothrow(argtypes, #=isarrayref=#false) || return false # Additionally check element type compatibility return arrayset_typecheck(argtypes[2], argtypes[3]) elseif f === arrayref || f === const_arrayref - return array_builtin_common_nothrow(argtypes, 3, #=isarrayref=#true) + return array_builtin_common_nothrow(argtypes, #=isarrayref=#true) elseif f === Core._expr length(argtypes) >= 1 || return false return argtypes[1] ⊑ Symbol @@ -2646,10 +2640,10 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s if isa(sv, InferenceState) old_restrict = sv.restrict_abstract_call_sites sv.restrict_abstract_call_sites = false - call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, -1) + call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1) sv.restrict_abstract_call_sites = old_restrict else - call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, -1) + call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1) end info = verbose_stmt_info(interp) ? 
MethodResultPure(ReturnTypeCallInfo(call.info)) : MethodResultPure() rt = widenslotwrapper(call.rt) @@ -2681,7 +2675,7 @@ end # a simplified model of abstract_call_gf_by_type for applicable function abstract_applicable(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::AbsIntState, max_methods::Int) - length(argtypes) < 2 && return CallMeta(Union{}, EFFECTS_UNKNOWN, NoCallInfo()) + length(argtypes) < 2 && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) isvarargtype(argtypes[2]) && return CallMeta(Bool, EFFECTS_UNKNOWN, NoCallInfo()) argtypes = argtypes[2:end] atype = argtypes_to_type(argtypes) diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index 7d983ec5420db..77e1fd02de8d0 100644 --- a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl @@ -269,8 +269,7 @@ function _typeinf(interp::AbstractInterpreter, frame::InferenceState) for (caller, _, _) in results opt = caller.src if opt isa OptimizationState{typeof(interp)} # implies `may_optimize(interp) === true` - analyzed = optimize(interp, opt, caller) - caller.valid_worlds = (opt.inlining.et::EdgeTracker).valid_worlds[] + optimize(interp, opt, caller) end end for (caller, edges, cached) in results @@ -370,10 +369,9 @@ end function transform_result_for_cache(interp::AbstractInterpreter, linfo::MethodInstance, valid_worlds::WorldRange, result::InferenceResult) inferred_result = result.src - # If we decided not to optimize, drop the OptimizationState now. - # External interpreters can override as necessary to cache additional information if inferred_result isa OptimizationState{typeof(interp)} - inferred_result = ir_to_codeinf!(inferred_result) + # TODO respect must_be_codeinf setting here? 
+ result.src = inferred_result = ir_to_codeinf!(inferred_result) end if inferred_result isa CodeInfo inferred_result.min_world = first(valid_worlds) diff --git a/base/compiler/typelattice.jl b/base/compiler/typelattice.jl index 700a6d333cbc4..324f2b600cc44 100644 --- a/base/compiler/typelattice.jl +++ b/base/compiler/typelattice.jl @@ -156,14 +156,6 @@ struct PartialTypeVar PartialTypeVar(tv::TypeVar, lb_certain::Bool, ub_certain::Bool) = new(tv, lb_certain, ub_certain) end -# Wraps a type and represents that the value may also be undef at this point. -# (only used in optimize, not abstractinterpret) -# N.B. in the lattice, this is epsilon bigger than `typ` (even Any) -struct MaybeUndef - typ - MaybeUndef(@nospecialize(typ)) = new(typ) -end - struct StateUpdate var::SlotNumber vtype::VarState @@ -232,7 +224,7 @@ struct NotFound end const NOT_FOUND = NotFound() -const CompilerTypes = Union{MaybeUndef, Const, Conditional, MustAlias, NotFound, PartialStruct} +const CompilerTypes = Union{Const, Conditional, MustAlias, NotFound, PartialStruct} ==(x::CompilerTypes, y::CompilerTypes) = x === y ==(x::Type, y::CompilerTypes) = false ==(x::CompilerTypes, y::Type) = false @@ -244,7 +236,7 @@ const CompilerTypes = Union{MaybeUndef, Const, Conditional, MustAlias, NotFound, # slot wrappers # ============= -function assert_nested_slotwrapper(@nospecialize t) +@nospecializeinfer function assert_nested_slotwrapper(@nospecialize t) @assert !(t isa Conditional) "found nested Conditional" @assert !(t isa InterConditional) "found nested InterConditional" @assert !(t isa MustAlias) "found nested MustAlias" @@ -252,7 +244,7 @@ function assert_nested_slotwrapper(@nospecialize t) return t end -function widenslotwrapper(@nospecialize typ) +@nospecializeinfer function widenslotwrapper(@nospecialize typ) if isa(typ, AnyConditional) return widenconditional(typ) elseif isa(typ, AnyMustAlias) @@ -261,7 +253,7 @@ function widenslotwrapper(@nospecialize typ) return typ end -function 
widenwrappedslotwrapper(@nospecialize typ) +@nospecializeinfer function widenwrappedslotwrapper(@nospecialize typ) if isa(typ, LimitedAccuracy) return LimitedAccuracy(widenslotwrapper(typ.typ), typ.causes) end @@ -271,7 +263,7 @@ end # Conditional # =========== -function widenconditional(@nospecialize typ) +@nospecializeinfer function widenconditional(@nospecialize typ) if isa(typ, AnyConditional) if typ.thentype === Union{} return Const(false) @@ -285,7 +277,7 @@ function widenconditional(@nospecialize typ) end return typ end -function widenwrappedconditional(@nospecialize typ) +@nospecializeinfer function widenwrappedconditional(@nospecialize typ) if isa(typ, LimitedAccuracy) return LimitedAccuracy(widenconditional(typ.typ), typ.causes) end @@ -294,7 +286,7 @@ end # `Conditional` and `InterConditional` are valid in opposite contexts # (i.e. local inference and inter-procedural call), as such they will never be compared -function issubconditional(lattice::AbstractLattice, a::C, b::C) where {C<:AnyConditional} +@nospecializeinfer function issubconditional(lattice::AbstractLattice, a::C, b::C) where {C<:AnyConditional} if is_same_conditionals(a, b) if ⊑(lattice, a.thentype, b.thentype) if ⊑(lattice, a.elsetype, b.elsetype) @@ -307,7 +299,7 @@ end is_same_conditionals(a::C, b::C) where C<:AnyConditional = a.slot == b.slot -is_lattice_bool(lattice::AbstractLattice, @nospecialize(typ)) = typ !== Bottom && ⊑(lattice, typ, Bool) +@nospecializeinfer is_lattice_bool(lattice::AbstractLattice, @nospecialize(typ)) = typ !== Bottom && ⊑(lattice, typ, Bool) maybe_extract_const_bool(c::Const) = (val = c.val; isa(val, Bool)) ? 
val : nothing function maybe_extract_const_bool(c::AnyConditional) @@ -315,12 +307,12 @@ function maybe_extract_const_bool(c::AnyConditional) (c.elsetype === Bottom && !(c.thentype === Bottom)) && return true nothing end -maybe_extract_const_bool(@nospecialize c) = nothing +@nospecializeinfer maybe_extract_const_bool(@nospecialize c) = nothing # MustAlias # ========= -function widenmustalias(@nospecialize typ) +@nospecializeinfer function widenmustalias(@nospecialize typ) if isa(typ, AnyMustAlias) return typ.fldtyp elseif isa(typ, LimitedAccuracy) @@ -329,13 +321,13 @@ function widenmustalias(@nospecialize typ) return typ end -function isalreadyconst(@nospecialize t) +@nospecializeinfer function isalreadyconst(@nospecialize t) isa(t, Const) && return true isa(t, DataType) && isdefined(t, :instance) && return true return isconstType(t) end -function maybe_const_fldidx(@nospecialize(objtyp), @nospecialize(fldval)) +@nospecializeinfer function maybe_const_fldidx(@nospecialize(objtyp), @nospecialize(fldval)) t = widenconst(objtyp) if isa(fldval, Int) fldidx = fldval @@ -352,7 +344,7 @@ function maybe_const_fldidx(@nospecialize(objtyp), @nospecialize(fldval)) return fldidx end -function form_mustalias_conditional(alias::MustAlias, @nospecialize(thentype), @nospecialize(elsetype)) +@nospecializeinfer function form_mustalias_conditional(alias::MustAlias, @nospecialize(thentype), @nospecialize(elsetype)) (; slot, vartyp, fldidx) = alias if isa(vartyp, PartialStruct) fields = vartyp.fields @@ -401,7 +393,7 @@ ignorelimited(typ::LimitedAccuracy) = typ.typ # lattice order # ============= -function ⊑(lattice::InferenceLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function ⊑(lattice::InferenceLattice, @nospecialize(a), @nospecialize(b)) r = ⊑(widenlattice(lattice), ignorelimited(a), ignorelimited(b)) r || return false isa(b, LimitedAccuracy) || return true @@ -420,17 +412,7 @@ function ⊑(lattice::InferenceLattice, @nospecialize(a), @nospecialize(b)) return 
b.causes ⊆ a.causes end -function ⊑(lattice::OptimizerLattice, @nospecialize(a), @nospecialize(b)) - if isa(a, MaybeUndef) - isa(b, MaybeUndef) || return false - a, b = a.typ, b.typ - elseif isa(b, MaybeUndef) - b = b.typ - end - return ⊑(widenlattice(lattice), a, b) -end - -function ⊑(lattice::AnyConditionalsLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function ⊑(lattice::AnyConditionalsLattice, @nospecialize(a), @nospecialize(b)) # Fast paths for common cases b === Any && return true a === Any && return false @@ -450,7 +432,7 @@ function ⊑(lattice::AnyConditionalsLattice, @nospecialize(a), @nospecialize(b) return ⊑(widenlattice(lattice), a, b) end -function ⊑(𝕃::AnyMustAliasesLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function ⊑(𝕃::AnyMustAliasesLattice, @nospecialize(a), @nospecialize(b)) MustAliasT = isa(𝕃, MustAliasesLattice) ? MustAlias : InterMustAlias if isa(a, MustAliasT) if isa(b, MustAliasT) @@ -463,7 +445,7 @@ function ⊑(𝕃::AnyMustAliasesLattice, @nospecialize(a), @nospecialize(b)) return ⊑(widenlattice(𝕃), a, b) end -function ⊑(lattice::PartialsLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function ⊑(lattice::PartialsLattice, @nospecialize(a), @nospecialize(b)) if isa(a, PartialStruct) if isa(b, PartialStruct) if !(length(a.fields) == length(b.fields) && a.typ <: b.typ) @@ -526,7 +508,7 @@ function ⊑(lattice::PartialsLattice, @nospecialize(a), @nospecialize(b)) return ⊑(widenlattice(lattice), a, b) end -function ⊑(lattice::ConstsLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function ⊑(lattice::ConstsLattice, @nospecialize(a), @nospecialize(b)) if isa(a, Const) if isa(b, Const) return a.val === b.val @@ -548,7 +530,7 @@ function ⊑(lattice::ConstsLattice, @nospecialize(a), @nospecialize(b)) return ⊑(widenlattice(lattice), a, b) end -function is_lattice_equal(lattice::InferenceLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function 
is_lattice_equal(lattice::InferenceLattice, @nospecialize(a), @nospecialize(b)) if isa(a, LimitedAccuracy) isa(b, LimitedAccuracy) || return false a.causes == b.causes || return false @@ -560,15 +542,7 @@ function is_lattice_equal(lattice::InferenceLattice, @nospecialize(a), @nospecia return is_lattice_equal(widenlattice(lattice), a, b) end -function is_lattice_equal(lattice::OptimizerLattice, @nospecialize(a), @nospecialize(b)) - if isa(a, MaybeUndef) || isa(b, MaybeUndef) - # TODO: Unwrap these and recurse to is_lattice_equal - return ⊑(lattice, a, b) && ⊑(lattice, b, a) - end - return is_lattice_equal(widenlattice(lattice), a, b) -end - -function is_lattice_equal(lattice::AnyConditionalsLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function is_lattice_equal(lattice::AnyConditionalsLattice, @nospecialize(a), @nospecialize(b)) ConditionalT = isa(lattice, ConditionalsLattice) ? Conditional : InterConditional if isa(a, ConditionalT) || isa(b, ConditionalT) # TODO: Unwrap these and recurse to is_lattice_equal @@ -577,7 +551,7 @@ function is_lattice_equal(lattice::AnyConditionalsLattice, @nospecialize(a), @no return is_lattice_equal(widenlattice(lattice), a, b) end -function is_lattice_equal(lattice::PartialsLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function is_lattice_equal(lattice::PartialsLattice, @nospecialize(a), @nospecialize(b)) if isa(a, PartialStruct) isa(b, PartialStruct) || return false length(a.fields) == length(b.fields) || return false @@ -600,7 +574,7 @@ function is_lattice_equal(lattice::PartialsLattice, @nospecialize(a), @nospecial return is_lattice_equal(widenlattice(lattice), a, b) end -function is_lattice_equal(lattice::ConstsLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function is_lattice_equal(lattice::ConstsLattice, @nospecialize(a), @nospecialize(b)) a === b && return true if a isa Const if issingletontype(b) @@ -625,7 +599,7 @@ end # lattice operations # ================== -function 
tmeet(lattice::PartialsLattice, @nospecialize(v), @nospecialize(t::Type)) +@nospecializeinfer function tmeet(lattice::PartialsLattice, @nospecialize(v), @nospecialize(t::Type)) if isa(v, PartialStruct) has_free_typevars(t) && return v widev = widenconst(v) @@ -663,7 +637,7 @@ function tmeet(lattice::PartialsLattice, @nospecialize(v), @nospecialize(t::Type return tmeet(widenlattice(lattice), v, t) end -function tmeet(lattice::ConstsLattice, @nospecialize(v), @nospecialize(t::Type)) +@nospecializeinfer function tmeet(lattice::ConstsLattice, @nospecialize(v), @nospecialize(t::Type)) if isa(v, Const) if !has_free_typevars(t) && !isa(v.val, t) return Bottom @@ -673,7 +647,7 @@ function tmeet(lattice::ConstsLattice, @nospecialize(v), @nospecialize(t::Type)) tmeet(widenlattice(lattice), widenconst(v), t) end -function tmeet(lattice::ConditionalsLattice, @nospecialize(v), @nospecialize(t::Type)) +@nospecializeinfer function tmeet(lattice::ConditionalsLattice, @nospecialize(v), @nospecialize(t::Type)) if isa(v, Conditional) if !(Bool <: t) return Bottom @@ -683,38 +657,32 @@ function tmeet(lattice::ConditionalsLattice, @nospecialize(v), @nospecialize(t:: tmeet(widenlattice(lattice), v, t) end -function tmeet(𝕃::MustAliasesLattice, @nospecialize(v), @nospecialize(t::Type)) +@nospecializeinfer function tmeet(𝕃::MustAliasesLattice, @nospecialize(v), @nospecialize(t::Type)) if isa(v, MustAlias) v = widenmustalias(v) end return tmeet(widenlattice(𝕃), v, t) end -function tmeet(lattice::InferenceLattice, @nospecialize(v), @nospecialize(t::Type)) +@nospecializeinfer function tmeet(lattice::InferenceLattice, @nospecialize(v), @nospecialize(t::Type)) # TODO: This can probably happen and should be handled @assert !isa(v, LimitedAccuracy) tmeet(widenlattice(lattice), v, t) end -function tmeet(lattice::InterConditionalsLattice, @nospecialize(v), @nospecialize(t::Type)) +@nospecializeinfer function tmeet(lattice::InterConditionalsLattice, @nospecialize(v), @nospecialize(t::Type)) # TODO: 
This can probably happen and should be handled @assert !isa(v, AnyConditional) tmeet(widenlattice(lattice), v, t) end -function tmeet(𝕃::InterMustAliasesLattice, @nospecialize(v), @nospecialize(t::Type)) +@nospecializeinfer function tmeet(𝕃::InterMustAliasesLattice, @nospecialize(v), @nospecialize(t::Type)) if isa(v, InterMustAlias) v = widenmustalias(v) end return tmeet(widenlattice(𝕃), v, t) end -function tmeet(lattice::OptimizerLattice, @nospecialize(v), @nospecialize(t::Type)) - # TODO: This can probably happen and should be handled - @assert !isa(v, MaybeUndef) - tmeet(widenlattice(lattice), v, t) -end - """ widenconst(x) -> t::Type @@ -723,11 +691,10 @@ Widens extended lattice element `x` to native `Type` representation. widenconst(::AnyConditional) = Bool widenconst(a::AnyMustAlias) = widenconst(widenmustalias(a)) widenconst(c::Const) = (v = c.val; isa(v, Type) ? Type{v} : typeof(v)) -widenconst(m::MaybeUndef) = widenconst(m.typ) widenconst(::PartialTypeVar) = TypeVar widenconst(t::PartialStruct) = t.typ widenconst(t::PartialOpaque) = t.typ -widenconst(t::Type) = t +@nospecializeinfer widenconst(@nospecialize t::Type) = t widenconst(::TypeVar) = error("unhandled TypeVar") widenconst(::TypeofVararg) = error("unhandled Vararg") widenconst(::LimitedAccuracy) = error("unhandled LimitedAccuracy") @@ -743,7 +710,7 @@ function smerge(lattice::AbstractLattice, sa::Union{NotFound,VarState}, sb::Unio return VarState(tmerge(lattice, sa.typ, sb.typ), sa.undef | sb.undef) end -@inline schanged(lattice::AbstractLattice, @nospecialize(n), @nospecialize(o)) = +@nospecializeinfer @inline schanged(lattice::AbstractLattice, @nospecialize(n), @nospecialize(o)) = (n !== o) && (o === NOT_FOUND || (n !== NOT_FOUND && !(n.undef <= o.undef && ⊑(lattice, n.typ, o.typ)))) # remove any lattice elements that wrap the reassigned slot object from the vartable diff --git a/base/compiler/typelimits.jl b/base/compiler/typelimits.jl index 191820951fae1..b648144ea3bd1 100644 --- 
a/base/compiler/typelimits.jl +++ b/base/compiler/typelimits.jl @@ -304,9 +304,7 @@ end # A simplified type_more_complex query over the extended lattice # (assumes typeb ⊑ typea) -function issimplertype(𝕃::AbstractLattice, @nospecialize(typea), @nospecialize(typeb)) - typea isa MaybeUndef && (typea = typea.typ) # n.b. does not appear in inference - typeb isa MaybeUndef && (typeb = typeb.typ) # n.b. does not appear in inference +@nospecializeinfer function issimplertype(𝕃::AbstractLattice, @nospecialize(typea), @nospecialize(typeb)) @assert !isa(typea, LimitedAccuracy) && !isa(typeb, LimitedAccuracy) "LimitedAccuracy not supported by simplertype lattice" # n.b. the caller was supposed to handle these typea === typeb && return true if typea isa PartialStruct @@ -379,19 +377,6 @@ end return nothing end -function tmerge(lattice::OptimizerLattice, @nospecialize(typea), @nospecialize(typeb)) - r = tmerge_fast_path(lattice, typea, typeb) - r !== nothing && return r - - # type-lattice for MaybeUndef wrapper - if isa(typea, MaybeUndef) || isa(typeb, MaybeUndef) - return MaybeUndef(tmerge( - isa(typea, MaybeUndef) ? typea.typ : typea, - isa(typeb, MaybeUndef) ? 
typeb.typ : typeb)) - end - return tmerge(widenlattice(lattice), typea, typeb) -end - function union_causes(causesa::IdSet{InferenceState}, causesb::IdSet{InferenceState}) if causesa ⊆ causesb return causesb @@ -415,7 +400,7 @@ function merge_causes(causesa::IdSet{InferenceState}, causesb::IdSet{InferenceSt end end -@noinline function tmerge_limited(lattice::InferenceLattice, @nospecialize(typea), @nospecialize(typeb)) +@nospecializeinfer @noinline function tmerge_limited(lattice::InferenceLattice, @nospecialize(typea), @nospecialize(typeb)) typea === Union{} && return typeb typeb === Union{} && return typea @@ -466,17 +451,15 @@ end return LimitedAccuracy(tmerge(widenlattice(lattice), typea, typeb), causes) end -function tmerge(lattice::InferenceLattice, @nospecialize(typea), @nospecialize(typeb)) +@nospecializeinfer function tmerge(lattice::InferenceLattice, @nospecialize(typea), @nospecialize(typeb)) if isa(typea, LimitedAccuracy) || isa(typeb, LimitedAccuracy) return tmerge_limited(lattice, typea, typeb) end - r = tmerge_fast_path(widenlattice(lattice), typea, typeb) - r !== nothing && return r return tmerge(widenlattice(lattice), typea, typeb) end -function tmerge(lattice::ConditionalsLattice, @nospecialize(typea), @nospecialize(typeb)) +@nospecializeinfer function tmerge(lattice::ConditionalsLattice, @nospecialize(typea), @nospecialize(typeb)) # type-lattice for Conditional wrapper (NOTE never be merged with InterConditional) if isa(typea, Conditional) && isa(typeb, Const) if typeb.val === true @@ -506,12 +489,17 @@ function tmerge(lattice::ConditionalsLattice, @nospecialize(typea), @nospecializ end return Bool end - typea = widenconditional(typea) - typeb = widenconditional(typeb) + if isa(typea, Conditional) + typeb === Union{} && return typea + typea = widenconditional(typea) + elseif isa(typeb, Conditional) + typea === Union{} && return typeb + typeb = widenconditional(typeb) + end return tmerge(widenlattice(lattice), typea, typeb) end -function 
tmerge(lattice::InterConditionalsLattice, @nospecialize(typea), @nospecialize(typeb)) +@nospecializeinfer function tmerge(lattice::InterConditionalsLattice, @nospecialize(typea), @nospecialize(typeb)) # type-lattice for InterConditional wrapper (NOTE never be merged with Conditional) if isa(typea, InterConditional) && isa(typeb, Const) if typeb.val === true @@ -541,20 +529,31 @@ function tmerge(lattice::InterConditionalsLattice, @nospecialize(typea), @nospec end return Bool end - typea = widenconditional(typea) - typeb = widenconditional(typeb) + if isa(typea, InterConditional) + typeb === Union{} && return typea + typea = widenconditional(typea) + elseif isa(typeb, InterConditional) + typea === Union{} && return typeb + typeb = widenconditional(typeb) + end return tmerge(widenlattice(lattice), typea, typeb) end -function tmerge(𝕃::AnyMustAliasesLattice, @nospecialize(typea), @nospecialize(typeb)) - typea = widenmustalias(typea) - typeb = widenmustalias(typeb) +@nospecializeinfer function tmerge(𝕃::AnyMustAliasesLattice, @nospecialize(typea), @nospecialize(typeb)) + if is_valid_lattice_norec(𝕃, typea) + typeb === Union{} && return typea + typea = widenmustalias(typea) + end + if is_valid_lattice_norec(𝕃, typeb) + typea === Union{} && return typeb + typeb = widenmustalias(typeb) + end return tmerge(widenlattice(𝕃), typea, typeb) end # N.B. This can also be called with both typea::Const and typeb::Const to # to recover PartialStruct from `Const`s with overlapping fields. 
-function tmerge_partial_struct(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(typeb)) +@nospecializeinfer function tmerge_partial_struct(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(typeb)) aty = widenconst(typea) bty = widenconst(typeb) if aty === bty @@ -612,7 +611,10 @@ function tmerge_partial_struct(lattice::PartialsLattice, @nospecialize(typea), @ return nothing end -function tmerge(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(typeb)) +@nospecializeinfer function tmerge(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(typeb)) + r = tmerge_fast_path(lattice, typea, typeb) + r !== nothing && return r + # type-lattice for Const and PartialStruct wrappers aps = isa(typea, PartialStruct) bps = isa(typeb, PartialStruct) @@ -655,8 +657,7 @@ function tmerge(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(ty return tmerge(wl, typea, typeb) end - -function tmerge(lattice::ConstsLattice, @nospecialize(typea), @nospecialize(typeb)) +@nospecializeinfer function tmerge(lattice::ConstsLattice, @nospecialize(typea), @nospecialize(typeb)) acp = isa(typea, Const) || isa(typea, PartialTypeVar) bcp = isa(typeb, Const) || isa(typeb, PartialTypeVar) if acp && bcp @@ -668,7 +669,7 @@ function tmerge(lattice::ConstsLattice, @nospecialize(typea), @nospecialize(type return tmerge(wl, typea, typeb) end -function tmerge(::JLTypeLattice, @nospecialize(typea::Type), @nospecialize(typeb::Type)) +@nospecializeinfer function tmerge(::JLTypeLattice, @nospecialize(typea::Type), @nospecialize(typeb::Type)) # it's always ok to form a Union of two concrete types act = isconcretetype(typea) bct = isconcretetype(typeb) @@ -684,7 +685,7 @@ function tmerge(::JLTypeLattice, @nospecialize(typea::Type), @nospecialize(typeb return tmerge_types_slow(typea, typeb) end -@noinline function tmerge_types_slow(@nospecialize(typea::Type), @nospecialize(typeb::Type)) +@nospecializeinfer @noinline function 
tmerge_types_slow(@nospecialize(typea::Type), @nospecialize(typeb::Type)) # collect the list of types from past tmerge calls returning Union # and then reduce over that list types = Any[] diff --git a/base/compiler/types.jl b/base/compiler/types.jl index 4a4f27c9c27c2..14f1c90dca0e9 100644 --- a/base/compiler/types.jl +++ b/base/compiler/types.jl @@ -16,7 +16,8 @@ the following methods to satisfy the `AbstractInterpreter` API requirement: - `get_inference_cache(interp::NewInterpreter)` - return the local inference cache - `code_cache(interp::NewInterpreter)` - return the global inference cache """ -abstract type AbstractInterpreter end +:(AbstractInterpreter) + abstract type AbstractLattice end struct ArgInfo @@ -464,15 +465,15 @@ infer_compilation_signature(::NativeInterpreter) = true typeinf_lattice(::AbstractInterpreter) = InferenceLattice(BaseInferenceLattice.instance) ipo_lattice(::AbstractInterpreter) = InferenceLattice(IPOResultLattice.instance) -optimizer_lattice(::AbstractInterpreter) = OptimizerLattice(SimpleInferenceLattice.instance) +optimizer_lattice(::AbstractInterpreter) = SimpleInferenceLattice.instance typeinf_lattice(interp::NativeInterpreter) = interp.irinterp ? - OptimizerLattice(InferenceLattice(SimpleInferenceLattice.instance)) : + InferenceLattice(SimpleInferenceLattice.instance) : InferenceLattice(BaseInferenceLattice.instance) ipo_lattice(interp::NativeInterpreter) = interp.irinterp ? 
InferenceLattice(SimpleInferenceLattice.instance) : InferenceLattice(IPOResultLattice.instance) -optimizer_lattice(interp::NativeInterpreter) = OptimizerLattice(SimpleInferenceLattice.instance) +optimizer_lattice(interp::NativeInterpreter) = SimpleInferenceLattice.instance """ switch_to_irinterp(interp::AbstractInterpreter) -> irinterp::AbstractInterpreter diff --git a/base/compiler/typeutils.jl b/base/compiler/typeutils.jl index cff10b02ceafc..2ecc077228264 100644 --- a/base/compiler/typeutils.jl +++ b/base/compiler/typeutils.jl @@ -139,12 +139,12 @@ valid_tparam(@nospecialize(x)) = valid_typeof_tparam(typeof(x)) function compatible_vatuple(a::DataType, b::DataType) vaa = a.parameters[end] - vab = a.parameters[end] + vab = b.parameters[end] if !(isvarargtype(vaa) && isvarargtype(vab)) return isvarargtype(vaa) == isvarargtype(vab) end - (isdefined(vaa, :N) == isdefined(vab, :N)) || return false - !isdefined(vaa, :N) && return true + isdefined(vaa, :N) || return !isdefined(vab, :N) + isdefined(vab, :N) || return false return vaa.N === vab.N end @@ -163,8 +163,7 @@ function typesubtract(@nospecialize(a), @nospecialize(b), max_union_splitting::I elseif a isa DataType ub = unwrap_unionall(b) if ub isa DataType - if a.name === ub.name === Tuple.name && - length(a.parameters) == length(ub.parameters) + if a.name === ub.name === Tuple.name && length(a.parameters) == length(ub.parameters) if 1 < unionsplitcost(JLTypeLattice(), a.parameters) <= max_union_splitting ta = switchtupleunion(a) return typesubtract(Union{ta...}, b, 0) diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl index 836c370b98bd4..f3c5694535ce6 100644 --- a/base/compiler/utilities.jl +++ b/base/compiler/utilities.jl @@ -107,6 +107,10 @@ function is_inlineable_constant(@nospecialize(x)) return count_const_size(x) <= MAX_INLINE_CONST_SIZE end +is_nospecialized(method::Method) = method.nospecialize ≠ 0 + +is_nospecializeinfer(method::Method) = method.nospecializeinfer && 
is_nospecialized(method) + ########################### # MethodInstance/CodeInfo # ########################### @@ -154,8 +158,16 @@ function get_compileable_sig(method::Method, @nospecialize(atype), sparams::Simp isa(atype, DataType) || return nothing mt = ccall(:jl_method_get_table, Any, (Any,), method) mt === nothing && return nothing - return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any), - mt, atype, sparams, method) + return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any, Cint), + mt, atype, sparams, method, #=int return_if_compileable=#1) +end + +function get_nospecializeinfer_sig(method::Method, @nospecialize(atype), sparams::SimpleVector) + isa(atype, DataType) || return method.sig + mt = ccall(:jl_method_table_for, Any, (Any,), atype) + mt === nothing && return method.sig + return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any, Cint), + mt, atype, sparams, method, #=int return_if_compileable=#0) end isa_compileable_sig(@nospecialize(atype), sparams::SimpleVector, method::Method) = @@ -199,10 +211,13 @@ function normalize_typevars(method::Method, @nospecialize(atype), sparams::Simpl end # get a handle to the unique specialization object representing a particular instantiation of a call -function specialize_method(method::Method, @nospecialize(atype), sparams::SimpleVector; preexisting::Bool=false) +@inline function specialize_method(method::Method, @nospecialize(atype), sparams::SimpleVector; preexisting::Bool=false) if isa(atype, UnionAll) atype, sparams = normalize_typevars(method, atype, sparams) end + if is_nospecializeinfer(method) + atype = get_nospecializeinfer_sig(method, atype, sparams) + end if preexisting # check cached specializations # for an existing result stored there @@ -312,7 +327,7 @@ end # types # ######### -function singleton_type(@nospecialize(ft)) +@nospecializeinfer function singleton_type(@nospecialize(ft)) ft = widenslotwrapper(ft) if isa(ft, Const) return ft.val @@ -324,7 +339,7 
@@ function singleton_type(@nospecialize(ft)) return nothing end -function maybe_singleton_const(@nospecialize(t)) +@nospecializeinfer function maybe_singleton_const(@nospecialize(t)) if isa(t, DataType) if issingletontype(t) return Const(t.instance) diff --git a/base/complex.jl b/base/complex.jl index a0473c90d5c17..97b47eac91a5a 100644 --- a/base/complex.jl +++ b/base/complex.jl @@ -245,9 +245,9 @@ bswap(z::Complex) = Complex(bswap(real(z)), bswap(imag(z))) ==(z::Complex, x::Real) = isreal(z) && real(z) == x ==(x::Real, z::Complex) = isreal(z) && real(z) == x -isequal(z::Complex, w::Complex) = isequal(real(z),real(w)) & isequal(imag(z),imag(w)) -isequal(z::Complex, w::Real) = isequal(real(z),w) & isequal(imag(z),zero(w)) -isequal(z::Real, w::Complex) = isequal(z,real(w)) & isequal(zero(z),imag(w)) +isequal(z::Complex, w::Complex) = isequal(real(z),real(w))::Bool & isequal(imag(z),imag(w))::Bool +isequal(z::Complex, w::Real) = isequal(real(z),w)::Bool & isequal(imag(z),zero(w))::Bool +isequal(z::Real, w::Complex) = isequal(z,real(w))::Bool & isequal(zero(z),imag(w))::Bool in(x::Complex, r::AbstractRange{<:Real}) = isreal(x) && real(x) in r diff --git a/base/docs/Docs.jl b/base/docs/Docs.jl index e0d21715c2147..e0733280e7c7d 100644 --- a/base/docs/Docs.jl +++ b/base/docs/Docs.jl @@ -286,12 +286,26 @@ catdoc(xs...) = vcat(xs...) 
const keywords = Dict{Symbol, DocStr}() function unblock(@nospecialize ex) + while isexpr(ex, :var"hygienic-scope") + isexpr(ex.args[1], :escape) || break + ex = ex.args[1].args[1] + end isexpr(ex, :block) || return ex exs = filter(ex -> !(isa(ex, LineNumberNode) || isexpr(ex, :line)), ex.args) length(exs) == 1 || return ex return unblock(exs[1]) end +# peek through ex to figure out what kind of expression it may eventually act like +# but ignoring scopes and line numbers +function unescape(@nospecialize ex) + ex = unblock(ex) + while isexpr(ex, :escape) || isexpr(ex, :var"hygienic-scope") + ex = unblock(ex.args[1]) + end + return ex +end + uncurly(@nospecialize ex) = isexpr(ex, :curly) ? ex.args[1] : ex namify(@nospecialize x) = astname(x, isexpr(x, :macro))::Union{Symbol,Expr,GlobalRef} @@ -351,18 +365,19 @@ function metadata(__source__, __module__, expr, ismodule) fields = P[] last_docstr = nothing for each in (expr.args[3]::Expr).args - if isa(each, Symbol) || isexpr(each, :(::)) + eachex = unescape(each) + if isa(eachex, Symbol) || isexpr(eachex, :(::)) # a field declaration if last_docstr !== nothing - push!(fields, P(namify(each::Union{Symbol,Expr}), last_docstr)) + push!(fields, P(namify(eachex::Union{Symbol,Expr}), last_docstr)) last_docstr = nothing end - elseif isexpr(each, :function) || isexpr(each, :(=)) + elseif isexpr(eachex, :function) || isexpr(eachex, :(=)) break - elseif isa(each, String) || isexpr(each, :string) || isexpr(each, :call) || - (isexpr(each, :macrocall) && each.args[1] === Symbol("@doc_str")) + elseif isa(eachex, String) || isexpr(eachex, :string) || isexpr(eachex, :call) || + (isexpr(eachex, :macrocall) && eachex.args[1] === Symbol("@doc_str")) # forms that might be doc strings - last_docstr = each::Union{String,Expr} + last_docstr = each end end dict = :($(Dict{Symbol,Any})($([(:($(P)($(quot(f)), $d)))::Expr for (f, d) in fields]...))) @@ -627,8 +642,9 @@ function loaddocs(docs::Vector{Core.SimpleVector}) for (mod, ex, str, file, 
line) in docs data = Dict{Symbol,Any}(:path => string(file), :linenumber => line) doc = docstr(str, data) - docstring = docm(LineNumberNode(line, file), mod, doc, ex, false) # expand the real @doc macro now - Core.eval(mod, Expr(Core.unescape, docstring, Docs)) + lno = LineNumberNode(line, file) + docstring = docm(lno, mod, doc, ex, false) # expand the real @doc macro now + Core.eval(mod, Expr(:var"hygienic-scope", docstring, Docs, lno)) end empty!(docs) nothing diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl index 684ed8b48f734..fd8c35a5fdf76 100644 --- a/base/docs/basedocs.jl +++ b/base/docs/basedocs.jl @@ -3227,6 +3227,15 @@ See also [`"`](@ref \") """ kw"\"\"\"" +""" +Unsafe pointer operations are compatible with loading and storing pointers declared with +`_Atomic` and `std::atomic` type in C11 and C++23 respectively. An error may be thrown if +there is not support for atomically loading the Julia type `T`. + +See also: [`unsafe_load`](@ref), [`unsafe_modify!`](@ref), [`unsafe_replace!`](@ref), [`unsafe_store!`](@ref), [`unsafe_swap!`](@ref) +""" +kw"atomic" + """ Base.donotdelete(args...) 
diff --git a/base/errorshow.jl b/base/errorshow.jl index 03650920aae57..81f4c9c2ee9e0 100644 --- a/base/errorshow.jl +++ b/base/errorshow.jl @@ -35,6 +35,13 @@ show_index(io::IO, x::LogicalIndex) = summary(io, x.mask) show_index(io::IO, x::OneTo) = print(io, "1:", x.stop) show_index(io::IO, x::Colon) = print(io, ':') +function showerror(io::IO, ex::Meta.ParseError) + if isnothing(ex.detail) + print(io, "ParseError(", repr(ex.msg), ")") + else + showerror(io, ex.detail) + end +end function showerror(io::IO, ex::BoundsError) print(io, "BoundsError") @@ -243,7 +250,7 @@ function showerror(io::IO, ex::MethodError) ft = typeof(f) arg_types_param = arg_types_param[3:end] kwargs = pairs(ex.args[1]) - ex = MethodError(f, ex.args[3:end::Int]) + ex = MethodError(f, ex.args[3:end::Int], ex.world) end name = ft.name.mt.name if f === Base.convert && length(arg_types_param) == 2 && !is_arg_types @@ -490,7 +497,11 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=() if !((min(length(t_i), length(sig)) == 0) && k==1) print(iob, ", ") end - if get(io, :color, false)::Bool + if k == 1 && Base.isvarargtype(sigtype) + # There wasn't actually a mismatch - the method match failed for + # some other reason, e.g. world age. Just print the sigstr. + print(iob, sigstr...) + elseif get(io, :color, false)::Bool let sigstr=sigstr Base.with_output_color(Base.error_color(), iob) do iob print(iob, "::", sigstr...) 
@@ -761,6 +772,9 @@ function show_backtrace(io::IO, t::Vector) if haskey(io, :last_shown_line_infos) empty!(io[:last_shown_line_infos]) end + # this will be set to true if types in the stacktrace are truncated + limitflag = Ref(false) + io = IOContext(io, :stacktrace_types_limited => limitflag) # t is a pre-processed backtrace (ref #12856) if t isa Vector{Any} @@ -781,12 +795,15 @@ function show_backtrace(io::IO, t::Vector) if length(filtered) > BIG_STACKTRACE_SIZE show_reduced_backtrace(IOContext(io, :backtrace => true), filtered) return + else + try invokelatest(update_stackframes_callback[], filtered) catch end + # process_backtrace returns a Vector{Tuple{Frame, Int}} + show_full_backtrace(io, filtered; print_linebreaks = stacktrace_linebreaks()) end - - try invokelatest(update_stackframes_callback[], filtered) catch end - # process_backtrace returns a Vector{Tuple{Frame, Int}} - show_full_backtrace(io, filtered; print_linebreaks = stacktrace_linebreaks()) - return + if limitflag[] + print(io, "\nSome type information was truncated. Use `show(err)` to see complete types.") + end + nothing end diff --git a/base/essentials.jl b/base/essentials.jl index 06e2c3ea2ec87..97f32483a6b14 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -85,7 +85,8 @@ f(y) = [x for x in y] !!! note `@nospecialize` affects code generation but not inference: it limits the diversity of the resulting native code, but it does not impose any limitations (beyond the - standard ones) on type-inference. + standard ones) on type-inference. Use [`Base.@nospecializeinfer`](@ref) together with + `@nospecialize` to additionally suppress inference. # Example @@ -856,6 +857,9 @@ e.g. long-running event loops or callback functions that may call obsolete versions of a function `f`. (The drawback is that `invokelatest` is somewhat slower than calling `f` directly, and the type of the result cannot be inferred by the compiler.) + +!!! 
compat "Julia 1.9" + Prior to Julia 1.9, this function was not exported, and was called as `Base.invokelatest`. """ function invokelatest(@nospecialize(f), @nospecialize args...; kwargs...) kwargs = merge(NamedTuple(), kwargs) diff --git a/base/exports.jl b/base/exports.jl index ec151df0bfde2..10f43825e12df 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -979,8 +979,11 @@ export reenable_sigint, unsafe_copyto!, unsafe_load, + unsafe_modify!, unsafe_pointer_to_objref, + unsafe_replace!, unsafe_store!, + unsafe_swap!, # implemented in Random module rand, @@ -1004,6 +1007,7 @@ export @v_str, # version number @raw_str, # raw string with no interpolation/unescaping @NamedTuple, + @Kwargs, @lazy_str, # lazy string # documentation diff --git a/base/expr.jl b/base/expr.jl index e45684f95a34f..e007306063db1 100644 --- a/base/expr.jl +++ b/base/expr.jl @@ -342,7 +342,6 @@ macro noinline(x) return annotate_meta_def_or_block(x, :noinline) end - """ @constprop setting [ex] @@ -763,6 +762,44 @@ function compute_assumed_setting(@nospecialize(setting), val::Bool=true) end end +""" + Base.@nospecializeinfer function f(args...) + @nospecialize ... + ... + end + Base.@nospecializeinfer f(@nospecialize args...) = ... + +Tells the compiler to infer `f` using the declared types of `@nospecialize`d arguments. +This can be used to limit the number of compiler-generated specializations during inference. 
+ +# Example + +```julia +julia> f(A::AbstractArray) = g(A) +f (generic function with 1 method) + +julia> @noinline Base.@nospecializeinfer g(@nospecialize(A::AbstractArray)) = A[1] +g (generic function with 1 method) + +julia> @code_typed f([1.0]) +CodeInfo( +1 ─ %1 = invoke Main.g(_2::AbstractArray)::Any +└── return %1 +) => Any +``` + +In this example, `f` will be inferred for each specific type of `A`, +but `g` will only be inferred once with the declared argument type `A::AbstractArray`, +meaning that the compiler will not likely see the excessive inference time on it +while it can not infer the concrete return type of it. +Without the `@nospecializeinfer`, `f([1.0])` would infer the return type of `g` as `Float64`, +indicating that inference ran for `g(::Vector{Float64})` despite the prohibition on +specialized code generation. +""" +macro nospecializeinfer(ex) + esc(isa(ex, Expr) ? pushmeta!(ex, :nospecializeinfer) : ex) +end + """ @propagate_inbounds @@ -1007,6 +1044,7 @@ end @atomic order ex Mark `var` or `ex` as being performed atomically, if `ex` is a supported expression. +If no `order` is specified it defaults to :sequentially_consistent. @atomic a.b.x = new @atomic a.b.x += addend diff --git a/base/fastmath.jl b/base/fastmath.jl index 7865736f8a776..44440ebad2050 100644 --- a/base/fastmath.jl +++ b/base/fastmath.jl @@ -112,19 +112,10 @@ function make_fastmath(expr::Expr) if isa(var, Symbol) # simple assignment expr = :($var = $op($var, $rhs)) - elseif isa(var, Expr) && var.head === :ref - var = var::Expr - # array reference - arr = var.args[1] - inds = var.args[2:end] - arrvar = gensym() - indvars = Any[gensym() for _ in inds] - expr = quote - $(Expr(:(=), arrvar, arr)) - $(Expr(:(=), Base.exprarray(:tuple, indvars), Base.exprarray(:tuple, inds))) - $arrvar[$(indvars...)] = $op($arrvar[$(indvars...)], $rhs) - end end + # It is hard to optimize array[i += 1] += 1 + # and array[end] += 1 without bugs. 
(#47241) + # We settle for not optimizing the op= call. end Base.exprarray(make_fastmath(expr.head), Base.mapany(make_fastmath, expr.args)) end diff --git a/base/float.jl b/base/float.jl index fad7146655ade..d5280ef74fbce 100644 --- a/base/float.jl +++ b/base/float.jl @@ -635,31 +635,40 @@ isinf(x::Real) = !isnan(x) & !isfinite(x) isinf(x::IEEEFloat) = abs(x) === oftype(x, Inf) const hx_NaN = hash_uint64(reinterpret(UInt64, NaN)) -let Tf = Float64, Tu = UInt64, Ti = Int64 - @eval function hash(x::$Tf, h::UInt) - # see comments on trunc and hash(Real, UInt) - if $(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti))) - xi = fptosi($Ti, x) - if isequal(xi, x) - return hash(xi, h) - end - elseif $(Tf(typemin(Tu))) <= x < $(Tf(typemax(Tu))) - xu = fptoui($Tu, x) - if isequal(xu, x) - return hash(xu, h) - end - elseif isnan(x) - return hx_NaN ⊻ h # NaN does not have a stable bit pattern +function hash(x::Float64, h::UInt) + # see comments on trunc and hash(Real, UInt) + if typemin(Int64) <= x < typemax(Int64) + xi = fptosi(Int64, x) + if isequal(xi, x) + return hash(xi, h) + end + elseif typemin(UInt64) <= x < typemax(UInt64) + xu = fptoui(UInt64, x) + if isequal(xu, x) + return hash(xu, h) end - return hash_uint64(bitcast(UInt64, x)) - 3h + elseif isnan(x) + return hx_NaN ⊻ h # NaN does not have a stable bit pattern end + return hash_uint64(bitcast(UInt64, x)) - 3h end hash(x::Float32, h::UInt) = hash(Float64(x), h) -hash(x::Float16, h::UInt) = hash(Float64(x), h) -## generic hashing for rational values ## +function hash(x::Float16, h::UInt) + # see comments on trunc and hash(Real, UInt) + if isfinite(x) # all finite Float16 fit in Int64 + xi = fptosi(Int64, x) + if isequal(xi, x) + return hash(xi, h) + end + elseif isnan(x) + return hx_NaN ⊻ h # NaN does not have a stable bit pattern + end + return hash_uint64(bitcast(UInt64, Float64(x))) - 3h +end +## generic hashing for rational values ## function hash(x::Real, h::UInt) # decompose x as num*2^pow/den num, pow, den = 
decompose(x) @@ -674,32 +683,28 @@ function hash(x::Real, h::UInt) num = -num den = -den end - z = trailing_zeros(num) - if z != 0 - num >>= z - pow += z - end - z = trailing_zeros(den) - if z != 0 - den >>= z - pow -= z - end + num_z = trailing_zeros(num) + num >>= num_z + den_z = trailing_zeros(den) + den >>= den_z + pow += num_z - den_z # handle values representable as Int64, UInt64, Float64 if den == 1 - left = ndigits0z(num,2) + pow - right = trailing_zeros(num) + pow + left = top_set_bit(abs(num)) + pow + right = pow + den_z if -1074 <= right - if 0 <= right && left <= 64 - left <= 63 && return hash(Int64(num) << Int(pow), h) - signbit(num) == signbit(den) && return hash(UInt64(num) << Int(pow), h) + if 0 <= right + left <= 63 && return hash(Int64(num) << Int(pow), h) + left <= 64 && !signbit(num) && return hash(UInt64(num) << Int(pow), h) end # typemin(Int64) handled by Float64 case - left <= 1024 && left - right <= 53 && return hash(ldexp(Float64(num),pow), h) + left <= 1024 && left - right <= 53 && return hash(ldexp(Float64(num), pow), h) end + else + h = hash_integer(den, h) end # handle generic rational values - h = hash_integer(den, h) h = hash_integer(pow, h) h = hash_integer(num, h) return h @@ -875,7 +880,7 @@ for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UIn end end function (::Type{$Ti})(x::$Tf) - if ($(Tf(typemin(Ti))) <= x <= $(Tf(typemax(Ti)))) && (round(x, RoundToZero) == x) + if ($(Tf(typemin(Ti))) <= x <= $(Tf(typemax(Ti)))) && isinteger(x) return unsafe_trunc($Ti,x) else throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x)) @@ -896,7 +901,7 @@ for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UIn end end function (::Type{$Ti})(x::$Tf) - if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))) && (round(x, RoundToZero) == x) + if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))) && isinteger(x) return unsafe_trunc($Ti,x) else throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x)) diff 
--git a/base/gmp.jl b/base/gmp.jl index 69926f4ad0d06..8a1451be7a590 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -606,9 +606,9 @@ Number of ones in the binary representation of abs(x). count_ones_abs(x::BigInt) = iszero(x) ? 0 : MPZ.mpn_popcount(x) function top_set_bit(x::BigInt) - x < 0 && throw(DomainError(x, "top_set_bit only supports negative arguments when they have type BitSigned.")) - x == 0 && return 0 - Int(ccall((:__gmpz_sizeinbase, :libgmp), Csize_t, (Base.GMP.MPZ.mpz_t, Cint), x, 2)) + isneg(x) && throw(DomainError(x, "top_set_bit only supports negative arguments when they have type BitSigned.")) + iszero(x) && return 0 + x.size * sizeof(Limb) << 3 - leading_zeros(GC.@preserve x unsafe_load(x.d, x.size)) end divrem(x::BigInt, y::BigInt) = MPZ.tdiv_qr(x, y) @@ -843,8 +843,8 @@ Base.deepcopy_internal(x::BigInt, stackdict::IdDict) = get!(() -> MPZ.set(x), st ## streamlined hashing for BigInt, by avoiding allocation from shifts ## -if Limb === UInt - # this condition is true most (all?) of the time, and in this case we can define +if Limb === UInt64 === UInt + # On 64 bit systems we can define # an optimized version for BigInt of hash_integer (used e.g. for Rational{BigInt}), # and of hash @@ -854,7 +854,7 @@ if Limb === UInt GC.@preserve n begin s = n.size s == 0 && return hash_integer(0, h) - p = convert(Ptr{UInt}, n.d) + p = convert(Ptr{UInt64}, n.d) b = unsafe_load(p) h ⊻= hash_uint(ifelse(s < 0, -b, b) ⊻ h) for k = 2:abs(s) @@ -864,14 +864,11 @@ if Limb === UInt end end - _divLimb(n) = UInt === UInt64 ? n >>> 6 : n >>> 5 - _modLimb(n) = UInt === UInt64 ? 
n & 63 : n & 31 - function hash(x::BigInt, h::UInt) GC.@preserve x begin sz = x.size sz == 0 && return hash(0, h) - ptr = Ptr{UInt}(x.d) + ptr = Ptr{UInt64}(x.d) if sz == 1 return hash(unsafe_load(ptr), h) elseif sz == -1 @@ -880,8 +877,8 @@ if Limb === UInt end pow = trailing_zeros(x) nd = Base.ndigits0z(x, 2) - idx = _divLimb(pow) + 1 - shift = _modLimb(pow) % UInt + idx = (pow >>> 6) + 1 + shift = (pow & 63) % UInt upshift = BITS_PER_LIMB - shift asz = abs(sz) if shift == 0 @@ -894,7 +891,6 @@ if Limb === UInt if nd <= 1024 && nd - pow <= 53 return hash(ldexp(flipsign(Float64(limb), sz), pow), h) end - h = hash_integer(1, h) h = hash_integer(pow, h) h ⊻= hash_uint(flipsign(limb, sz) ⊻ h) for idx = idx+1:asz diff --git a/base/iddict.jl b/base/iddict.jl index 99710fbb3491e..01ff213305d7b 100644 --- a/base/iddict.jl +++ b/base/iddict.jl @@ -134,7 +134,10 @@ end function empty!(d::IdDict) resize!(d.ht, 32) - ccall(:memset, Ptr{Cvoid}, (Ptr{Cvoid}, Cint, Csize_t), d.ht, 0, sizeof(d.ht)) + ht = d.ht + t = @_gc_preserve_begin ht + memset(unsafe_convert(Ptr{Cvoid}, ht), 0, sizeof(ht)) + @_gc_preserve_end t d.ndel = 0 d.count = 0 return d diff --git a/base/indices.jl b/base/indices.jl index a9189865048cd..15a2a2f3c0ac7 100644 --- a/base/indices.jl +++ b/base/indices.jl @@ -504,6 +504,7 @@ promote_rule(a::Type{IdentityUnitRange{T1}}, b::Type{IdentityUnitRange{T2}}) whe IndexStyle(::Type{<:LinearIndices}) = IndexLinear() axes(iter::LinearIndices) = map(axes1, iter.indices) size(iter::LinearIndices) = map(length, iter.indices) +isassigned(iter::LinearIndices, i::Int) = checkbounds(Bool, iter, i) function getindex(iter::LinearIndices, i::Int) @inline @boundscheck checkbounds(iter, i) diff --git a/base/initdefs.jl b/base/initdefs.jl index 002984b83dd97..ed0aa3856f339 100644 --- a/base/initdefs.jl +++ b/base/initdefs.jl @@ -93,6 +93,7 @@ function append_default_depot_path!(DEPOT_PATH) path in DEPOT_PATH || push!(DEPOT_PATH, path) path = abspath(Sys.BINDIR, "..", "share", 
"julia") path in DEPOT_PATH || push!(DEPOT_PATH, path) + return DEPOT_PATH end function init_depot_path() @@ -111,6 +112,7 @@ function init_depot_path() else append_default_depot_path!(DEPOT_PATH) end + nothing end ## LOAD_PATH & ACTIVE_PROJECT ## @@ -220,9 +222,7 @@ function parse_load_path(str::String) end function init_load_path() - if Base.creating_sysimg - paths = ["@stdlib"] - elseif haskey(ENV, "JULIA_LOAD_PATH") + if haskey(ENV, "JULIA_LOAD_PATH") paths = parse_load_path(ENV["JULIA_LOAD_PATH"]) else paths = filter!(env -> env !== nothing, @@ -336,6 +336,10 @@ end Return the fully expanded value of [`LOAD_PATH`](@ref) that is searched for projects and packages. + +!!! note + `load_path` may return a reference to a cached value so it is not safe to modify the + returned vector. """ function load_path() cache = LOADING_CACHE[] @@ -354,6 +358,7 @@ const atexit_hooks = Callable[ () -> Filesystem.temp_cleanup_purge(force=true) ] const _atexit_hooks_lock = ReentrantLock() +global _atexit_hooks_finished::Bool = false """ atexit(f) @@ -374,12 +379,40 @@ exit code `n` (instead of the original exit code). If more than one exit hook calls `exit(n)`, then Julia will exit with the exit code corresponding to the last called exit hook that calls `exit(n)`. (Because exit hooks are called in LIFO order, "last called" is equivalent to "first registered".) + +Note: Once all exit hooks have been called, no more exit hooks can be registered, +and any call to `atexit(f)` after all hooks have completed will throw an exception. +This situation may occur if you are registering exit hooks from background Tasks that +may still be executing concurrently during shutdown. 
""" -atexit(f::Function) = Base.@lock _atexit_hooks_lock (pushfirst!(atexit_hooks, f); nothing) +function atexit(f::Function) + Base.@lock _atexit_hooks_lock begin + _atexit_hooks_finished && error("cannot register new atexit hook; already exiting.") + pushfirst!(atexit_hooks, f) + return nothing + end +end function _atexit(exitcode::Cint) - while !isempty(atexit_hooks) - f = popfirst!(atexit_hooks) + # Don't hold the lock around the iteration, just in case any other thread executing in + # parallel tries to register a new atexit hook while this is running. We don't want to + # block that thread from proceeding, and we can allow it to register its hook which we + # will immediately run here. + while true + local f + Base.@lock _atexit_hooks_lock begin + # If this is the last iteration, atomically disable atexit hooks to prevent + # someone from registering a hook that will never be run. + # (We do this inside the loop, so that it is atomic: no one can have registered + # a hook that never gets run, and we run all the hooks we know about until + # the vector is empty.) + if isempty(atexit_hooks) + global _atexit_hooks_finished = true + break + end + + f = popfirst!(atexit_hooks) + end try if hasmethod(f, (Cint,)) f(exitcode) diff --git a/base/io.jl b/base/io.jl index 9c00c57576bac..60a24831587cb 100644 --- a/base/io.jl +++ b/base/io.jl @@ -224,7 +224,6 @@ read(stream, ::Type{Union{}}, slurp...; kwargs...) = error("cannot read a value """ write(io::IO, x) - write(filename::AbstractString, x) Write the canonical binary representation of a value to the given I/O stream or file. Return the number of bytes written into the stream. 
See also [`print`](@ref) to @@ -458,17 +457,29 @@ wait_close(io::AbstractPipe) = (wait_close(pipe_writer(io)::IO); wait_close(pipe # Exception-safe wrappers (io = open(); try f(io) finally close(io)) + +""" + write(filename::AbstractString, content) + +Write the canonical binary representation of `content` to a file, which will be created if it does not exist yet or overwritten if it does exist. + +Return the number of bytes written into the file. +""" write(filename::AbstractString, a1, args...) = open(io->write(io, a1, args...), convert(String, filename)::String, "w") """ - read(filename::AbstractString, args...) + read(filename::AbstractString) -Open a file and read its contents. `args` is passed to `read`: this is equivalent to -`open(io->read(io, args...), filename)`. +Read the entire contents of a file as a `Vector{UInt8}`. read(filename::AbstractString, String) Read the entire contents of a file as a string. + + read(filename::AbstractString, args...) + +Open a file and read its contents. `args` is passed to `read`: this is equivalent to +`open(io->read(io, args...), filename)`. """ read(filename::AbstractString, args...) = open(io->read(io, args...), convert(String, filename)::String) diff --git a/base/libc.jl b/base/libc.jl index 82286fbf01af6..99e8dce6b87e5 100644 --- a/base/libc.jl +++ b/base/libc.jl @@ -6,10 +6,13 @@ Interface to libc, the C standard library. 
""" Libc import Base: transcode, windowserror, show +# these need to be defined seperately for bootstrapping but belong to Libc +import Base: memcpy, memmove, memset, memcmp import Core.Intrinsics: bitcast -export FILE, TmStruct, strftime, strptime, getpid, gethostname, free, malloc, calloc, realloc, - errno, strerror, flush_cstdio, systemsleep, time, transcode +export FILE, TmStruct, strftime, strptime, getpid, gethostname, free, malloc, memcpy, + memmove, memset, calloc, realloc, errno, strerror, flush_cstdio, systemsleep, time, + transcode if Sys.iswindows() export GetLastError, FormatMessage end @@ -336,7 +339,6 @@ if Sys.iswindows() end ## Memory related ## - """ free(addr::Ptr) @@ -346,6 +348,8 @@ be freed by the free functions defined in that library, to avoid assertion failu multiple `libc` libraries exist on the system. """ free(p::Ptr) = ccall(:free, Cvoid, (Ptr{Cvoid},), p) +free(p::Cstring) = free(convert(Ptr{UInt8}, p)) +free(p::Cwstring) = free(convert(Ptr{Cwchar_t}, p)) """ malloc(size::Integer) -> Ptr{Cvoid} @@ -371,8 +375,7 @@ Call `calloc` from the C standard library. """ calloc(num::Integer, size::Integer) = ccall(:calloc, Ptr{Cvoid}, (Csize_t, Csize_t), num, size) -free(p::Cstring) = free(convert(Ptr{UInt8}, p)) -free(p::Cwstring) = free(convert(Ptr{Cwchar_t}, p)) + ## Random numbers ## diff --git a/base/loading.jl b/base/loading.jl index 9cc2f13752dfb..1ea4412ecc68f 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -491,12 +491,12 @@ end """ pkgdir(m::Module[, paths::String...]) -Return the root directory of the package that imported module `m`, -or `nothing` if `m` was not imported from a package. Optionally further +Return the root directory of the package that declared module `m`, +or `nothing` if `m` was not declared in a package. Optionally further path component strings can be provided to construct a path within the package root. 
-To get the root directory of the package that imported the current module +To get the root directory of the package that implements the current module the form `pkgdir(@__MODULE__)` can be used. ```julia-repl @@ -627,7 +627,7 @@ function manifest_deps_get(env::String, where::PkgId, name::String)::Union{Nothi exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing} if exts !== nothing # Check if `where` is an extension of the project - if where.name in keys(exts) && where.uuid == uuid5(proj.uuid, where.name) + if where.name in keys(exts) && where.uuid == uuid5(proj.uuid::UUID, where.name) # Extensions can load weak deps... weakdeps = get(d, "weakdeps", nothing)::Union{Dict{String, Any}, Nothing} if weakdeps !== nothing @@ -1209,7 +1209,9 @@ function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missi extensions = get(d_proj, "extensions", nothing)::Union{Nothing, Dict{String, Any}} extensions === nothing && return weakdeps === nothing && return - return _insert_extension_triggers(pkg, extensions, weakdeps) + if weakdeps isa Dict{String, Any} + return _insert_extension_triggers(pkg, extensions, weakdeps) + end end # Now look in manifest @@ -1231,7 +1233,7 @@ function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missi return _insert_extension_triggers(pkg, extensions, weakdeps) end - d_weakdeps = Dict{String, String}() + d_weakdeps = Dict{String, Any}() for (dep_name, entries) in d dep_name in weakdeps || continue entries::Vector{Any} @@ -1251,8 +1253,9 @@ function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missi return nothing end -function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, <:Any}, weakdeps::Dict{String, <:Any}) - for (ext::String, triggers::Union{String, Vector{String}}) in extensions +function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any}, weakdeps::Dict{String, Any}) + for (ext, triggers) in extensions + triggers = 
triggers::Union{String, Vector{String}} triggers isa String && (triggers = [triggers]) id = PkgId(uuid5(parent.uuid, ext), ext) if id in keys(EXT_PRIMED) || haskey(Base.loaded_modules, id) @@ -1361,6 +1364,65 @@ end # End extensions +# should sync with the types of arguments of `stale_cachefile` +const StaleCacheKey = Tuple{Base.PkgId, UInt128, String, String} + +""" + Base.isprecompiled(pkg::PkgId; ignore_loaded::Bool=false) + +Returns whether a given PkgId within the active project is precompiled. + +By default this check observes the same approach that code loading takes +with respect to when different versions of dependencies are currently loaded +to that which is expected. To ignore loaded modules and answer as if in a +fresh julia session specify `ignore_loaded=true`. + +!!! compat "Julia 1.10" + This function requires at least Julia 1.10. +""" +function isprecompiled(pkg::PkgId; + ignore_loaded::Bool=false, + stale_cache::Dict{StaleCacheKey,Bool}=Dict{StaleCacheKey, Bool}(), + cachepaths::Vector{String}=Base.find_all_in_cache_path(pkg), + sourcepath::Union{String,Nothing}=Base.locate_package(pkg) + ) + isnothing(sourcepath) && error("Cannot locate source for $(repr(pkg))") + for path_to_try in cachepaths + staledeps = stale_cachefile(sourcepath, path_to_try, ignore_loaded = true) + if staledeps === true + continue + end + staledeps, _ = staledeps::Tuple{Vector{Any}, Union{Nothing, String}} + # finish checking staledeps module graph + for i in 1:length(staledeps) + dep = staledeps[i] + dep isa Module && continue + modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128} + modpaths = find_all_in_cache_path(modkey) + for modpath_to_try in modpaths::Vector{String} + stale_cache_key = (modkey, modbuild_id, modpath, modpath_to_try)::StaleCacheKey + if get!(() -> stale_cachefile(stale_cache_key...; ignore_loaded) === true, + stale_cache, stale_cache_key) + continue + end + @goto check_next_dep + end + @goto check_next_path + @label check_next_dep + end + 
try + # update timestamp of precompilation file so that it is the first to be tried by code loading + touch(path_to_try) + catch ex + # file might be read-only and then we fail to update timestamp, which is fine + ex isa IOError || rethrow() + end + return true + @label check_next_path + end + return false +end + # loads a precompile cache file, after checking stale_cachefile tests function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt128) assert_havelock(require_lock) @@ -1650,6 +1712,8 @@ end # require always works in Main scope and loads files from node 1 const toplevel_load = Ref(true) +const _require_world_age = Ref{UInt}(typemax(UInt)) + """ require(into::Module, module::Symbol) @@ -1672,6 +1736,14 @@ For more details regarding code loading, see the manual sections on [modules](@r [parallel computing](@ref code-availability). """ function require(into::Module, mod::Symbol) + if _require_world_age[] != typemax(UInt) + Base.invoke_in_world(_require_world_age[], __require, into, mod) + else + @invokelatest __require(into, mod) + end +end + +function __require(into::Module, mod::Symbol) @lock require_lock begin LOADING_CACHE[] = LoadingCache() try @@ -1721,6 +1793,14 @@ require(uuidkey::PkgId) = @lock require_lock _require_prelocked(uuidkey) const REPL_PKGID = PkgId(UUID("3fa0cd96-eef1-5676-8a61-b3b8758bbffb"), "REPL") function _require_prelocked(uuidkey::PkgId, env=nothing) + if _require_world_age[] != typemax(UInt) + Base.invoke_in_world(_require_world_age[], __require_prelocked, uuidkey, env) + else + @invokelatest __require_prelocked(uuidkey, env) + end +end + +function __require_prelocked(uuidkey::PkgId, env=nothing) assert_havelock(require_lock) if !root_module_exists(uuidkey) newm = _require(uuidkey, env) @@ -1881,15 +1961,24 @@ function _require(pkg::PkgId, env=nothing) @goto load_from_cache end # spawn off a new incremental pre-compile task for recursive `require` calls - cachefile = compilecache(pkg, path) - if isa(cachefile, Exception) + 
cachefile_or_module = maybe_cachefile_lock(pkg, path) do + # double-check now that we have lock + m = _require_search_from_serialized(pkg, path, UInt128(0)) + m isa Module && return m + compilecache(pkg, path) + end + cachefile_or_module isa Module && return cachefile_or_module::Module + cachefile = cachefile_or_module + if isnothing(cachefile) # maybe_cachefile_lock returns nothing if it had to wait for another process + @goto load_from_cache # the new cachefile will have the newest mtime so will come first in the search + elseif isa(cachefile, Exception) if precompilableerror(cachefile) verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug @logmsg verbosity "Skipping precompilation since __precompile__(false). Importing $pkg." else @warn "The call to compilecache failed to create a usable precompiled cache file for $pkg" exception=m end - # fall-through to loading the file locally + # fall-through to loading the file locally if not incremental else cachefile, ocachefile = cachefile::Tuple{String, Union{Nothing, String}} m = _tryrequire_from_serialized(pkg, cachefile, ocachefile) @@ -1899,6 +1988,10 @@ function _require(pkg::PkgId, env=nothing) return m end end + if JLOptions().incremental != 0 + # during incremental precompilation, this should be fail-fast + throw(PrecompilableError()) + end end end @@ -2192,14 +2285,14 @@ function compilecache_dir(pkg::PkgId) return joinpath(DEPOT_PATH[1], entrypath) end -function compilecache_path(pkg::PkgId, prefs_hash::UInt64)::String +function compilecache_path(pkg::PkgId, prefs_hash::UInt64; project::String=something(Base.active_project(), ""))::String entrypath, entryfile = cache_file_entry(pkg) cachepath = joinpath(DEPOT_PATH[1], entrypath) isdir(cachepath) || mkpath(cachepath) if pkg.uuid === nothing abspath(cachepath, entryfile) * ".ji" else - crc = _crc32c(something(Base.active_project(), "")) + crc = _crc32c(project) crc = _crc32c(unsafe_string(JLOptions().image_file), crc) crc = 
_crc32c(unsafe_string(JLOptions().julia_bin), crc) crc = _crc32c(ccall(:jl_cache_flags, UInt8, ()), crc) @@ -2784,6 +2877,44 @@ function show(io::IO, cf::CacheFlags) print(io, ", opt_level = ", cf.opt_level) end +# Set by FileWatching.__init__() +global mkpidlock_hook +global trymkpidlock_hook +global parse_pidfile_hook + +# The preferences hash is only known after precompilation so just assume no preferences. +# Also ignore the active project, which means that if all other conditions are equal, +# the same package cannot be precompiled from different projects and/or different preferences at the same time. +compilecache_pidfile_path(pkg::PkgId) = compilecache_path(pkg, UInt64(0); project="") * ".pidfile" + +# Allows processes to wait if another process is precompiling a given source already. +# The lock file is deleted and precompilation will proceed after `stale_age` seconds if +# - the locking process no longer exists +# - the lock is held by another host, since processes cannot be checked remotely +# or after `stale_age * 25` seconds if the process does still exist. +function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=300) + if @isdefined(mkpidlock_hook) && @isdefined(trymkpidlock_hook) && @isdefined(parse_pidfile_hook) + pidfile = compilecache_pidfile_path(pkg) + cachefile = invokelatest(trymkpidlock_hook, f, pidfile; stale_age) + if cachefile === false + pid, hostname, age = invokelatest(parse_pidfile_hook, pidfile) + verbosity = isinteractive() ? 
CoreLogging.Info : CoreLogging.Debug + if isempty(hostname) || hostname == gethostname() + @logmsg verbosity "Waiting for another process (pid: $pid) to finish precompiling $pkg" + else + @logmsg verbosity "Waiting for another machine (hostname: $hostname, pid: $pid) to finish precompiling $pkg" + end + # wait until the lock is available, but don't actually acquire it + # returning nothing indicates a process waited for another + return invokelatest(mkpidlock_hook, Returns(nothing), pidfile; stale_age) + end + return cachefile + else + # for packages loaded before FileWatching.__init__() + f() + end +end + # returns true if it "cachefile.ji" is stale relative to "modpath.jl" and build_id for modkey # otherwise returns the list of dependencies to also check @constprop :none function stale_cachefile(modpath::String, cachefile::String; ignore_loaded::Bool = false) diff --git a/base/meta.jl b/base/meta.jl index b0e0dc371b26c..31fef1b9697e3 100644 --- a/base/meta.jl +++ b/base/meta.jl @@ -187,8 +187,11 @@ expression. """ struct ParseError <: Exception msg::String + detail::Any end +ParseError(msg::AbstractString) = ParseError(msg, nothing) + function _parse_string(text::AbstractString, filename::AbstractString, lineno::Integer, index::Integer, options) if index < 1 || index > ncodeunits(text) + 1 @@ -199,7 +202,7 @@ function _parse_string(text::AbstractString, filename::AbstractString, end """ - parse(str, start; greedy=true, raise=true, depwarn=true) + parse(str, start; greedy=true, raise=true, depwarn=true, filename="none") Parse the expression string and return an expression (which could later be passed to eval for execution). `start` is the code unit index into `str` of the @@ -211,6 +214,7 @@ return `Expr(:incomplete, "(error message)")`. If `raise` is `true` (default), syntax errors other than incomplete expressions will raise an error. If `raise` is `false`, `parse` will return an expression that will raise an error upon evaluation. 
If `depwarn` is `false`, deprecation warnings will be suppressed. +The `filename` argument is used to display diagnostics when an error is raised. ```jldoctest julia> Meta.parse("(α, β) = 3, 5", 1) # start of string @@ -229,43 +233,50 @@ julia> Meta.parse("(α, β) = 3, 5", 11, greedy=false) (3, 13) ``` """ -function parse(str::AbstractString, pos::Integer; greedy::Bool=true, raise::Bool=true, - depwarn::Bool=true) - ex, pos = _parse_string(str, "none", 1, pos, greedy ? :statement : :atom) - if raise && isa(ex,Expr) && ex.head === :error - throw(ParseError(ex.args[1])) +function parse(str::AbstractString, pos::Integer; + filename="none", greedy::Bool=true, raise::Bool=true, depwarn::Bool=true) + ex, pos = _parse_string(str, String(filename), 1, pos, greedy ? :statement : :atom) + if raise && isexpr(ex, :error) + err = ex.args[1] + if err isa String + err = ParseError(err) # For flisp parser + end + throw(err) end return ex, pos end """ - parse(str; raise=true, depwarn=true) + parse(str; raise=true, depwarn=true, filename="none") Parse the expression string greedily, returning a single expression. An error is thrown if there are additional characters after the first expression. If `raise` is `true` (default), syntax errors will raise an error; otherwise, `parse` will return an expression that will raise an error upon evaluation. If `depwarn` is `false`, deprecation warnings will be -suppressed. +suppressed. The `filename` argument is used to display diagnostics when an error is raised. -```jldoctest +```jldoctest; filter=r"(?<=Expr\\(:error).*|(?<=Expr\\(:incomplete).*" julia> Meta.parse("x = 3") :(x = 3) -julia> Meta.parse("x = ") -:($(Expr(:incomplete, "incomplete: premature end of input"))) - julia> Meta.parse("1.0.2") -ERROR: Base.Meta.ParseError("invalid numeric constant \\\"1.0.\\\"") -Stacktrace: +ERROR: ParseError: +# Error @ none:1:1 +1.0.2 +└──┘ ── invalid numeric constant [...] 
julia> Meta.parse("1.0.2"; raise = false) -:($(Expr(:error, "invalid numeric constant \"1.0.\""))) +:(\$(Expr(:error, "invalid numeric constant \"1.0.\""))) + +julia> Meta.parse("x = ") +:(\$(Expr(:incomplete, "incomplete: premature end of input"))) ``` """ -function parse(str::AbstractString; raise::Bool=true, depwarn::Bool=true) - ex, pos = parse(str, 1, greedy=true, raise=raise, depwarn=depwarn) - if isa(ex,Expr) && ex.head === :error +function parse(str::AbstractString; + filename="none", raise::Bool=true, depwarn::Bool=true) + ex, pos = parse(str, 1; filename, greedy=true, raise, depwarn) + if isexpr(ex, :error) return ex end if pos <= ncodeunits(str) diff --git a/base/missing.jl b/base/missing.jl index 4544c2b38c460..f6f5fe507260b 100644 --- a/base/missing.jl +++ b/base/missing.jl @@ -12,7 +12,7 @@ where it is not supported. The error message, in the `msg` field may provide more specific details. """ struct MissingException <: Exception - msg::String + msg::AbstractString end showerror(io::IO, ex::MissingException) = @@ -136,11 +136,14 @@ max(::Missing, ::Missing) = missing max(::Missing, ::Any) = missing max(::Any, ::Missing) = missing +missing_conversion_msg(@nospecialize T) = + LazyString("cannot convert a missing value to type ", T, ": use Union{", T, ", Missing} instead") + # Rounding and related functions round(::Missing, ::RoundingMode=RoundNearest; sigdigits::Integer=0, digits::Integer=0, base::Integer=0) = missing round(::Type{>:Missing}, ::Missing, ::RoundingMode=RoundNearest) = missing round(::Type{T}, ::Missing, ::RoundingMode=RoundNearest) where {T} = - throw(MissingException("cannot convert a missing value to type $T: use Union{$T, Missing} instead")) + throw(MissingException(missing_conversion_msg(T))) round(::Type{T}, x::Any, r::RoundingMode=RoundNearest) where {T>:Missing} = round(nonmissingtype_checked(T), x, r) # to fix ambiguities round(::Type{T}, x::Rational{Tr}, r::RoundingMode=RoundNearest) where {T>:Missing,Tr} = 
round(nonmissingtype_checked(T), x, r) @@ -151,8 +154,7 @@ for f in (:(ceil), :(floor), :(trunc)) @eval begin ($f)(::Missing; sigdigits::Integer=0, digits::Integer=0, base::Integer=0) = missing ($f)(::Type{>:Missing}, ::Missing) = missing - ($f)(::Type{T}, ::Missing) where {T} = - throw(MissingException("cannot convert a missing value to type $T: use Union{$T, Missing} instead")) + ($f)(::Type{T}, ::Missing) where {T} = throw(MissingException(missing_conversion_msg(T))) ($f)(::Type{T}, x::Any) where {T>:Missing} = $f(nonmissingtype_checked(T), x) # to fix ambiguities ($f)(::Type{T}, x::Rational) where {T>:Missing} = $f(nonmissingtype_checked(T), x) @@ -265,7 +267,7 @@ keys(itr::SkipMissing) = Iterators.filter(i -> @inbounds(itr.x[i]) !== missing, keys(itr.x)) @propagate_inbounds function getindex(itr::SkipMissing, I...) v = itr.x[I...] - v === missing && throw(MissingException("the value at index $I is missing")) + v === missing && throw(MissingException(LazyString("the value at index ", I, " is missing"))) v end diff --git a/base/mpfr.jl b/base/mpfr.jl index ff85fc6155df4..2e03018f7669f 100644 --- a/base/mpfr.jl +++ b/base/mpfr.jl @@ -19,6 +19,8 @@ import isone, big, _string_n, decompose, minmax, sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand + +using .Base.Libc import ..Rounding: rounding_raw, setrounding_raw import ..GMP: ClongMax, CulongMax, CdoubleMax, Limb, libgmp @@ -1140,7 +1142,7 @@ function decompose(x::BigFloat)::Tuple{BigInt, Int, Int} s.size = cld(x.prec, 8*sizeof(Limb)) # limbs b = s.size * sizeof(Limb) # bytes ccall((:__gmpz_realloc2, libgmp), Cvoid, (Ref{BigInt}, Culong), s, 8b) # bits - ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), s.d, x.d, b) # bytes + memcpy(s.d, x.d, b) s, x.exp - 8b, x.sign end diff --git a/base/multidimensional.jl b/base/multidimensional.jl index ce1b6c39adb43..ba4e6eb12695a 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -2,8 +2,8 @@ ### Multidimensional iterators 
module IteratorsMD - import .Base: eltype, length, size, first, last, in, getindex, setindex!, IndexStyle, - min, max, zero, oneunit, isless, eachindex, ndims, IteratorSize, + import .Base: eltype, length, size, first, last, in, getindex, setindex!, + min, max, zero, oneunit, isless, eachindex, convert, show, iterate, promote_rule import .Base: +, -, *, (:) @@ -342,8 +342,14 @@ module IteratorsMD # AbstractArray implementation Base.axes(iter::CartesianIndices{N,R}) where {N,R} = map(Base.axes1, iter.indices) - Base.IndexStyle(::Type{CartesianIndices{N,R}}) where {N,R} = IndexCartesian() Base.has_offset_axes(iter::CartesianIndices) = Base.has_offset_axes(iter.indices...) + @propagate_inbounds function isassigned(iter::CartesianIndices{N,R}, I::Vararg{Int, N}) where {N,R} + for i in 1:N + isassigned(iter.indices[i], I[i]) || return false + end + return true + end + # getindex for a 0D CartesianIndices is necessary for disambiguation @propagate_inbounds function Base.getindex(iter::CartesianIndices{0,R}) where {R} CartesianIndex() @@ -383,10 +389,6 @@ module IteratorsMD getindex(c, C) end - ndims(R::CartesianIndices) = ndims(typeof(R)) - ndims(::Type{CartesianIndices{N}}) where {N} = N - ndims(::Type{CartesianIndices{N,TT}}) where {N,TT} = N - eachindex(::IndexCartesian, A::AbstractArray) = CartesianIndices(axes(A)) @inline function eachindex(::IndexCartesian, A::AbstractArray, B::AbstractArray...) @@ -395,10 +397,6 @@ module IteratorsMD CartesianIndices(axsA) end - eltype(::Type{CartesianIndices{N}}) where {N} = CartesianIndex{N} - eltype(::Type{CartesianIndices{N,TT}}) where {N,TT} = CartesianIndex{N} - IteratorSize(::Type{<:CartesianIndices{N}}) where {N} = Base.HasShape{N}() - @inline function iterate(iter::CartesianIndices) iterfirst = first(iter) if !all(map(in, iterfirst.I, iter.indices)) @@ -1565,7 +1563,28 @@ end end isassigned(a::AbstractArray, i::CartesianIndex) = isassigned(a, Tuple(i)...) -isassigned(a::AbstractArray, i::Union{Integer, CartesianIndex}...) 
= isassigned(a, CartesianIndex(i)) +function isassigned(A::AbstractArray, i::Union{Integer, CartesianIndex}...) + isa(i, Tuple{Vararg{Int}}) || return isassigned(A, CartesianIndex(i...)) + @boundscheck checkbounds(Bool, A, i...) || return false + S = IndexStyle(A) + ninds = length(i) + if (isa(S, IndexLinear) && ninds != 1) + return @inbounds isassigned(A, _to_linear_index(A, i...)) + elseif (!isa(S, IndexLinear) && ninds != ndims(A)) + return @inbounds isassigned(A, _to_subscript_indices(A, i...)...) + else + try + A[i...] + true + catch e + if isa(e, BoundsError) || isa(e, UndefRefError) + return false + else + rethrow() + end + end + end +end ## permutedims diff --git a/base/namedtuple.jl b/base/namedtuple.jl index 320d068205a3d..e489508bc55ea 100644 --- a/base/namedtuple.jl +++ b/base/namedtuple.jl @@ -133,12 +133,6 @@ function NamedTuple{names, T}(nt::NamedTuple) where {names, T <: Tuple} end end -# Like NamedTuple{names, T} as a constructor, but omits the additional -# `convert` call, when the types are known to match the fields -@eval function _new_NamedTuple(T::Type{NamedTuple{NTN, NTT}} where {NTN, NTT}, args::Tuple) - $(Expr(:splatnew, :T, :args)) -end - function NamedTuple{names}(nt::NamedTuple) where {names} if @generated idx = Int[ fieldindex(nt, names[n]) for n in 1:length(names) ] @@ -161,6 +155,12 @@ NamedTuple{names, Union{}}(itr::Tuple) where {names} = throw(MethodError(NamedTu end # if Base +# Like NamedTuple{names, T} as a constructor, but omits the additional +# `convert` call, when the types are known to match the fields +@eval function _new_NamedTuple(T::Type{NamedTuple{NTN, NTT}} where {NTN, NTT}, args::Tuple) + $(Expr(:splatnew, :T, :args)) +end + length(t::NamedTuple) = nfields(t) iterate(t::NamedTuple, iter=1) = iter > nfields(t) ? 
nothing : (getfield(t, iter), iter + 1) rest(t::NamedTuple) = t @@ -495,6 +495,65 @@ macro NamedTuple(ex) return :(NamedTuple{($(vars...),), Tuple{$(types...)}}) end +""" + @Kwargs{key1::Type1, key2::Type2, ...} + +This macro gives a convenient way to construct the type representation of keyword arguments +from the same syntax as [`@NamedTuple`](@ref). +For example, when we have a function call like `func([positional arguments]; kw1=1.0, kw2="2")`, +we can use this macro to construct the internal type representation of the keyword arguments +as `@Kwargs{kw1::Float64, kw2::String}`. +The macro syntax is specifically designed to simplify the signature type of a keyword method +when it is printed in the stack trace view. + +```julia +julia> @Kwargs{init::Int} # the internal representation of keyword arguments +Base.Pairs{Symbol, Int64, Tuple{Symbol}, @NamedTuple{init::Int64}} + +julia> sum("julia"; init=1) +ERROR: MethodError: no method matching +(::Char, ::Char) + +Closest candidates are: + +(::Any, ::Any, ::Any, ::Any...) 
+ @ Base operators.jl:585 + +(::Integer, ::AbstractChar) + @ Base char.jl:247 + +(::T, ::Integer) where T<:AbstractChar + @ Base char.jl:237 + +Stacktrace: + [1] add_sum(x::Char, y::Char) + @ Base ./reduce.jl:24 + [2] BottomRF + @ Base ./reduce.jl:86 [inlined] + [3] _foldl_impl(op::Base.BottomRF{typeof(Base.add_sum)}, init::Int64, itr::String) + @ Base ./reduce.jl:62 + [4] foldl_impl(op::Base.BottomRF{typeof(Base.add_sum)}, nt::Int64, itr::String) + @ Base ./reduce.jl:48 [inlined] + [5] mapfoldl_impl(f::typeof(identity), op::typeof(Base.add_sum), nt::Int64, itr::String) + @ Base ./reduce.jl:44 [inlined] + [6] mapfoldl(f::typeof(identity), op::typeof(Base.add_sum), itr::String; init::Int64) + @ Base ./reduce.jl:175 [inlined] + [7] mapreduce(f::typeof(identity), op::typeof(Base.add_sum), itr::String; kw::@Kwargs{init::Int64}) + @ Base ./reduce.jl:307 [inlined] + [8] sum(f::typeof(identity), a::String; kw::@Kwargs{init::Int64}) + @ Base ./reduce.jl:535 [inlined] + [9] sum(a::String; kw::@Kwargs{init::Int64}) + @ Base ./reduce.jl:564 [inlined] + [10] top-level scope + @ REPL[12]:1 +``` + +!!! compat "Julia 1.10" + This macro is available as of Julia 1.10. +""" +macro Kwargs(ex) + return :(let + NT = @NamedTuple $ex + Base.Pairs{keytype(NT),eltype(NT),typeof(NT.parameters[1]),NT} + end) +end + @constprop :aggressive function split_rest(t::NamedTuple{names}, n::Int, st...) where {names} _check_length_split_rest(length(t), n) names_front, names_last_n = split_rest(names, n, st...) diff --git a/base/operators.jl b/base/operators.jl index 5893c5944a3a0..3f51be737ca5c 100644 --- a/base/operators.jl +++ b/base/operators.jl @@ -79,7 +79,7 @@ also implement [`<`](@ref) to ensure consistency of comparisons. == """ - isequal(x, y) + isequal(x, y) -> Bool Similar to [`==`](@ref), except for the treatment of floating point numbers and of missing values. 
`isequal` treats all floating-point `NaN` values as equal diff --git a/base/options.jl b/base/options.jl index 23a3dbc802b5f..a94936391fa8d 100644 --- a/base/options.jl +++ b/base/options.jl @@ -11,7 +11,8 @@ struct JLOptions cpu_target::Ptr{UInt8} nthreadpools::Int16 nthreads::Int16 - ngcthreads::Int16 + nmarkthreads::Int16 + nsweepthreads::Int8 nthreads_per_pool::Ptr{Int16} nprocs::Int32 machine_file::Ptr{UInt8} @@ -54,6 +55,7 @@ struct JLOptions rr_detach::Int8 strip_metadata::Int8 strip_ir::Int8 + permalloc_pkgimg::Int8 heap_size_hint::UInt64 end diff --git a/base/osutils.jl b/base/osutils.jl index 1f5a708d30c7a..95d0562540e5a 100644 --- a/base/osutils.jl +++ b/base/osutils.jl @@ -16,7 +16,7 @@ macro static(ex) @label loop hd = ex.head if hd ∈ (:if, :elseif, :&&, :||) - cond = Core.eval(__module__, ex.args[1]) + cond = Core.eval(__module__, ex.args[1])::Bool if xor(cond, hd === :||) return esc(ex.args[2]) elseif length(ex.args) == 3 diff --git a/base/pair.jl b/base/pair.jl index f34ebb89c80da..1953dc2886053 100644 --- a/base/pair.jl +++ b/base/pair.jl @@ -44,7 +44,7 @@ indexed_iterate(p::Pair, i::Int, state=1) = (getfield(p, i), i + 1) hash(p::Pair, h::UInt) = hash(p.second, hash(p.first, h)) ==(p::Pair, q::Pair) = (p.first==q.first) & (p.second==q.second) -isequal(p::Pair, q::Pair) = isequal(p.first,q.first) & isequal(p.second,q.second) +isequal(p::Pair, q::Pair) = isequal(p.first,q.first)::Bool & isequal(p.second,q.second)::Bool isless(p::Pair, q::Pair) = ifelse(!isequal(p.first,q.first), isless(p.first,q.first), isless(p.second,q.second)) diff --git a/base/parse.jl b/base/parse.jl index d800e54258b0d..f6a93e56369b7 100644 --- a/base/parse.jl +++ b/base/parse.jl @@ -210,9 +210,11 @@ function tryparse_internal(::Type{Bool}, sbuff::AbstractString, len = endpos - startpos + 1 if sbuff isa Union{String, SubString{String}} p = pointer(sbuff) + startpos - 1 - GC.@preserve sbuff begin - (len == 4) && (0 == _memcmp(p, "true", 4)) && (return true) - (len == 5) && (0 
== _memcmp(p, "false", 5)) && (return false) + truestr = "true" + falsestr = "false" + GC.@preserve sbuff truestr falsestr begin + (len == 4) && (0 == memcmp(p, unsafe_convert(Ptr{UInt8}, truestr), 4)) && (return true) + (len == 5) && (0 == memcmp(p, unsafe_convert(Ptr{UInt8}, falsestr), 5)) && (return false) end else (len == 4) && (SubString(sbuff, startpos:startpos+3) == "true") && (return true) diff --git a/base/permuteddimsarray.jl b/base/permuteddimsarray.jl index 80685332a85dc..41c3636b40216 100644 --- a/base/permuteddimsarray.jl +++ b/base/permuteddimsarray.jl @@ -78,6 +78,12 @@ end val end +function Base.isassigned(A::PermutedDimsArray{T,N,perm,iperm}, I::Vararg{Int,N}) where {T,N,perm,iperm} + @boundscheck checkbounds(Bool, A, I...) || return false + @inbounds x = isassigned(A.parent, genperm(I, iperm)...) + x +end + @inline genperm(I::NTuple{N,Any}, perm::Dims{N}) where {N} = ntuple(d -> I[perm[d]], Val(N)) @inline genperm(I, perm::AbstractVector{Int}) = genperm(I, (perm...,)) diff --git a/base/pointer.jl b/base/pointer.jl index 62b34dd06d368..a47f1e38edb9b 100644 --- a/base/pointer.jl +++ b/base/pointer.jl @@ -98,32 +98,147 @@ unsafe_wrap(Atype::Union{Type{Array},Type{Array{T}},Type{Array{T,N}}}, """ unsafe_load(p::Ptr{T}, i::Integer=1) + unsafe_load(p::Ptr{T}, order::Symbol) + unsafe_load(p::Ptr{T}, i::Integer, order::Symbol) Load a value of type `T` from the address of the `i`th element (1-indexed) starting at `p`. -This is equivalent to the C expression `p[i-1]`. +This is equivalent to the C expression `p[i-1]`. Optionally, an atomic memory ordering can +be provided. The `unsafe` prefix on this function indicates that no validation is performed on the pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring that referenced memory is not freed or garbage collected while invoking this function. Incorrect usage may segfault your program or return garbage answers. 
Unlike C, dereferencing memory region allocated as different type may be valid provided that the types are compatible. + +!!! compat "Julia 1.10" + The `order` argument is available as of Julia 1.10. + +See also: [`atomic`](@ref) """ unsafe_load(p::Ptr, i::Integer=1) = pointerref(p, Int(i), 1) +unsafe_load(p::Ptr, order::Symbol) = atomic_pointerref(p, order) +function unsafe_load(p::Ptr, i::Integer, order::Symbol) + unsafe_load(p + (elsize(typeof(p)) * (Int(i) - 1)), order) +end """ unsafe_store!(p::Ptr{T}, x, i::Integer=1) + unsafe_store!(p::Ptr{T}, x, order::Symbol) + unsafe_store!(p::Ptr{T}, x, i::Integer, order::Symbol) Store a value of type `T` to the address of the `i`th element (1-indexed) starting at `p`. -This is equivalent to the C expression `p[i-1] = x`. +This is equivalent to the C expression `p[i-1] = x`. Optionally, an atomic memory ordering +can be provided. The `unsafe` prefix on this function indicates that no validation is performed on the pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring that referenced memory is not freed or garbage collected while invoking this function. Incorrect usage may segfault your program. Unlike C, storing memory region allocated as different type may be valid provided that that the types are compatible. + +!!! compat "Julia 1.10" + The `order` argument is available as of Julia 1.10. + +See also: [`atomic`](@ref) """ unsafe_store!(p::Ptr{Any}, @nospecialize(x), i::Integer=1) = pointerset(p, x, Int(i), 1) unsafe_store!(p::Ptr{T}, x, i::Integer=1) where {T} = pointerset(p, convert(T,x), Int(i), 1) +unsafe_store!(p::Ptr{T}, x, order::Symbol) where {T} = atomic_pointerset(p, x isa T ? 
x : convert(T,x), order) +function unsafe_store!(p::Ptr, x, i::Integer, order::Symbol) + unsafe_store!(p + (elsize(typeof(p)) * (Int(i) - 1)), x, order) +end + +""" + unsafe_modify!(p::Ptr{T}, op, x, [order::Symbol]) -> Pair + +These atomically perform the operations to get and set a memory address after applying +the function `op`. If supported by the hardware (for example, atomic increment), this may be +optimized to the appropriate hardware instruction, otherwise its execution will be +similar to: + + y = unsafe_load(p) + z = op(y, x) + unsafe_store!(p, z) + return y => z + +The `unsafe` prefix on this function indicates that no validation is performed on the +pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring +that referenced memory is not freed or garbage collected while invoking this function. +Incorrect usage may segfault your program. + +!!! compat "Julia 1.10" + This function requires at least Julia 1.10. + +See also: [`modifyproperty!`](@ref Base.modifyproperty!), [`atomic`](@ref) +""" +function unsafe_modify!(p::Ptr, op, x, order::Symbol=:not_atomic) + return atomic_pointermodify(p, op, x, order) +end + +""" + unsafe_replace!(p::Ptr{T}, expected, desired, + [success_order::Symbol[, fail_order::Symbol=success_order]]) -> (; old, success::Bool) + +These atomically perform the operations to get and conditionally set a memory address to +a given value. If supported by the hardware, this may be optimized to the appropriate +hardware instruction, otherwise its execution will be similar to: + + y = unsafe_load(p, fail_order) + ok = y === expected + if ok + unsafe_store!(p, desired, success_order) + end + return (; old = y, success = ok) + +The `unsafe` prefix on this function indicates that no validation is performed on the +pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring +that referenced memory is not freed or garbage collected while invoking this function. 
+Incorrect usage may segfault your program. + +!!! compat "Julia 1.10" + This function requires at least Julia 1.10. + +See also: [`replaceproperty!`](@ref Base.replaceproperty!), [`atomic`](@ref) +""" +function unsafe_replace!(p::Ptr{T}, expected, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order) where {T} + @inline + xT = desired isa T ? desired : convert(T, desired) + return atomic_pointerreplace(p, expected, xT, success_order, fail_order) +end +function unsafe_replace!(p::Ptr{Any}, @nospecialize(expected), @nospecialize(desired), success_order::Symbol=:not_atomic, fail_order::Symbol=success_order) + return atomic_pointerreplace(p, expected, desired, success_order, fail_order) +end + +""" + unsafe_swap!(p::Ptr{T}, x, [order::Symbol]) + +These atomically perform the operations to simultaneously get and set a memory address. +If supported by the hardware, this may be optimized to the appropriate hardware +instruction, otherwise its execution will be similar to: + + y = unsafe_load(p) + unsafe_store!(p, x) + return y + +The `unsafe` prefix on this function indicates that no validation is performed on the +pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring +that referenced memory is not freed or garbage collected while invoking this function. +Incorrect usage may segfault your program. + +!!! compat "Julia 1.10" + This function requires at least Julia 1.10. + +See also: [`swapproperty!`](@ref Base.swapproperty!), [`atomic`](@ref) +""" +function unsafe_swap!(p::Ptr{Any}, x, order::Symbol=:not_atomic) + return atomic_pointerswap(p, x, order) +end +function unsafe_swap!(p::Ptr{T}, x, order::Symbol=:not_atomic) where {T} + @inline + xT = x isa T ? 
x : convert(T, x) + return atomic_pointerswap(p, xT, order) +end # convert a raw Ptr to an object reference, and vice-versa """ diff --git a/base/range.jl b/base/range.jl index f7dc35703a196..6b701d31b0358 100644 --- a/base/range.jl +++ b/base/range.jl @@ -404,7 +404,11 @@ struct UnitRange{T<:Real} <: AbstractUnitRange{T} end UnitRange{T}(start, stop) where {T<:Real} = UnitRange{T}(convert(T, start), convert(T, stop)) UnitRange(start::T, stop::T) where {T<:Real} = UnitRange{T}(start, stop) -UnitRange(start, stop) = UnitRange(promote(start, stop)...) +function UnitRange(start, stop) + startstop_promoted = promote(start, stop) + not_sametype((start, stop), startstop_promoted) + UnitRange(startstop_promoted...) +end # if stop and start are integral, we know that their difference is a multiple of 1 unitrange_last(start::Integer, stop::Integer) = @@ -901,6 +905,8 @@ end ## indexing +isassigned(r::AbstractRange, i::Int) = firstindex(r) <= i <= lastindex(r) + _in_unit_range(v::UnitRange, val, i::Integer) = i > 0 && val <= v.stop && val >= v.start function getindex(v::UnitRange{T}, i::Integer) where T diff --git a/base/rational.jl b/base/rational.jl index 6ab022736388e..baca2397c42ff 100644 --- a/base/rational.jl +++ b/base/rational.jl @@ -173,10 +173,11 @@ julia> typeof(numerator(a)) BigInt ``` """ -function rationalize(::Type{T}, x::AbstractFloat, tol::Real) where T<:Integer +function rationalize(::Type{T}, x::Union{AbstractFloat, Rational}, tol::Real) where T<:Integer if tol < 0 throw(ArgumentError("negative tolerance $tol")) end + T<:Unsigned && x < 0 && __throw_negate_unsigned() isnan(x) && return T(x)//one(T) isinf(x) && return unsafe_rational(x < 0 ? 
-one(T) : one(T), zero(T)) @@ -188,7 +189,6 @@ function rationalize(::Type{T}, x::AbstractFloat, tol::Real) where T<:Integer a = trunc(x) r = x-a y = one(x) - tolx = oftype(x, tol) nt, t, tt = tolx, zero(tolx), tolx ia = np = nq = zero(T) @@ -233,10 +233,21 @@ function rationalize(::Type{T}, x::AbstractFloat, tol::Real) where T<:Integer return p // q end end -rationalize(::Type{T}, x::AbstractFloat; tol::Real = eps(x)) where {T<:Integer} = rationalize(T, x, tol)::Rational{T} +rationalize(::Type{T}, x::AbstractFloat; tol::Real = eps(x)) where {T<:Integer} = rationalize(T, x, tol) rationalize(x::AbstractFloat; kvs...) = rationalize(Int, x; kvs...) -rationalize(::Type{T}, x::Complex; kvs...) where {T<:Integer} = Complex(rationalize(T, x.re, kvs...)::Rational{T}, rationalize(T, x.im, kvs...)::Rational{T}) -rationalize(x::Complex; kvs...) = Complex(rationalize(Int, x.re, kvs...), rationalize(Int, x.im, kvs...)) +rationalize(::Type{T}, x::Complex; kvs...) where {T<:Integer} = Complex(rationalize(T, x.re; kvs...), rationalize(T, x.im; kvs...)) +rationalize(x::Complex; kvs...) = Complex(rationalize(Int, x.re; kvs...), rationalize(Int, x.im; kvs...)) +rationalize(::Type{T}, x::Rational; tol::Real = 0) where {T<:Integer} = rationalize(T, x, tol) +rationalize(x::Rational; kvs...) = x +rationalize(x::Integer; kvs...) = Rational(x) +function rationalize(::Type{T}, x::Integer; kvs...) 
where {T<:Integer} + if Base.hastypemax(T) # BigInt doesn't + x < typemin(T) && return unsafe_rational(-one(T), zero(T)) + x > typemax(T) && return unsafe_rational(one(T), zero(T)) + end + return Rational{T}(x) +end + """ numerator(x) @@ -545,11 +556,14 @@ function hash(x::Rational{<:BitInteger64}, h::UInt) pow = trailing_zeros(den) den >>= pow pow = -pow - if den == 1 && uabs(num) < UInt64(maxintfloat(Float64)) - return hash(ldexp(Float64(num),pow),h) + if den == 1 + if uabs(num) < UInt64(maxintfloat(Float64)) + return hash(ldexp(Float64(num),pow),h) + end + else + h = hash_integer(den, h) end end - h = hash_integer(den, h) h = hash_integer(pow, h) h = hash_integer(num, h) return h diff --git a/base/reflection.jl b/base/reflection.jl index 97f1ed14c6729..bbcd6cad27128 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -820,9 +820,19 @@ julia> Base.fieldindex(Foo, :z, false) ``` """ function fieldindex(T::DataType, name::Symbol, err::Bool=true) + return err ? _fieldindex_maythrow(T, name) : _fieldindex_nothrow(T, name) +end + +function _fieldindex_maythrow(T::DataType, name::Symbol) @_foldable_meta @noinline - return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, err)+1) + return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, true)+1) +end + +function _fieldindex_nothrow(T::DataType, name::Symbol) + @_total_meta + @noinline + return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, false)+1) end function fieldindex(t::UnionAll, name::Symbol, err::Bool=true) @@ -1194,6 +1204,7 @@ struct CodegenParams gnu_pubnames::Cint debug_info_kind::Cint safepoint_on_entry::Cint + gcstack_arg::Cint lookup::Ptr{Cvoid} @@ -1203,6 +1214,7 @@ struct CodegenParams prefer_specsig::Bool=false, gnu_pubnames=true, debug_info_kind::Cint = default_debug_info_kind(), safepoint_on_entry::Bool=true, + gcstack_arg::Bool=true, lookup::Ptr{Cvoid}=unsafe_load(cglobal(:jl_rettype_inferred_addr, Ptr{Cvoid})), generic_context = nothing) return new( @@ 
-1210,6 +1222,7 @@ struct CodegenParams Cint(prefer_specsig), Cint(gnu_pubnames), debug_info_kind, Cint(safepoint_on_entry), + Cint(gcstack_arg), lookup, generic_context) end end @@ -1247,8 +1260,10 @@ function may_invoke_generator(method::Method, @nospecialize(atype), sparams::Sim # generator only has one method generator = method.generator isa(generator, Core.GeneratedFunctionStub) || return false - gen_mthds = _methods_by_ftype(Tuple{typeof(generator.gen), Vararg{Any}}, 1, method.primary_world) - (gen_mthds isa Vector && length(gen_mthds) == 1) || return false + tt = Tuple{typeof(generator.gen), Vararg{Any}} + gen_mthds = _methods_by_ftype(tt, #=lim=#1, method.primary_world) + gen_mthds isa Vector || return false + length(gen_mthds) == 1 || return false generator_method = first(gen_mthds).method nsparams = length(sparams) @@ -1307,19 +1322,20 @@ generic function and type signature. # Keyword Arguments -- `optimize=true`: controls whether additional optimizations, such as inlining, are also applied. -- `debuginfo=:default`: controls the amount of code metadata present in the output, -possible options are `:source` or `:none`. +- `optimize::Bool = true`: optional, controls whether additional optimizations, + such as inlining, are also applied. +- `debuginfo::Symbol = :default`: optional, controls the amount of code metadata present + in the output, possible options are `:source` or `:none`. # Internal Keyword Arguments This section should be considered internal, and is only for who understands Julia compiler internals. -- `world=Base.get_world_counter()`: optional, controls the world age to use when looking up methods, -use current world age if not specified. -- `interp=Core.Compiler.NativeInterpreter(world)`: optional, controls the interpreter to use, -use the native interpreter Julia uses if not specified. +- `world::UInt = Base.get_world_counter()`: optional, controls the world age to use + when looking up methods, use current world age if not specified. 
+- `interp::Core.Compiler.AbstractInterpreter = Core.Compiler.NativeInterpreter(world)`: + optional, controls the abstract interpreter to use, use the native interpreter if not specified. # Example @@ -1334,16 +1350,12 @@ julia> code_typed(+, (Float64, Float64)) ) => Float64 ``` """ -function code_typed(@nospecialize(f), @nospecialize(types=default_tt(f)); - optimize=true, - debuginfo::Symbol=:default, - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world)) +function code_typed(@nospecialize(f), @nospecialize(types=default_tt(f)); kwargs...) if isa(f, Core.OpaqueClosure) - return code_typed_opaque_closure(f; optimize, debuginfo, interp) + return code_typed_opaque_closure(f; kwargs...) end tt = signature_type(f, types) - return code_typed_by_type(tt; optimize, debuginfo, world, interp) + return code_typed_by_type(tt; kwargs...) end # returns argument tuple type which is supposed to be used for `code_typed` and its family; @@ -1365,10 +1377,10 @@ Similar to [`code_typed`](@ref), except the argument is a tuple type describing a full signature to query. 
""" function code_typed_by_type(@nospecialize(tt::Type); - optimize=true, + optimize::Bool=true, debuginfo::Symbol=:default, - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world)) + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) && error("code reflection cannot be used from generated functions") if @isdefined(IRShow) @@ -1380,7 +1392,7 @@ function code_typed_by_type(@nospecialize(tt::Type); throw(ArgumentError("'debuginfo' must be either :source or :none")) end tt = to_tuple_type(tt) - matches = _methods_by_ftype(tt, -1, world)::Vector + matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector asts = [] for match in matches match = match::Core.MethodMatch @@ -1397,7 +1409,7 @@ function code_typed_by_type(@nospecialize(tt::Type); end function code_typed_opaque_closure(@nospecialize(oc::Core.OpaqueClosure); - debuginfo::Symbol=:default, __...) + debuginfo::Symbol=:default, _...) ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions") m = oc.source if isa(m, Method) @@ -1424,14 +1436,15 @@ See also: [`code_typed`](@ref) This section should be considered internal, and is only for who understands Julia compiler internals. -- `world=Base.get_world_counter()`: optional, controls the world age to use when looking up - methods, use current world age if not specified. -- `interp=Core.Compiler.NativeInterpreter(world)`: optional, controls the interpreter to - use, use the native interpreter Julia uses if not specified. -- `optimize_until`: optional, controls the optimization passes to run. If it is a string, - it specifies the name of the pass up to which the optimizer is run. If it is an integer, - it specifies the number of passes to run. If it is `nothing` (default), all passes are - run. 
+- `world::UInt = Base.get_world_counter()`: optional, controls the world age to use + when looking up methods, use current world age if not specified. +- `interp::Core.Compiler.AbstractInterpreter = Core.Compiler.NativeInterpreter(world)`: + optional, controls the abstract interpreter to use, use the native interpreter if not specified. +- `optimize_until::Union{Integer,AbstractString,Nothing} = nothing`: optional, + controls the optimization passes to run. + If it is a string, it specifies the name of the pass up to which the optimizer is run. + If it is an integer, it specifies the number of passes to run. + If it is `nothing` (default), all passes are run. # Example @@ -1453,18 +1466,12 @@ julia> Base.code_ircode(+, (Float64, Int64); optimize_until = "compact 1") => Float64 ``` """ -function code_ircode( - @nospecialize(f), - @nospecialize(types = default_tt(f)); - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world), - optimize_until::Union{Integer,AbstractString,Nothing} = nothing, -) +function code_ircode(@nospecialize(f), @nospecialize(types = default_tt(f)); kwargs...) if isa(f, Core.OpaqueClosure) error("OpaqueClosure not supported") end tt = signature_type(f, types) - return code_ircode_by_type(tt; world, interp, optimize_until) + return code_ircode_by_type(tt; kwargs...) end """ @@ -1475,14 +1482,14 @@ a full signature to query. 
""" function code_ircode_by_type( @nospecialize(tt::Type); - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world), - optimize_until::Union{Integer,AbstractString,Nothing} = nothing, + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world), + optimize_until::Union{Integer,AbstractString,Nothing}=nothing, ) (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) && error("code reflection cannot be used from generated functions") tt = to_tuple_type(tt) - matches = _methods_by_ftype(tt, -1, world)::Vector + matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector asts = [] for match in matches match = match::Core.MethodMatch @@ -1537,8 +1544,8 @@ julia> Base.return_types(sum, (Union{Vector{Int},UnitRange{Int}},)) doing so will result in an error. """ function return_types(@nospecialize(f), @nospecialize(types=default_tt(f)); - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world)) + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) && error("code reflection cannot be used from generated functions") if isa(f, Core.OpaqueClosure) @@ -1552,7 +1559,9 @@ function return_types(@nospecialize(f), @nospecialize(types=default_tt(f)); return Any[Core.Compiler.widenconst(rt)] end rts = [] - for match in _methods(f, types, -1, world)::Vector + tt = signature_type(f, types) + matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector + for match in matches match = match::Core.MethodMatch meth = func_for_method_checked(match.method, types, match.sparams) ty = Core.Compiler.typeinf_type(interp, meth, match.spec_types, match.sparams) @@ -1643,9 +1652,10 @@ function print_statement_costs(io::IO, @nospecialize(f), @nospecialize(t); kwarg end function print_statement_costs(io::IO, @nospecialize(tt::Type); - world = 
get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world)) - matches = _methods_by_ftype(tt, -1, world)::Vector + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) + tt = to_tuple_type(tt) + matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector params = Core.Compiler.OptimizationParams(interp) cst = Int[] for match in matches @@ -2147,15 +2157,15 @@ end """ @invokelatest f(args...; kwargs...) -Provides a convenient way to call [`Base.invokelatest`](@ref). +Provides a convenient way to call [`invokelatest`](@ref). `@invokelatest f(args...; kwargs...)` will simply be expanded into `Base.invokelatest(f, args...; kwargs...)`. It also supports the following syntax: - `@invokelatest x.f` expands to `Base.invokelatest(getproperty, x, :f)` - `@invokelatest x.f = v` expands to `Base.invokelatest(setproperty!, x, :f, v)` -- `@invokelatest xs[i]` expands to `invoke(getindex, xs, i)` -- `@invokelatest xs[i] = v` expands to `invoke(setindex!, xs, v, i)` +- `@invokelatest xs[i]` expands to `Base.invokelatest(getindex, xs, i)` +- `@invokelatest xs[i] = v` expands to `Base.invokelatest(setindex!, xs, v, i)` ```jldoctest julia> @macroexpand @invokelatest f(x; kw=kwv) @@ -2177,8 +2187,11 @@ julia> @macroexpand @invokelatest xs[i] = v !!! compat "Julia 1.7" This macro requires Julia 1.7 or later. +!!! compat "Julia 1.9" + Prior to Julia 1.9, this macro was not exported, and was called as `Base.@invokelatest`. + !!! compat "Julia 1.10" - The additional syntax is supported as of Julia 1.10. + The additional `x.f` and `xs[i]` syntax requires Julia 1.10. 
""" macro invokelatest(ex) topmod = Core.Compiler._topmod(__module__) # well, except, do not get it via CC but define it locally diff --git a/base/refpointer.jl b/base/refpointer.jl index 0cb2df6d24bce..ad74763ff8286 100644 --- a/base/refpointer.jl +++ b/base/refpointer.jl @@ -83,6 +83,7 @@ else primitive type Cwstring 32 end end + ### General Methods for Ref{T} type eltype(x::Type{<:Ref{T}}) where {T} = @isdefined(T) ? T : Any diff --git a/base/regex.jl b/base/regex.jl index 400784e1b27d7..c8d66265e0784 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -167,7 +167,7 @@ abstract type AbstractMatch end """ RegexMatch <: AbstractMatch -A type representing a single match to a `Regex` found in a string. +A type representing a single match to a [`Regex`](@ref) found in a string. Typically created from the [`match`](@ref) function. The `match` field stores the substring of the entire matched string. diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl index 2fc246f86fa96..d33c127b78c76 100644 --- a/base/reinterpretarray.jl +++ b/base/reinterpretarray.jl @@ -387,8 +387,6 @@ end end end -@inline _memcpy!(dst, src, n) = ccall(:memcpy, Cvoid, (Ptr{UInt8}, Ptr{UInt8}, Csize_t), dst, src, n) - @inline @propagate_inbounds function _getindex_ra(a::NonReshapedReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT} # Make sure to match the scalar reinterpret if that is applicable if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0 @@ -434,7 +432,7 @@ end while nbytes_copied < sizeof(T) s[] = a.parent[ind_start + i, tailinds...] nb = min(sizeof(S) - sidx, sizeof(T)-nbytes_copied) - _memcpy!(tptr + nbytes_copied, sptr + sidx, nb) + memcpy(tptr + nbytes_copied, sptr + sidx, nb) nbytes_copied += nb sidx = 0 i += 1 @@ -574,7 +572,7 @@ end if sidx != 0 s[] = a.parent[ind_start + i, tailinds...] 
nb = min((sizeof(S) - sidx) % UInt, sizeof(T) % UInt) - _memcpy!(sptr + sidx, tptr, nb) + memcpy(sptr + sidx, tptr, nb) nbytes_copied += nb a.parent[ind_start + i, tailinds...] = s[] i += 1 @@ -583,7 +581,7 @@ end # Deal with the main body of elements while nbytes_copied < sizeof(T) && (sizeof(T) - nbytes_copied) > sizeof(S) nb = min(sizeof(S), sizeof(T) - nbytes_copied) - _memcpy!(sptr, tptr + nbytes_copied, nb) + memcpy(sptr, tptr + nbytes_copied, nb) nbytes_copied += nb a.parent[ind_start + i, tailinds...] = s[] i += 1 @@ -592,7 +590,7 @@ end if nbytes_copied < sizeof(T) s[] = a.parent[ind_start + i, tailinds...] nb = min(sizeof(S), sizeof(T) - nbytes_copied) - _memcpy!(sptr, tptr + nbytes_copied, nb) + memcpy(sptr, tptr + nbytes_copied, nb) a.parent[ind_start + i, tailinds...] = s[] end end @@ -653,8 +651,8 @@ end # Padding struct Padding - offset::Int - size::Int + offset::Int # 0-indexed offset of the next valid byte; sizeof(T) indicates trailing padding + size::Int # bytes of padding before a valid byte end function intersect(p1::Padding, p2::Padding) start = max(p1.offset, p2.offset) @@ -698,20 +696,24 @@ function iterate(cp::CyclePadding, state::Tuple) end """ - Compute the location of padding in a type. + Compute the location of padding in an isbits datatype. Recursive over the fields of that type. 
""" -function padding(T) - padding = Padding[] - last_end::Int = 0 +@assume_effects :foldable function padding(T::DataType, baseoffset::Int = 0) + pads = Padding[] + last_end::Int = baseoffset for i = 1:fieldcount(T) - offset = fieldoffset(T, i) + offset = baseoffset + Int(fieldoffset(T, i)) fT = fieldtype(T, i) + append!(pads, padding(fT, offset)) if offset != last_end - push!(padding, Padding(offset, offset-last_end)) + push!(pads, Padding(offset, offset-last_end)) end last_end = offset + sizeof(fT) end - padding + if 0 < last_end - baseoffset < sizeof(T) + push!(pads, Padding(baseoffset + sizeof(T), sizeof(T) - last_end + baseoffset)) + end + return Core.svec(pads...) end function CyclePadding(T::DataType) @@ -750,6 +752,124 @@ end return true end +@assume_effects :foldable function struct_subpadding(::Type{Out}, ::Type{In}) where {Out, In} + padding(Out) == padding(In) +end + +@assume_effects :foldable function packedsize(::Type{T}) where T + pads = padding(T) + return sizeof(T) - sum((p.size for p ∈ pads), init = 0) +end + +@assume_effects :foldable ispacked(::Type{T}) where T = isempty(padding(T)) + +function _copytopacked!(ptr_out::Ptr{Out}, ptr_in::Ptr{In}) where {Out, In} + writeoffset = 0 + for i ∈ 1:fieldcount(In) + readoffset = fieldoffset(In, i) + fT = fieldtype(In, i) + if ispacked(fT) + readsize = sizeof(fT) + memcpy(ptr_out + writeoffset, ptr_in + readoffset, readsize) + writeoffset += readsize + else # nested padded type + _copytopacked!(ptr_out + writeoffset, Ptr{fT}(ptr_in + readoffset)) + writeoffset += packedsize(fT) + end + end +end + +function _copyfrompacked!(ptr_out::Ptr{Out}, ptr_in::Ptr{In}) where {Out, In} + readoffset = 0 + for i ∈ 1:fieldcount(Out) + writeoffset = fieldoffset(Out, i) + fT = fieldtype(Out, i) + if ispacked(fT) + writesize = sizeof(fT) + memcpy(ptr_out + writeoffset, ptr_in + readoffset, writesize) + readoffset += writesize + else # nested padded type + _copyfrompacked!(Ptr{fT}(ptr_out + writeoffset), ptr_in + 
readoffset) + readoffset += packedsize(fT) + end + end +end + +""" + reinterpret(::Type{Out}, x::In) + +Reinterpret the valid non-padding bytes of an isbits value `x` as isbits type `Out`. + +Both types must have the same amount of non-padding bytes. This operation is guaranteed +to be reversible. + +```jldoctest +julia> reinterpret(NTuple{2, UInt8}, 0x1234) +(0x34, 0x12) + +julia> reinterpret(UInt16, (0x34, 0x12)) +0x1234 + +julia> reinterpret(Tuple{UInt16, UInt8}, (0x01, 0x0203)) +(0x0301, 0x02) +``` + +!!! warning + + Use caution if some combinations of bits in `Out` are not considered valid and would + otherwise be prevented by the type's constructors and methods. Unexpected behavior + may result without additional validation. +""" +@inline function reinterpret(::Type{Out}, x::In) where {Out, In} + isbitstype(Out) || throw(ArgumentError("Target type for `reinterpret` must be isbits")) + isbitstype(In) || throw(ArgumentError("Source type for `reinterpret` must be isbits")) + if isprimitivetype(Out) && isprimitivetype(In) + outsize = sizeof(Out) + insize = sizeof(In) + outsize == insize || + throw(ArgumentError("Sizes of types $Out and $In do not match; got $outsize \ + and $insize, respectively.")) + return bitcast(Out, x) + end + inpackedsize = packedsize(In) + outpackedsize = packedsize(Out) + inpackedsize == outpackedsize || + throw(ArgumentError("Packed sizes of types $Out and $In do not match; got $outpackedsize \ + and $inpackedsize, respectively.")) + in = Ref{In}(x) + out = Ref{Out}() + if struct_subpadding(Out, In) + # if packed the same, just copy + GC.@preserve in out begin + ptr_in = unsafe_convert(Ptr{In}, in) + ptr_out = unsafe_convert(Ptr{Out}, out) + memcpy(ptr_out, ptr_in, sizeof(Out)) + end + return out[] + else + # mismatched padding + GC.@preserve in out begin + ptr_in = unsafe_convert(Ptr{In}, in) + ptr_out = unsafe_convert(Ptr{Out}, out) + + if fieldcount(In) > 0 && ispacked(Out) + _copytopacked!(ptr_out, ptr_in) + elseif fieldcount(Out) > 
0 && ispacked(In) + _copyfrompacked!(ptr_out, ptr_in) + else + packed = Ref{NTuple{inpackedsize, UInt8}}() + GC.@preserve packed begin + ptr_packed = unsafe_convert(Ptr{NTuple{inpackedsize, UInt8}}, packed) + _copytopacked!(ptr_packed, ptr_in) + _copyfrompacked!(ptr_out, ptr_packed) + end + end + end + return out[] + end +end + + # Reductions with IndexSCartesian2 function _mapreduce(f::F, op::OP, style::IndexSCartesian2{K}, A::AbstractArrayOrBroadcasted) where {F,OP,K} diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl index 4037aff246a81..bcb47a9359392 100644 --- a/base/reshapedarray.jl +++ b/base/reshapedarray.jl @@ -226,6 +226,19 @@ end offset_if_vec(i::Integer, axs::Tuple{<:AbstractUnitRange}) = i + first(axs[1]) - 1 offset_if_vec(i::Integer, axs::Tuple) = i +@inline function isassigned(A::ReshapedArrayLF, index::Int) + @boundscheck checkbounds(Bool, A, index) || return false + @inbounds ret = isassigned(parent(A), index) + ret +end +@inline function isassigned(A::ReshapedArray{T,N}, indices::Vararg{Int, N}) where {T,N} + @boundscheck checkbounds(Bool, A, indices...) || return false + axp = axes(A.parent) + i = offset_if_vec(_sub2ind(size(A), indices...), axp) + I = ind2sub_rs(axp, A.mi, i) + @inbounds isassigned(A.parent, I...) 
+end + @inline function getindex(A::ReshapedArrayLF, index::Int) @boundscheck checkbounds(A, index) @inbounds ret = parent(A)[index] diff --git a/base/ryu/Ryu.jl b/base/ryu/Ryu.jl index 81d1c41f4c19f..9b236caeb6ff1 100644 --- a/base/ryu/Ryu.jl +++ b/base/ryu/Ryu.jl @@ -1,5 +1,6 @@ module Ryu +using .Base.Libc import .Base: significand_bits, significand_mask, exponent_bits, exponent_mask, exponent_bias, exponent_max, uinttype include("utils.jl") diff --git a/base/ryu/shortest.jl b/base/ryu/shortest.jl index f95c09d235e6d..aaa62ba33c703 100644 --- a/base/ryu/shortest.jl +++ b/base/ryu/shortest.jl @@ -363,10 +363,10 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T, c1 = (c ÷ 100) << 1 d0 = (d % 100) << 1 d1 = (d ÷ 100) << 1 - memcpy(ptr, pos + olength - 2, ptr2, c0 + 1, 2) - memcpy(ptr, pos + olength - 4, ptr2, c1 + 1, 2) - memcpy(ptr, pos + olength - 6, ptr2, d0 + 1, 2) - memcpy(ptr, pos + olength - 8, ptr2, d1 + 1, 2) + memcpy(ptr + pos + olength - 3, ptr2 + c0, 2) + memcpy(ptr + pos + olength - 5, ptr2 + c1, 2) + memcpy(ptr + pos + olength - 7, ptr2 + d0, 2) + memcpy(ptr + pos + olength - 9, ptr2 + d1, 2) i += 8 end output2 = output % UInt32 @@ -375,14 +375,14 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T, output2 = div(output2, UInt32(10000)) c0 = (c % 100) << 1 c1 = (c ÷ 100) << 1 - memcpy(ptr, pos + olength - i - 2, ptr2, c0 + 1, 2) - memcpy(ptr, pos + olength - i - 4, ptr2, c1 + 1, 2) + memcpy(ptr + pos + olength - i - 3, ptr2 + c0, 2) + memcpy(ptr + pos + olength - i - 5, ptr2 + c1, 2) i += 4 end if output2 >= 100 c = (output2 % UInt32(100)) << 1 output2 = div(output2, UInt32(100)) - memcpy(ptr, pos + olength - i - 2, ptr2, c + 1, 2) + memcpy(ptr + pos + olength - i - 3, ptr2 + c, 2) i += 2 end if output2 >= 10 @@ -425,7 +425,7 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T, end else pointoff = olength - abs(nexp) - memmove(ptr, pos + pointoff + 1, ptr, pos + pointoff, olength - pointoff + 1) + memmove(ptr + pos + pointoff, ptr + pos 
+ pointoff - 1, olength - pointoff + 1) buf[pos + pointoff] = decchar pos += olength + 1 precision -= olength @@ -470,11 +470,11 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T, if exp2 >= 100 c = exp2 % 10 - memcpy(ptr, pos, ptr2, 2 * div(exp2, 10) + 1, 2) + memcpy(ptr + pos - 1, ptr2 + 2 * div(exp2, 10), 2) buf[pos + 2] = UInt8('0') + (c % UInt8) pos += 3 elseif exp2 >= 10 - memcpy(ptr, pos, ptr2, 2 * exp2 + 1, 2) + memcpy(ptr + pos - 1, ptr2 + 2 * exp2, 2) pos += 2 else if padexp diff --git a/base/ryu/utils.jl b/base/ryu/utils.jl index 4fe0b7d397d07..f5a88c057e2b3 100644 --- a/base/ryu/utils.jl +++ b/base/ryu/utils.jl @@ -1,9 +1,6 @@ const MANTISSA_MASK = Base.significand_mask(Float64) const EXP_MASK = Base.exponent_mask(Float64) >> Base.significand_bits(Float64) -memcpy(d, doff, s, soff, n) = (ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), d + doff - 1, s + soff - 1, n); nothing) -memmove(d, doff, s, soff, n) = (ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), d + doff - 1, s + soff - 1, n); nothing) - # Note: these are smaller than the values given in Figure 4 from the paper # see https://github.com/ulfjack/ryu/issues/119 pow5_bitcount(::Type{Float16}) = 30 diff --git a/base/secretbuffer.jl b/base/secretbuffer.jl index 935c50fb80fd6..eedfd8cbe84c1 100644 --- a/base/secretbuffer.jl +++ b/base/secretbuffer.jl @@ -179,6 +179,21 @@ function final_shred!(s::SecretBuffer) shred!(s) end +""" + shred!(s::SecretBuffer) + +Shreds the contents of a `SecretBuffer` by securely zeroing its data and +resetting its pointer and size. +This function is used to securely erase the sensitive data held in the buffer, +reducing the potential for information leaks. 
+ +# Example +```julia +s = SecretBuffer() +write(s, 's', 'e', 'c', 'r', 'e', 't') +shred!(s) # s is now empty +``` +""" function shred!(s::SecretBuffer) securezero!(s.data) s.ptr = 1 @@ -188,6 +203,13 @@ end isshredded(s::SecretBuffer) = all(iszero, s.data) +""" + shred!(f::Function, x) + +Applies function `f` to the argument `x` and then shreds `x`. +This function is useful when you need to perform some operations on e.g. a +`SecretBuffer` and then want to ensure that it is securely shredded afterwards. +""" function shred!(f::Function, x) try f(x) diff --git a/base/show.jl b/base/show.jl index 36f7df54d0008..45d6a502619db 100644 --- a/base/show.jl +++ b/base/show.jl @@ -1057,10 +1057,27 @@ function show_type_name(io::IO, tn::Core.TypeName) nothing end +function maybe_kws_nt(x::DataType) + x.name === typename(Pairs) || return nothing + length(x.parameters) == 4 || return nothing + x.parameters[1] === Symbol || return nothing + p4 = x.parameters[4] + if (isa(p4, DataType) && p4.name === typename(NamedTuple) && length(p4.parameters) == 2) + syms, types = p4.parameters + types isa DataType || return nothing + x.parameters[2] === eltype(p4) || return nothing + isa(syms, Tuple) || return nothing + x.parameters[3] === typeof(syms) || return nothing + return p4 + end + return nothing +end + function show_datatype(io::IO, x::DataType, wheres::Vector{TypeVar}=TypeVar[]) parameters = x.parameters::SimpleVector istuple = x.name === Tuple.name isnamedtuple = x.name === typename(NamedTuple) + kwsnt = maybe_kws_nt(x) n = length(parameters) # Print tuple types with homogeneous tails longer than max_n compactly using `NTuple` or `Vararg` @@ -1094,30 +1111,41 @@ function show_datatype(io::IO, x::DataType, wheres::Vector{TypeVar}=TypeVar[]) return elseif isnamedtuple syms, types = parameters - first = true if syms isa Tuple && types isa DataType print(io, "@NamedTuple{") - for i in 1:length(syms) - if !first - print(io, ", ") - end - print(io, syms[i]) - typ = types.parameters[i] 
- if typ !== Any - print(io, "::") - show(io, typ) - end - first = false - end + show_at_namedtuple(io, syms, types) print(io, "}") return end + elseif get(io, :backtrace, false)::Bool && kwsnt !== nothing + # simplify the type representation of keyword arguments + # when printing signature of keyword method in the stack trace + print(io, "@Kwargs{") + show_at_namedtuple(io, kwsnt.parameters[1]::Tuple, kwsnt.parameters[2]::DataType) + print(io, "}") + return end show_type_name(io, x.name) show_typeparams(io, parameters, (unwrap_unionall(x.name.wrapper)::DataType).parameters, wheres) end +function show_at_namedtuple(io::IO, syms::Tuple, types::DataType) + first = true + for i in 1:length(syms) + if !first + print(io, ", ") + end + print(io, syms[i]) + typ = types.parameters[i] + if typ !== Any + print(io, "::") + show(io, typ) + end + first = false + end +end + function show_supertypes(io::IO, typ::DataType) print(io, typ) while typ != Any @@ -2132,10 +2160,16 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In # block with argument elseif head in (:for,:while,:function,:macro,:if,:elseif,:let) && nargs==2 + if head === :function && is_expr(args[1], :...) + # fix printing of "function (x...) 
x end" + block_args = Expr(:tuple, args[1]) + else + block_args = args[1] + end if is_expr(args[2], :block) - show_block(IOContext(io, beginsym=>false), head, args[1], args[2], indent, quote_level) + show_block(IOContext(io, beginsym=>false), head, block_args, args[2], indent, quote_level) else - show_block(IOContext(io, beginsym=>false), head, args[1], Expr(:block, args[2]), indent, quote_level) + show_block(IOContext(io, beginsym=>false), head, block_args, Expr(:block, args[2]), indent, quote_level) end print(io, "end") @@ -2470,15 +2504,16 @@ function print_within_stacktrace(io, s...; color=:normal, bold=false) end end -function show_tuple_as_call(io::IO, name::Symbol, sig::Type; +function show_tuple_as_call(out::IO, name::Symbol, sig::Type; demangle=false, kwargs=nothing, argnames=nothing, qualified=false, hasfirst=true) # print a method signature tuple for a lambda definition if sig === Tuple - print(io, demangle ? demangle_function_name(name) : name, "(...)") + print(out, demangle ? 
demangle_function_name(name) : name, "(...)") return end tv = Any[] + io = IOContext(IOBuffer(), out) env_io = io while isa(sig, UnionAll) push!(tv, sig.var) @@ -2501,7 +2536,7 @@ function show_tuple_as_call(io::IO, name::Symbol, sig::Type; print_within_stacktrace(io, argnames[i]; color=:light_black) end print(io, "::") - print_type_bicolor(env_io, sig[i]; use_color = get(io, :backtrace, false)) + print_type_bicolor(env_io, sig[i]; use_color = get(io, :backtrace, false)::Bool) end if kwargs !== nothing print(io, "; ") @@ -2510,15 +2545,126 @@ function show_tuple_as_call(io::IO, name::Symbol, sig::Type; first || print(io, ", ") first = false print_within_stacktrace(io, k; color=:light_black) - print(io, "::") - print_type_bicolor(io, t; use_color = get(io, :backtrace, false)) + if t == pairs(NamedTuple) + # omit type annotation for splat keyword argument + print(io, "...") + else + print(io, "::") + print_type_bicolor(io, t; use_color = get(io, :backtrace, false)::Bool) + end end end print_within_stacktrace(io, ")", bold=true) show_method_params(io, tv) + str = String(take!(unwrapcontext(io)[1])) + if get(out, :limit, false)::Bool + sz = get(out, :displaysize, (typemax(Int), typemax(Int)))::Tuple{Int, Int} + str_lim = type_depth_limit(str, max(sz[2], 120)) + if sizeof(str_lim) < sizeof(str) + typelimitflag = get(out, :stacktrace_types_limited, nothing) + if typelimitflag !== nothing + typelimitflag[] = true + end + end + str = str_lim + end + print(out, str) nothing end +# limit nesting depth of `{ }` until string textwidth is less than `n` +function type_depth_limit(str::String, n::Int; maxdepth = nothing) + depth = 0 + width_at = Int[] # total textwidth at each nesting depth + depths = zeros(Int16, lastindex(str)) # depth at each character index + levelcount = Int[] # number of nodes at each level + strwid = 0 + st_0, st_backslash, st_squote, st_dquote = 0,1,2,4 + state::Int = st_0 + stateis(s) = (state & s) != 0 + quoted() = stateis(st_squote) || 
stateis(st_dquote) + enter(s) = (state |= s) + leave(s) = (state &= ~s) + for (i, c) in ANSIIterator(str) + if c isa ANSIDelimiter + depths[i] = depth + continue + end + + if c == '\\' && quoted() + enter(st_backslash) + elseif c == '\'' + if stateis(st_backslash) || stateis(st_dquote) + elseif stateis(st_squote) + leave(st_squote) + else + enter(st_squote) + end + elseif c == '"' + if stateis(st_backslash) || stateis(st_squote) + elseif stateis(st_dquote) + leave(st_dquote) + else + enter(st_dquote) + end + end + if c == '}' && !quoted() + depth -= 1 + end + + wid = textwidth(c) + strwid += wid + if depth > 0 + width_at[depth] += wid + end + depths[i] = depth + + if c == '{' && !quoted() + depth += 1 + if depth > length(width_at) + push!(width_at, 0) + push!(levelcount, 0) + end + levelcount[depth] += 1 + end + if c != '\\' && stateis(st_backslash) + leave(st_backslash) + end + end + if maxdepth === nothing + limit_at = length(width_at) + 1 + while strwid > n + limit_at -= 1 + limit_at <= 1 && break + # add levelcount[] to include space taken by `…` + strwid = strwid - width_at[limit_at] + levelcount[limit_at] + if limit_at < length(width_at) + # take away the `…` from the previous considered level + strwid -= levelcount[limit_at+1] + end + end + else + limit_at = maxdepth + end + output = IOBuffer() + prev = 0 + for (i, c) in ANSIIterator(str) + di = depths[i] + if di < limit_at + if c isa ANSIDelimiter + write(output, c.del) + else + write(output, c) + end + end + if di > prev && di == limit_at + write(output, "…") + end + prev = di + end + return String(take!(output)) +end + function print_type_bicolor(io, type; kwargs...) str = sprint(show, type, context=io) print_type_bicolor(io, str; kwargs...) 
diff --git a/base/simdloop.jl b/base/simdloop.jl index 29e2382cf39aa..797b77ed75a99 100644 --- a/base/simdloop.jl +++ b/base/simdloop.jl @@ -100,7 +100,7 @@ The object iterated over in a `@simd for` loop should be a one-dimensional range By using `@simd`, you are asserting several properties of the loop: * It is safe to execute iterations in arbitrary or overlapping order, with special consideration for reduction variables. -* Floating-point operations on reduction variables can be reordered, possibly causing different results than without `@simd`. +* Floating-point operations on reduction variables can be reordered or contracted, possibly causing different results than without `@simd`. In many cases, Julia is able to automatically vectorize inner for loops without the use of `@simd`. Using `@simd` gives the compiler a little extra leeway to make it possible in more situations. In diff --git a/base/sort.jl b/base/sort.jl index 0e84657fc481e..90f8755d3b1a4 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -5,7 +5,8 @@ module Sort using Base.Order using Base: copymutable, midpoint, require_one_based_indexing, uinttype, - sub_with_overflow, add_with_overflow, OneTo, BitSigned, BitIntegerType, top_set_bit + sub_with_overflow, add_with_overflow, OneTo, BitSigned, BitIntegerType, top_set_bit, + IteratorSize, HasShape, IsInfinite, tail import Base: sort, @@ -43,6 +44,7 @@ export # not exported by Base SMALL_ALGORITHM, SMALL_THRESHOLD +abstract type Algorithm end ## functions requiring only ordering ## @@ -435,7 +437,7 @@ for (sym, exp, type) in [ (:mn, :(throw(ArgumentError("mn is needed but has not been computed"))), :(eltype(v))), (:mx, :(throw(ArgumentError("mx is needed but has not been computed"))), :(eltype(v))), (:scratch, nothing, :(Union{Nothing, Vector})), # could have different eltype - (:allow_legacy_dispatch, true, Bool)] + (:legacy_dispatch_entry, nothing, Union{Nothing, Algorithm})] usym = Symbol(:_, sym) @eval function $usym(v, o, kw) # using missing instead 
of nothing because scratch could === nothing. @@ -498,8 +500,6 @@ internal or recursive calls. """ function _sort! end -abstract type Algorithm end - """ MissingOptimization(next) <: Algorithm @@ -523,12 +523,12 @@ struct WithoutMissingVector{T, U} <: AbstractVector{T} new{nonmissingtype(eltype(data)), typeof(data)}(data) end end -Base.@propagate_inbounds function Base.getindex(v::WithoutMissingVector, i) +Base.@propagate_inbounds function Base.getindex(v::WithoutMissingVector, i::Integer) out = v.data[i] @assert !(out isa Missing) out::eltype(v) end -Base.@propagate_inbounds function Base.setindex!(v::WithoutMissingVector, x, i) +Base.@propagate_inbounds function Base.setindex!(v::WithoutMissingVector, x, i::Integer) v.data[i] = x v end @@ -589,8 +589,9 @@ function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering, kw) # we can assume v is equal to eachindex(o.data) which allows a copying partition # without allocations. lo_i, hi_i = lo, hi - for i in eachindex(o.data) # equal to copy(v) - x = o.data[i] + cv = eachindex(o.data) # equal to copy(v) + for i in lo:hi + x = o.data[cv[i]] if ismissing(x) == (o.order == Reverse) # should x go at the beginning/end? v[lo_i] = i lo_i += 1 @@ -1383,6 +1384,11 @@ end Variant of [`sort!`](@ref) that returns a sorted copy of `v` leaving `v` itself unmodified. +Uses `Base.copymutable` to support immutable collections and iterables. + +!!! compat "Julia 1.10" + `sort` of arbitrary iterables requires at least Julia 1.10. + # Examples ```jldoctest julia> v = [3, 1, 2]; @@ -1400,7 +1406,39 @@ julia> v 2 ``` """ -sort(v::AbstractVector; kws...) = sort!(copymutable(v); kws...) +function sort(v; kws...) + size = IteratorSize(v) + size == HasShape{0}() && throw(ArgumentError("$v cannot be sorted")) + size == IsInfinite() && throw(ArgumentError("infinite iterator $v cannot be sorted")) + sort!(copymutable(v); kws...) +end +sort(v::AbstractVector; kws...) = sort!(copymutable(v); kws...) 
# for method disambiguation +sort(::AbstractString; kws...) = + throw(ArgumentError("sort(::AbstractString) is not supported")) +sort(::Tuple; kws...) = + throw(ArgumentError("sort(::Tuple) is only supported for NTuples")) + +function sort(x::NTuple{N}; lt::Function=isless, by::Function=identity, + rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) where N + o = ord(lt,by,rev,order) + if N > 9 + v = sort!(copymutable(x), DEFAULT_STABLE, o) + tuple((v[i] for i in 1:N)...) + else + _sort(x, o) + end +end +_sort(x::Union{NTuple{0}, NTuple{1}}, o::Ordering) = x +function _sort(x::NTuple, o::Ordering) + a, b = Base.IteratorsMD.split(x, Val(length(x)>>1)) + merge(_sort(a, o), _sort(b, o), o) +end +merge(x::NTuple, y::NTuple{0}, o::Ordering) = x +merge(x::NTuple{0}, y::NTuple, o::Ordering) = y +merge(x::NTuple{0}, y::NTuple{0}, o::Ordering) = x # Method ambiguity +merge(x::NTuple, y::NTuple, o::Ordering) = + (lt(o, y[1], x[1]) ? (y[1], merge(x, tail(y), o)...) : (x[1], merge(tail(x), y, o)...)) + ## partialsortperm: the permutation to sort the first k elements of an array ## @@ -1771,7 +1809,7 @@ function sort!(A::AbstractArray{T}; by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, # TODO stop eagerly over-allocating. - scratch::Union{Vector{T}, Nothing}=similar(A, size(A, dims))) where T + scratch::Union{Vector{T}, Nothing}=Vector{T}(undef, size(A, dims))) where T __sort!(A, Val(dims), maybe_apply_initial_optimizations(alg), ord(lt, by, rev, order), scratch) end function __sort!(A::AbstractArray{T}, ::Val{K}, @@ -2111,25 +2149,25 @@ end # Support 3-, 5-, and 6-argument versions of sort! 
for calling into the internals in the old way sort!(v::AbstractVector, a::Algorithm, o::Ordering) = sort!(v, firstindex(v), lastindex(v), a, o) function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering) - _sort!(v, a, o, (; lo, hi, allow_legacy_dispatch=false)) + _sort!(v, a, o, (; lo, hi, legacy_dispatch_entry=a)) v end sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering, _) = sort!(v, lo, hi, a, o) function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering, scratch::Vector) - _sort!(v, a, o, (; lo, hi, scratch, allow_legacy_dispatch=false)) + _sort!(v, a, o, (; lo, hi, scratch, legacy_dispatch_entry=a)) v end # Support dispatch on custom algorithms in the old way # sort!(::AbstractVector, ::Integer, ::Integer, ::MyCustomAlgorithm, ::Ordering) = ... function _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw) - @getkw lo hi scratch allow_legacy_dispatch - if allow_legacy_dispatch + @getkw lo hi scratch legacy_dispatch_entry + if legacy_dispatch_entry === a + # This error prevents infinite recursion for unknown algorithms + throw(ArgumentError("Base.Sort._sort!(::$(typeof(v)), ::$(typeof(a)), ::$(typeof(o)), ::Any) is not defined")) + else sort!(v, lo, hi, a, o) scratch - else - # This error prevents infinite recursion for unknown algorithms - throw(ArgumentError("Base.Sort._sort!(::$(typeof(v)), ::$(typeof(a)), ::$(typeof(o))) is not defined")) end end diff --git a/base/stacktraces.jl b/base/stacktraces.jl index ee6a2762d7818..9c942814eefad 100644 --- a/base/stacktraces.jl +++ b/base/stacktraces.jl @@ -20,9 +20,10 @@ Stack information representing execution context, with the following fields: The name of the function containing the execution context. -- `linfo::Union{Core.MethodInstance, CodeInfo, Nothing}` +- `linfo::Union{Core.MethodInstance, Method, Module, Core.CodeInfo, Nothing}` - The MethodInstance containing the execution context (if it could be found). 
+ The MethodInstance or CodeInfo containing the execution context (if it could be found), \ + or Module (for macro expansions)" - `file::Symbol` @@ -319,37 +320,45 @@ function show_spec_linfo(io::IO, frame::StackFrame) print(io, "top-level scope") elseif linfo isa Module Base.print_within_stacktrace(io, Base.demangle_function_name(string(frame.func)), bold=true) - else - def, sig = if linfo isa MethodInstance - linfo.def, linfo.specTypes + elseif linfo isa MethodInstance + def = linfo.def + if def isa Module + Base.show_mi(io, linfo, #=from_stackframe=#true) else - linfo, linfo.sig + show_spec_sig(io, def, linfo.specTypes) end - if def isa Method - argnames = Base.method_argnames(def) - argnames = replace(argnames, :var"#unused#" => :var"") - if def.nkw > 0 - # rearrange call kw_impl(kw_args..., func, pos_args...) to func(pos_args...) - kwarg_types = Any[ fieldtype(sig, i) for i = 2:(1+def.nkw) ] - uw = Base.unwrap_unionall(sig)::DataType - pos_sig = Base.rewrap_unionall(Tuple{uw.parameters[(def.nkw+2):end]...}, sig) - kwnames = argnames[2:(def.nkw+1)] - for i = 1:length(kwnames) - str = string(kwnames[i])::String - if endswith(str, "...") - kwnames[i] = Symbol(str[1:end-3]) - end - end - Base.show_tuple_as_call(io, def.name, pos_sig; - demangle=true, - kwargs=zip(kwnames, kwarg_types), - argnames=argnames[def.nkw+2:end]) - else - Base.show_tuple_as_call(io, def.name, sig; demangle=true, argnames) + else + m = linfo::Method + show_spec_sig(io, m, m.sig) + end +end + +function show_spec_sig(io::IO, m::Method, @nospecialize(sig::Type)) + if get(io, :limit, :false)::Bool + if !haskey(io, :displaysize) + io = IOContext(io, :displaysize => displaysize(io)) + end + end + argnames = Base.method_argnames(m) + argnames = replace(argnames, :var"#unused#" => :var"") + if m.nkw > 0 + # rearrange call kw_impl(kw_args..., func, pos_args...) 
to func(pos_args...; kw_args) + kwarg_types = Any[ fieldtype(sig, i) for i = 2:(1+m.nkw) ] + uw = Base.unwrap_unionall(sig)::DataType + pos_sig = Base.rewrap_unionall(Tuple{uw.parameters[(m.nkw+2):end]...}, sig) + kwnames = argnames[2:(m.nkw+1)] + for i = 1:length(kwnames) + str = string(kwnames[i])::String + if endswith(str, "...") + kwnames[i] = Symbol(str[1:end-3]) end - else - Base.show_mi(io, linfo, true) end + Base.show_tuple_as_call(io, m.name, pos_sig; + demangle=true, + kwargs=zip(kwnames, kwarg_types), + argnames=argnames[m.nkw+2:end]) + else + Base.show_tuple_as_call(io, m.name, sig; demangle=true, argnames) end end diff --git a/base/strings/io.jl b/base/strings/io.jl index 5ae67fc8c841c..987a64798d3da 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -237,7 +237,7 @@ function show( if 4t ≤ n || t ≤ n && t ≤ length(str, head, tail-1) skip = skip_text(n) show(io, SubString(str, 1:prevind(str, head))) - print(io, skip) # TODO: bold styled + printstyled(io, skip; color=:light_yellow, bold=true) show(io, SubString(str, tail)) else show(io, str) diff --git a/base/strings/search.jl b/base/strings/search.jl index 1bb4936661c51..1a3085e084ccd 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -709,6 +709,17 @@ The returned function is of type `Base.Fix2{typeof(occursin)}`. !!! compat "Julia 1.6" This method requires Julia 1.6 or later. 
+ +# Examples +```jldoctest +julia> search_f = occursin("JuliaLang is a programming language"); + +julia> search_f("JuliaLang") +true + +julia> search_f("Python") +false +``` """ occursin(haystack) = Base.Fix2(occursin, haystack) diff --git a/base/strings/string.jl b/base/strings/string.jl index 9716d06deefdf..a26791958cd50 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -127,7 +127,11 @@ end _memcmp(a::Union{Ptr{UInt8},AbstractString}, b::Union{Ptr{UInt8},AbstractString}) = _memcmp(a, b, min(sizeof(a), sizeof(b))) function _memcmp(a::Union{Ptr{UInt8},AbstractString}, b::Union{Ptr{UInt8},AbstractString}, len::Int) - ccall(:memcmp, Cint, (Ptr{UInt8}, Ptr{UInt8}, Csize_t), a, b, len % Csize_t) % Int + GC.@preserve a b begin + pa = unsafe_convert(Ptr{UInt8}, a) + pb = unsafe_convert(Ptr{UInt8}, b) + memcmp(pa, pb, len % Csize_t) % Int + end end function cmp(a::String, b::String) @@ -542,7 +546,7 @@ function repeat(c::AbstractChar, r::Integer) s = _string_n(n*r) p = pointer(s) GC.@preserve s if n == 1 - ccall(:memset, Ptr{Cvoid}, (Ptr{UInt8}, Cint, Csize_t), p, u % UInt8, r) + memset(p, u % UInt8, r) elseif n == 2 p16 = reinterpret(Ptr{UInt16}, p) for i = 1:r diff --git a/base/strings/substring.jl b/base/strings/substring.jl index 5ba08ac2f7fff..792925f24b12b 100644 --- a/base/strings/substring.jl +++ b/base/strings/substring.jl @@ -103,6 +103,9 @@ end thisind(s::SubString{String}, i::Int) = _thisind_str(s, i) nextind(s::SubString{String}, i::Int) = _nextind_str(s, i) +parent(s::SubString) = s.string +parentindices(s::SubString) = (s.offset + 1 : thisind(s.string, s.offset + s.ncodeunits),) + function ==(a::Union{String, SubString{String}}, b::Union{String, SubString{String}}) sizeof(a) == sizeof(b) && _memcmp(a, b) == 0 end @@ -264,7 +267,7 @@ function repeat(s::Union{String, SubString{String}}, r::Integer) out = _string_n(n*r) if n == 1 # common case: repeating a single-byte string @inbounds b = codeunit(s, 1) - ccall(:memset, Ptr{Cvoid}, 
(Ptr{UInt8}, Cint, Csize_t), out, b, r) + memset(unsafe_convert(Ptr{UInt8}, out), b, r) else for i = 0:r-1 GC.@preserve s out unsafe_copyto!(pointer(out, i*n+1), pointer(s), n) diff --git a/base/subarray.jl b/base/subarray.jl index 214a2f98afe31..901410e908d1e 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -81,7 +81,7 @@ parentindices(V::SubArray) = V.indices """ parentindices(A) -Return the indices in the [`parent`](@ref) which correspond to the array view `A`. +Return the indices in the [`parent`](@ref) which correspond to the view `A`. # Examples ```jldoctest @@ -96,6 +96,8 @@ julia> parentindices(V) (1, Base.Slice(Base.OneTo(2))) ``` """ +function parentindices end + parentindices(a::AbstractArray) = map(oneto, size(a)) ## Aliasing detection @@ -352,6 +354,37 @@ function setindex!(V::FastContiguousSubArray{<:Any, 1}, x, i::Int) V end +function isassigned(V::SubArray{T,N}, I::Vararg{Int,N}) where {T,N} + @inline + @boundscheck checkbounds(Bool, V, I...) || return false + @inbounds r = isassigned(V.parent, reindex(V.indices, I)...) 
+ r +end +function isassigned(V::FastSubArray, i::Int) + @inline + @boundscheck checkbounds(Bool, V, i) || return false + @inbounds r = isassigned(V.parent, V.offset1 + V.stride1*i) + r +end +function isassigned(V::FastContiguousSubArray, i::Int) + @inline + @boundscheck checkbounds(Bool, V, i) || return false + @inbounds r = isassigned(V.parent, V.offset1 + i) + r +end +function isassigned(V::FastSubArray{<:Any, 1}, i::Int) + @inline + @boundscheck checkbounds(Bool, V, i) || return false + @inbounds r = isassigned(V.parent, V.offset1 + V.stride1*i) + r +end +function isassigned(V::FastContiguousSubArray{<:Any, 1}, i::Int) + @inline + @boundscheck checkbounds(Bool, V, i) || return false + @inbounds r = isassigned(V.parent, V.offset1 + i) + r +end + IndexStyle(::Type{<:FastSubArray}) = IndexLinear() IndexStyle(::Type{<:SubArray}) = IndexCartesian() diff --git a/base/sysimg.jl b/base/sysimg.jl index b0eeffa5757ba..09ea015b0f903 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -9,11 +9,9 @@ using Base.MainInclude # ans, err, and sometimes Out import Base.MainInclude: eval, include # Ensure this file is also tracked -pushfirst!(Base._included_files, (@__MODULE__, joinpath(@__DIR__, "Base.jl"))) -pushfirst!(Base._included_files, (@__MODULE__, joinpath(@__DIR__, "sysimg.jl"))) +pushfirst!(Base._included_files, (@__MODULE__, abspath(@__FILE__))) # set up depot & load paths to be able to find stdlib packages -@eval Base creating_sysimg = true Base.init_depot_path() Base.init_load_path() @@ -82,7 +80,7 @@ let m = Module() GC.@preserve m begin print_time = @eval m (mod, t) -> (print(rpad(string(mod) * " ", $maxlen + 3, "─")); - Base.time_print(t * 10^9); println()) + Base.time_print(stdout, t * 10^9); println()) print_time(Base, (Base.end_base_include - Base.start_base_include) * 10^(-9)) Base._track_dependencies[] = true @@ -104,7 +102,6 @@ let empty!(Core.ARGS) empty!(Base.ARGS) empty!(LOAD_PATH) - @eval Base creating_sysimg = false Base.init_load_path() # want to be 
able to find external packages in userimg.jl ccall(:jl_clear_implicit_imports, Cvoid, (Any,), Main) @@ -114,12 +111,12 @@ let tot_time = tot_time_base + tot_time_stdlib + tot_time_userimg println("Sysimage built. Summary:") - print("Base ──────── "); Base.time_print(tot_time_base * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_base / tot_time) * 100); println("%") - print("Stdlibs ───── "); Base.time_print(tot_time_stdlib * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_stdlib / tot_time) * 100); println("%") + print("Base ──────── "); Base.time_print(stdout, tot_time_base * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_base / tot_time) * 100); println("%") + print("Stdlibs ───── "); Base.time_print(stdout, tot_time_stdlib * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_stdlib / tot_time) * 100); println("%") if isfile("userimg.jl") - print("Userimg ───── "); Base.time_print(tot_time_userimg * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_userimg / tot_time) * 100); println("%") + print("Userimg ───── "); Base.time_print(stdout, tot_time_userimg * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_userimg / tot_time) * 100); println("%") end - print("Total ─────── "); Base.time_print(tot_time * 10^9); println(); + print("Total ─────── "); Base.time_print(stdout, tot_time * 10^9); println(); empty!(LOAD_PATH) empty!(DEPOT_PATH) diff --git a/base/threadcall.jl b/base/threadcall.jl index 45965fdbc6c65..7548c5063671f 100644 --- a/base/threadcall.jl +++ b/base/threadcall.jl @@ -47,7 +47,7 @@ macro threadcall(f, rettype, argtypes, argvals...) 
push!(body, :(return Int(Core.sizeof($rettype)))) # return code to generate wrapper function and send work request thread queue - wrapper = Expr(Symbol("hygienic-scope"), wrapper, @__MODULE__) + wrapper = Expr(:var"hygienic-scope", wrapper, @__MODULE__, __source__) return :(let fun_ptr = @cfunction($wrapper, Int, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid})) # use cglobal to look up the function on the calling thread do_threadcall(fun_ptr, cglobal($f), $rettype, Any[$(argtypes...)], Any[$(argvals...)]) diff --git a/base/threadingconstructs.jl b/base/threadingconstructs.jl index d150fd3ea1af4..e4f14e26ac5a9 100644 --- a/base/threadingconstructs.jl +++ b/base/threadingconstructs.jl @@ -8,6 +8,25 @@ export threadid, nthreads, @threads, @spawn, Get the ID number of the current thread of execution. The master thread has ID `1`. + +# Examples +```julia-repl +julia> Threads.threadid() +1 + +julia> Threads.@threads for i in 1:4 + println(Threads.threadid()) + end +4 +2 +5 +4 +``` + +!!! note + The thread that a task runs on may change if the task yields, which is known as [`Task Migration`](@ref man-task-migration). + For this reason in most cases it is not safe to use `threadid()` to index into, say, a vector of buffer or stateful objects. + """ threadid() = Int(ccall(:jl_threadid, Int16, ())+1) @@ -40,11 +59,23 @@ function _nthreads_in_pool(tpid::Int8) end function _tpid_to_sym(tpid::Int8) - return tpid == 0 ? :interactive : :default + if tpid == 0 + return :interactive + elseif tpid == 1 + return :default + else + throw(ArgumentError("Unrecognized threadpool id $tpid")) + end end function _sym_to_tpid(tp::Symbol) - return tp === :interactive ? Int8(0) : Int8(1) + if tp === :interactive + return Int8(0) + elseif tp === :default + return Int8(1) + else + throw(ArgumentError("Unrecognized threadpool name `$(repr(tp))`")) + end end """ @@ -103,6 +134,7 @@ end Threads.ngcthreads() -> Int Returns the number of GC threads currently configured. 
+This includes both mark threads and concurrent sweep threads. """ ngcthreads() = Int(unsafe_load(cglobal(:jl_n_gcthreads, Cint))) + 1 @@ -122,7 +154,7 @@ function threading_run(fun, static) Base._wait(tasks[i]) end ccall(:jl_exit_threaded_region, Cvoid, ()) - failed_tasks = filter(istaskfailed, tasks) + failed_tasks = filter!(istaskfailed, tasks) if !isempty(failed_tasks) throw(CompositeException(map(TaskFailedException, failed_tasks))) end @@ -214,8 +246,8 @@ For example, the above conditions imply that: - Communicating between iterations using blocking primitives like `Channel`s is incorrect. - Write only to locations not shared across iterations (unless a lock or atomic operation is used). -- The value of [`threadid()`](@ref Threads.threadid) may change even within a single - iteration. +- Unless the `:static` schedule is used, the value of [`threadid()`](@ref Threads.threadid) + may change even within a single iteration. See [`Task Migration`](@ref man-task-migration). ## Schedulers @@ -341,8 +373,10 @@ the _value_ of a variable, isolating the asynchronous code from changes to the variable's value in the current task. !!! note - See the manual chapter on [multi-threading](@ref man-multithreading) - for important caveats. See also the chapter on [threadpools](@ref man-threadpools). + The thread that the task runs on may change if the task yields, therefore `threadid()` should not + be treated as constant for a task. See [`Task Migration`](@ref man-task-migration), and the broader + [multi-threading](@ref man-multithreading) manual for further important caveats. + See also the chapter on [threadpools](@ref man-threadpools). !!! compat "Julia 1.3" This macro is available as of Julia 1.3. @@ -352,22 +386,31 @@ the variable's value in the current task. !!! compat "Julia 1.9" A threadpool may be specified as of Julia 1.9. 
+ +# Examples +```julia-repl +julia> t() = println("Hello from ", Threads.threadid()); + +julia> tasks = fetch.([Threads.@spawn t() for i in 1:4]); +Hello from 1 +Hello from 1 +Hello from 3 +Hello from 4 +``` """ macro spawn(args...) - tp = :default + tp = QuoteNode(:default) na = length(args) if na == 2 ttype, ex = args if ttype isa QuoteNode ttype = ttype.value - elseif ttype isa Symbol - # TODO: allow unquoted symbols - ttype = nothing - end - if ttype === :interactive || ttype === :default - tp = ttype + if ttype !== :interactive && ttype !== :default + throw(ArgumentError("unsupported threadpool in @spawn: $ttype")) + end + tp = QuoteNode(ttype) else - throw(ArgumentError("unsupported threadpool in @spawn: $ttype")) + tp = ttype end elseif na == 1 ex = args[1] @@ -383,7 +426,7 @@ macro spawn(args...) let $(letargs...) local task = Task($thunk) task.sticky = false - _spawn_set_thrpool(task, $(QuoteNode(tp))) + _spawn_set_thrpool(task, $(esc(tp))) if $(Expr(:islocal, var)) put!($var, task) end diff --git a/base/timing.jl b/base/timing.jl index 3e1f3a3451149..d166b4162db59 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -24,6 +24,7 @@ struct GC_Num mark_time ::Int64 total_sweep_time ::Int64 total_mark_time ::Int64 + last_full_sweep ::Int64 end gc_num() = ccall(:jl_gc_num, GC_Num, ()) @@ -135,7 +136,7 @@ function format_bytes(bytes) # also used by InteractiveUtils end end -function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, recompile_time=0, newline=false, _lpad=true) +function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, recompile_time=0, newline=false, _lpad=true) timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6) str = sprint() do io _lpad && print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "") @@ -169,8 +170,9 @@ function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, re print(io, ": ", perc < 1 ? 
"<1" : Ryu.writefixed(perc, 0), "% of which was recompilation") end parens && print(io, ")") + newline && print(io, "\n") end - newline ? println(str) : print(str) + print(io, str) nothing end @@ -178,7 +180,7 @@ function timev_print(elapsedtime, diff::GC_Diff, compile_times, _lpad) allocs = gc_alloc_count(diff) compile_time = first(compile_times) recompile_time = last(compile_times) - time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, recompile_time, true, _lpad) + time_print(stdout, elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, recompile_time, true, _lpad) padded_nonzero_print(elapsedtime, "elapsed time (ns)") padded_nonzero_print(diff.total_time, "gc time (ns)") padded_nonzero_print(diff.allocd, "bytes allocated") @@ -279,7 +281,7 @@ macro time(msg, ex) local _msg = $(esc(msg)) local has_msg = !isnothing(_msg) has_msg && print(_msg, ": ") - time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), first(compile_elapsedtimes), last(compile_elapsedtimes), true, !has_msg) + time_print(stdout, elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), first(compile_elapsedtimes), last(compile_elapsedtimes), true, !has_msg) val end end diff --git a/base/util.jl b/base/util.jl index 6f424f80d13b6..4dcb819292ff8 100644 --- a/base/util.jl +++ b/base/util.jl @@ -268,7 +268,7 @@ will always be called. function securezero! 
end @noinline securezero!(a::AbstractArray{<:Number}) = fill!(a, 0) @noinline unsafe_securezero!(p::Ptr{T}, len::Integer=1) where {T} = - ccall(:memset, Ptr{T}, (Ptr{T}, Cint, Csize_t), p, 0, len*sizeof(T)) + memset(p, 0, len*sizeof(T)) unsafe_securezero!(p::Ptr{Cvoid}, len::Integer=1) = Ptr{Cvoid}(unsafe_securezero!(Ptr{UInt8}(p), len)) """ @@ -604,7 +604,7 @@ macro kwdef(expr) kwdefs = nothing end return quote - Base.@__doc__ $(esc(expr)) + $(esc(:($Base.@__doc__ $expr))) $kwdefs end end diff --git a/cli/Makefile b/cli/Makefile index c2e2bcd568a07..b6a2b48ebf044 100644 --- a/cli/Makefile +++ b/cli/Makefile @@ -104,14 +104,18 @@ julia-debug: $(build_bindir)/julia-debug$(EXE) libjulia-release: $(build_shlibdir)/libjulia.$(SHLIB_EXT) libjulia-debug: $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT) +ifneq (,$(filter $(OS), Linux FreeBSD)) +VERSIONSCRIPT := -Wl,--version-script=$(BUILDDIR)/julia.expmap +endif + ifeq ($(OS),WINNT) # On Windows we need to strip out exported functions from the generated import library. 
STRIP_EXPORTED_FUNCS := $(shell $(CPP_STDOUT) -I$(JULIAHOME)/src $(SRCDIR)/list_strip_symbols.h) endif -$(build_shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_OBJS) $(SRCDIR)/list_strip_symbols.h | $(build_shlibdir) $(build_libdir) +$(build_shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_OBJS) $(SRCDIR)/list_strip_symbols.h $(BUILDDIR)/julia.expmap | $(build_shlibdir) $(build_libdir) @$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(SHIPFLAGS) $(LIB_OBJS) -o $@ \ - $(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(call SONAME_FLAGS,libjulia.$(JL_MAJOR_SHLIB_EXT))) + $(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(VERSIONSCRIPT) $(call SONAME_FLAGS,libjulia.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia.$(JL_MAJOR_SHLIB_EXT) $@ @$(DSYMUTIL) $@ ifeq ($(OS), WINNT) @@ -120,9 +124,9 @@ ifeq ($(OS), WINNT) @$(call PRINT_ANALYZE, $(OBJCOPY) $(build_libdir)/$(notdir $@).tmp.a $(STRIP_EXPORTED_FUNCS) $(build_libdir)/$(notdir $@).a && rm $(build_libdir)/$(notdir $@).tmp.a) endif -$(build_shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_DOBJS) $(SRCDIR)/list_strip_symbols.h | $(build_shlibdir) $(build_libdir) +$(build_shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_DOBJS) $(SRCDIR)/list_strip_symbols.h $(BUILDDIR)/julia.expmap | $(build_shlibdir) $(build_libdir) @$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(DEBUGFLAGS) $(LIB_DOBJS) -o $@ \ - $(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(call SONAME_FLAGS,libjulia-debug.$(JL_MAJOR_SHLIB_EXT))) + $(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(VERSIONSCRIPT) $(call SONAME_FLAGS,libjulia-debug.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-debug.$(JL_MAJOR_SHLIB_EXT) $@ @$(DSYMUTIL) $@ ifeq ($(OS), WINNT) @@ -144,8 +148,12 @@ $(build_bindir)/julia$(EXE): $(EXE_OBJS) $(build_shlibdir)/libjulia.$(SHLIB_EXT) $(build_bindir)/julia-debug$(EXE): $(EXE_DOBJS) $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT) | $(build_bindir) @$(call PRINT_LINK, $(CC) $(LOADER_CFLAGS) 
$(DEBUGFLAGS) $(EXE_DOBJS) -o $@ $(LOADER_LDFLAGS) $(RPATH) -ljulia-debug) +$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in + sed <'$<' >'$@' -e 's/@JULIA_SHLIB_SYMBOL_VERSION@/JL_LIBJULIA_$(SOMAJOR)/' + clean: | $(CLEAN_TARGETS) rm -f $(BUILDDIR)/*.o $(BUILDDIR)/*.dbg.obj rm -f $(build_bindir)/julia* + rm -f $(BUILDDIR)/julia.expmap .PHONY: clean release debug julia-release julia-debug diff --git a/cli/julia.expmap.in b/cli/julia.expmap.in new file mode 100644 index 0000000000000..b6fa72e9aca03 --- /dev/null +++ b/cli/julia.expmap.in @@ -0,0 +1 @@ +@JULIA_SHLIB_SYMBOL_VERSION@ { global: *; }; diff --git a/cli/loader_lib.c b/cli/loader_lib.c index e2f615c684637..12feed0c508a0 100644 --- a/cli/loader_lib.c +++ b/cli/loader_lib.c @@ -345,6 +345,8 @@ static char *libstdcxxprobe(void) free(path); return NULL; } + // Ensure that `path` is zero-terminated. + path[pathlen] = '\0'; return path; } } @@ -525,8 +527,13 @@ __attribute__((constructor)) void jl_load_libjulia_internal(void) { } void *fptr = lookup_symbol(RTLD_DEFAULT, "jl_get_pgcstack_static"); void *(*key)(void) = lookup_symbol(RTLD_DEFAULT, "jl_pgcstack_addr_static"); - if (fptr != NULL && key != NULL) - jl_pgcstack_setkey(fptr, key); + _Atomic(char) *semaphore = lookup_symbol(RTLD_DEFAULT, "jl_pgcstack_static_semaphore"); + if (fptr != NULL && key != NULL && semaphore != NULL) { + char already_used = 0; + atomic_compare_exchange_strong(semaphore, &already_used, 1); + if (already_used == 0) // RMW succeeded - we have exclusive access + jl_pgcstack_setkey(fptr, key); + } #endif // jl_options must be initialized very early, in case an embedder sets some diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl index e8901a7b462ea..fea4ca6bc1fe3 100644 --- a/contrib/generate_precompile.jl +++ b/contrib/generate_precompile.jl @@ -1,13 +1,14 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +# Prevent this from putting anyting into the Main namespace +@eval Module() begin + if Threads.maxthreadid() != 1 @warn "Running this file with multiple Julia threads may lead to a build error" Threads.maxthreadid() end if Base.isempty(Base.ARGS) || Base.ARGS[1] !== "0" Sys.__init_build() -# Prevent this from being put into the Main namespace -@eval Module() begin if !isdefined(Base, :uv_eventloop) Base.reinit_stdio() end @@ -152,7 +153,6 @@ if Artifacts !== nothing """ end - Pkg = get(Base.loaded_modules, Base.PkgId(Base.UUID("44cfe95a-1eb2-52ea-b672-e2afdf69b78f"), "Pkg"), nothing) @@ -234,6 +234,13 @@ end ansi_enablecursor = "\e[?25h" ansi_disablecursor = "\e[?25l" +blackhole = Sys.isunix() ? "/dev/null" : "nul" +procenv = Dict{String,Any}( + "JULIA_HISTORY" => blackhole, + "JULIA_PROJECT" => nothing, # remove from environment + "JULIA_LOAD_PATH" => "@stdlib", + "JULIA_DEPOT_PATH" => Sys.iswindows() ? ";" : ":", + "TERM" => "") generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printed start_time = time_ns() @@ -285,7 +292,9 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe Base.compilecache(Base.PkgId($(repr(pkgname))), $(repr(path))) $precompile_script """ - run(`$(julia_exepath()) -O0 --sysimage $sysimg --trace-compile=$tmp_proc --startup-file=no -Cnative -e $s`) + p = run(pipeline(addenv(`$(julia_exepath()) -O0 --trace-compile=$tmp_proc --sysimage $sysimg + --cpu-target=native --startup-file=no --color=yes`, procenv), + stdin=IOBuffer(s), stdout=debug_output)) n_step1 = 0 for f in (tmp_prec, tmp_proc) isfile(f) || continue @@ -305,23 +314,15 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe # Collect statements from running a REPL process and replaying our REPL script touch(precompile_file) pts, ptm = open_fake_pty() - blackhole = Sys.isunix() ? 
"/dev/null" : "nul" if have_repl - cmdargs = ```--color=yes - -e 'import REPL; REPL.Terminals.is_precompiling[] = true' - ``` + cmdargs = `-e 'import REPL; REPL.Terminals.is_precompiling[] = true'` else cmdargs = `-e nothing` end - p = withenv("JULIA_HISTORY" => blackhole, - "JULIA_PROJECT" => nothing, # remove from environment - "JULIA_LOAD_PATH" => Sys.iswindows() ? "@;@stdlib" : "@:@stdlib", - "JULIA_PKG_PRECOMPILE_AUTO" => "0", - "TERM" => "") do - run(```$(julia_exepath()) -O0 --trace-compile=$precompile_file --sysimage $sysimg - --cpu-target=native --startup-file=no -i $cmdargs```, - pts, pts, pts; wait=false) - end + p = run(addenv(addenv(```$(julia_exepath()) -O0 --trace-compile=$precompile_file --sysimage $sysimg + --cpu-target=native --startup-file=no --color=yes -i $cmdargs```, procenv), + "JULIA_PKG_PRECOMPILE_AUTO" => "0"), + pts, pts, pts; wait=false) Base.close_stdio(pts) # Prepare a background process to copy output from process until `pts` is closed output_copy = Base.BufferStream() @@ -452,20 +453,19 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe failed = length(statements) - n_succeeded print_state("step3" => string("F$n_succeeded", failed > 0 ? " ($failed failed)" : "")) println() - if have_repl - # Seems like a reasonable number right now, adjust as needed - # comment out if debugging script - n_succeeded > 1500 || @warn "Only $n_succeeded precompile statements" - end + # Seems like a reasonable number right now, adjust as needed + # comment out if debugging script + n_succeeded > (have_repl ? 900 : 90) || @warn "Only $n_succeeded precompile statements" fetch(step1) == :ok || throw("Step 1 of collecting precompiles failed.") fetch(step2) == :ok || throw("Step 2 of collecting precompiles failed.") tot_time = time_ns() - start_time println("Precompilation complete. 
Summary:") - print("Total ─────── "); Base.time_print(tot_time); println() + print("Total ─────── "); Base.time_print(stdout, tot_time); println() finally fancyprint && print(ansi_enablecursor) + GC.gc(true); GC.gc(false); # reduce memory footprint return end @@ -474,22 +474,30 @@ generate_precompile_statements() # As a last step in system image generation, # remove some references to build time environment for a more reproducible build. Base.Filesystem.temp_cleanup_purge(force=true) -@eval Base PROGRAM_FILE = "" -@eval Sys begin - BINDIR = "" - STDLIB = "" -end -empty!(Base.ARGS) -empty!(Core.ARGS) -end # @eval -end # if +let stdout = Ref{IO}(stdout) + Base.PROGRAM_FILE = "" + Sys.BINDIR = "" + Sys.STDLIB = "" + empty!(Base.ARGS) + empty!(Core.ARGS) + empty!(Base.TOML_CACHE.d) + Base.TOML.reinit!(Base.TOML_CACHE.p, "") + + println("Outputting sysimage file...") + Base.stdout = Core.stdout + Base.stderr = Core.stderr -println("Outputting sysimage file...") -let pre_output_time = time_ns() # Print report after sysimage has been saved so all time spent can be captured + pre_output_time = time_ns() Base.postoutput() do output_time = time_ns() - pre_output_time - print("Output ────── "); Base.time_print(output_time); println() + let stdout = stdout[] + print(stdout, "Output ────── "); Base.time_print(stdout, output_time); println(stdout) + end + stdout[] = Core.stdout end end + +end # if +end # @eval diff --git a/deps/JuliaSyntax.mk b/deps/JuliaSyntax.mk new file mode 100644 index 0000000000000..e9cc0c942dbe0 --- /dev/null +++ b/deps/JuliaSyntax.mk @@ -0,0 +1,16 @@ +$(eval $(call git-external,JuliaSyntax,JULIASYNTAX,,,$(BUILDDIR))) + +$(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/source-extracted + @# no build steps + echo 1 > $@ + +$(eval $(call symlink_install,JuliaSyntax,$$(JULIASYNTAX_SRC_DIR),$$(JULIAHOME)/base)) + +clean-JuliaSyntax: + -rm -f $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled +get-JuliaSyntax: 
$(JULIASYNTAX_SRC_FILE) +extract-JuliaSyntax: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/source-extracted +configure-JuliaSyntax: extract-JuliaSyntax +compile-JuliaSyntax: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled +fastcheck-JuliSyntax: check-JuliSyntax +check-JuliSyntax: compile-JuliSyntax diff --git a/deps/JuliaSyntax.version b/deps/JuliaSyntax.version new file mode 100644 index 0000000000000..b604eedaa43dd --- /dev/null +++ b/deps/JuliaSyntax.version @@ -0,0 +1,4 @@ +JULIASYNTAX_BRANCH = main +JULIASYNTAX_SHA1 = 8731bab86f14762cca8cf24224d8c7a6a89c21c5 +JULIASYNTAX_GIT_URL := https://github.com/JuliaLang/JuliaSyntax.jl.git +JULIASYNTAX_TAR_URL = https://api.github.com/repos/JuliaLang/JuliaSyntax.jl/tarball/$1 diff --git a/deps/Makefile b/deps/Makefile index 62bb85e72c492..27f5fdbb693d5 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -36,7 +36,7 @@ BUILDDIR := $(BUILDDIR)$(MAYBE_HOST) # prevent installing libs into usr/lib64 on opensuse unexport CONFIG_SITE -DEP_LIBS := +DEP_LIBS := JuliaSyntax ifeq ($(USE_SYSTEM_LIBBLASTRAMPOLINE), 0) DEP_LIBS += blastrampoline @@ -46,6 +46,10 @@ ifeq ($(USE_SYSTEM_CSL), 0) DEP_LIBS += csl endif +ifeq ($(SANITIZE), 1) +DEP_LIBS += sanitizers +endif + ifeq ($(USE_SYSTEM_LIBUV), 0) DEP_LIBS += libuv endif @@ -188,7 +192,7 @@ DEP_LIBS_STAGED := $(DEP_LIBS) DEP_LIBS_STAGED_ALL := llvm llvm-tools clang llvmunwind unwind libuv pcre \ openlibm dsfmt blastrampoline openblas lapack gmp mpfr patchelf utf8proc \ objconv mbedtls libssh2 nghttp2 curl libgit2 libwhich zlib p7zip csl \ - libsuitesparse lld libtracyclient ittapi + sanitizers libsuitesparse lld libtracyclient ittapi JuliaSyntax DEP_LIBS_ALL := $(DEP_LIBS_STAGED_ALL) ifneq ($(USE_BINARYBUILDER_OPENBLAS),0) @@ -223,6 +227,7 @@ distcleanall: $(addprefix distclean-, $(DEP_LIBS_ALL)) getall: $(addprefix get-, $(DEP_LIBS_ALL)) include $(SRCDIR)/csl.mk +include $(SRCDIR)/sanitizers.mk include $(SRCDIR)/ittapi.mk include $(SRCDIR)/llvm.mk include $(SRCDIR)/libuv.mk @@ -248,4 +253,7 @@ 
include $(SRCDIR)/libwhich.mk include $(SRCDIR)/p7zip.mk include $(SRCDIR)/libtracyclient.mk +# vendored Julia libs +include $(SRCDIR)/JuliaSyntax.mk + include $(SRCDIR)/tools/uninstallers.mk diff --git a/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/md5 b/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/md5 new file mode 100644 index 0000000000000..8bec9dde7fbae --- /dev/null +++ b/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/md5 @@ -0,0 +1 @@ +6fdeb9332af478502be39af642027387 diff --git a/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/sha512 b/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/sha512 new file mode 100644 index 0000000000000..50c676f808c5c --- /dev/null +++ b/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/sha512 @@ -0,0 +1 @@ +fbb4ab0b99de7e1f86b918b401c2d42883a2bf8e80f6af4d6b85b7ca263d97cca1c47b25aca48359f14dee91b658684c0c590b7f20240bd9e0ce6e960ccf6647 diff --git a/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/md5 b/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/md5 deleted file mode 100644 index 08f5ccda57979..0000000000000 --- a/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -c135dc6ed97656fe956d9ee5cf3cbc55 diff --git a/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/sha512 b/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/sha512 deleted file mode 100644 index 957075f0f281a..0000000000000 --- a/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -2ae67fd4c5e1bf83df5df836fcd69afc0fb8454723043d32de9c7bc29feedf390adb76efda52e79937ea801ff21b5f4ea875469136424e2889904130b247b52a diff --git a/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/md5 
b/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/md5 new file mode 100644 index 0000000000000..8710722b5409c --- /dev/null +++ b/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/md5 @@ -0,0 +1 @@ +f0e62f7b63dc9400caa2fec1b91b7889 diff --git a/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/sha512 b/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/sha512 new file mode 100644 index 0000000000000..c92e62d861633 --- /dev/null +++ b/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/sha512 @@ -0,0 +1 @@ +e48ee01791f58d41715fd44e16238d835315e930d3ef529dd3f3b5660935f7f0ca2c5163ec9c4e4d90e4ead5328f39e0bfffa88223c2094c8727460eac022cc1 diff --git a/deps/checksums/compilersupportlibraries b/deps/checksums/compilersupportlibraries index 098c181ca5c87..4830109bd7aea 100644 --- a/deps/checksums/compilersupportlibraries +++ b/deps/checksums/compilersupportlibraries @@ -1,92 +1,92 @@ -CompilerSupportLibraries.v1.0.2+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/20ebaad57850393b6ac9fa924e511fe4 -CompilerSupportLibraries.v1.0.2+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/020de4d8b0ff6bedbadaa305ff8445e6849f12053762ea4aa68412d1ec763dbd86f479587a2fbb862487f1feb04d976c38099ddf3887817a3d32b3f029cf85b1 -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/3908fa1a2f739b330e787468c9bfb5c8 -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/1741e3403ac7aa99e7cfd9a01222c4153ed300f47cc1b347e1af1a6cd07a82caaa54b9cfbebae8751440420551621cc6524504413446d104f9493dff2c081853 -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/2444dbb7637b32cf543675cc12330878 -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/8537f0b243df8544350c884021b21c585fd302e8dd462a30a6ee84c7a36a049133262e5d1bc362f972066b8e8d6a091c32c3b746bab1feb9fccf2e7cca65756c 
-CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/d79c1434594c0c5e7d6be798bf52c99e -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/7e71accc401a45b51b298702fb4c79a2fc856c7b28f0935f6ad3a0db5381c55fe5432daff371842930d718024b7c6c1d80e2bd09d397145203673bebbe3496ae -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/f212059053d99558a9b0bf54b20180e1 -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/5c104b1282cec8a944e5d008f44a4d60f4394fd5d797fec7d1f487d13e7328cd9c88ec4916dabf18596d87160756bda914e4f8c5a356b5577f9349d0d9e976d6 -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/3e3b3795ee93ef317223050e803a9875 -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/85d3c955e15f66bfe8bfec2f28c9160bc03d4d531ea4ffe6bc6b51e0d69ccea3ab67a16ca752dabc870861c407381c4519d75c6be3832e8dccd6122ec8c6ed75 -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/cf2d1315f6a348af2e6c065e2a286e7a -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/58420377bc77aa7678034ee5f708eb6be7db359faef2c2638869765453633da9bf455512bd88e95b38ae0428ecc4053561517b176b2371129bdaef9d8d5dadfd -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/f5c09ed7e0eeb8d345d328f950582f26 -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c657f55c8fcdeb404be168a3a63a5e84304730fe34f25673d92cdae4b0a1fcc6a877ee1433f060e1be854c7811d66632e32510a2ed591d88330f1340b9c20de -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/c685518aca4721cd8621d510e2039683 -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b760468c6377dcd2b8dd50200daaabe604006afc070984d78152b2becd0680b59036c9a6e91dea490121bd85b58d285bfc1e1cf696d29af236528400101de36c 
-CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/8faf5c8ad62ab10f71dd2ec9683053e2 -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/921239f241a5c89710cf07272d7f6c3f10201a7533068ed1e9643f9fb2f439e1bb765a4966d913829866ee0ce4f1589d30d06e4b5c1361e3c016a9473f087177 -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/b38fcb70691ac2621379d298eef8c79e -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/06c7f64257ce721f5941f6e50a0d2717cdc9394fc532ded19ce3eaacd5e92a416969534227562e4fee04d2b6340c650d8bc9779e14519b90038bc41e8d1f5ce3 -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/cdfab2c7bc41765caf4441c3caeed761 -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/7109d4a7b32c00309c42685f54a86fc2cc63c0c00f65584ad296b6e44ad3320eed1aaf49684a8831841cdffa5555d72f89272fb722a780596e27ef020528026b -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/441980ebd23d72772cbe603f1c275336 -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/e273d9f1af259a3080df8f173e1808a1ade976a943aba97216bf59a96178e7c052e7a048b0ceee53ab486ed577a2ecb92579857be2f7b29e76322ee1f13c9d76 -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/f5c09ed7e0eeb8d345d328f950582f26 -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c657f55c8fcdeb404be168a3a63a5e84304730fe34f25673d92cdae4b0a1fcc6a877ee1433f060e1be854c7811d66632e32510a2ed591d88330f1340b9c20de -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/c685518aca4721cd8621d510e2039683 -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b760468c6377dcd2b8dd50200daaabe604006afc070984d78152b2becd0680b59036c9a6e91dea490121bd85b58d285bfc1e1cf696d29af236528400101de36c 
-CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/8faf5c8ad62ab10f71dd2ec9683053e2 -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/921239f241a5c89710cf07272d7f6c3f10201a7533068ed1e9643f9fb2f439e1bb765a4966d913829866ee0ce4f1589d30d06e4b5c1361e3c016a9473f087177 -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/b38fcb70691ac2621379d298eef8c79e -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/06c7f64257ce721f5941f6e50a0d2717cdc9394fc532ded19ce3eaacd5e92a416969534227562e4fee04d2b6340c650d8bc9779e14519b90038bc41e8d1f5ce3 -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/cdfab2c7bc41765caf4441c3caeed761 -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/7109d4a7b32c00309c42685f54a86fc2cc63c0c00f65584ad296b6e44ad3320eed1aaf49684a8831841cdffa5555d72f89272fb722a780596e27ef020528026b -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/441980ebd23d72772cbe603f1c275336 -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/e273d9f1af259a3080df8f173e1808a1ade976a943aba97216bf59a96178e7c052e7a048b0ceee53ab486ed577a2ecb92579857be2f7b29e76322ee1f13c9d76 -CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran3.tar.gz/md5/6decf8fd5afb50451771c761e63a8917 -CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/4984724bcc847724b1bc005b6f760a18b68147f7d5402d0faf4e28fc0d14fa10975368a951f9caf2a8856500046dec8343043274557d58269e77492b929a9e4b -CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran4.tar.gz/md5/39d1e8a3baa144c018d3eaf7f3806482 -CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/fc4d429279c5a93b6c28b6e911b1e7cfd1c1cfe46f11f2e901b3832ce90d45f49d3d29f0ef18518a94af6cc8651f67c4ed81672680f9281ada390440b172a2af 
-CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran5.tar.gz/md5/37dabd9cd224c9fed9633dedccb6c565 -CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/b253149e72eef9486888fbaace66e9b6945f4477f6b818f64f3047331165b0e2bc17aa6e3fc8c88686a72e478eb62c8f53883415d5419db448d8016fa3a1da5e -CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran3.tar.gz/md5/afdd32bfadd465848e6be458817a44ae -CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran3.tar.gz/sha512/eebd679c499143014514c7c9d1875dedbbab9e3af51526c4dd445a9e3dbade95d24522da8bbad0a50ab400755e47b018828b324c4ad7705e212ccd990e34439a -CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran4.tar.gz/md5/bc4a0f0b7cea328f7e8850583774496b -CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran4.tar.gz/sha512/82285b67946212b49cddf6259f2c60ff5469f8c5263ccefe44f1d93ace98ab68e2c152e1b54434b2f075fd8d192c06d5451bc8cca26d951ad15f3453102f02b5 -CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran5.tar.gz/md5/177f0232abce8d523882530ed7a93092 -CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran5.tar.gz/sha512/db80acf0f2434f28ee7680e1beb34f564940071815d1ad89fb5913cbd9ac24da528e826d0d54be6265a7340ebd661b6d308ed79d96b67fa5d8c98dc3f1bee8d6 -CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/756718e5eaa4547b874a71a8e3545492 -CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/c21c1be10ca8810f56e435b3629e2ab0678926ea9c4f4c3dd003f9e292c075493b83df04401d3bcf7738f1a44098f674f9b01bba9db4b9a9e45ad7af3497444e -CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/65ce0024bf8fe3276addbf185ed03e48 -CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/5e8105a12ab04e2949e41eda50a060dea04ccd98660c7528cfc86e120fe61cca8bab878fd2c92a3858f02ac3f3c55d0e48789907e5fbd2392a8e84b183ed4636 
-CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/b7727324d550f637209db795238c46a4 -CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/864b1db2642e68665b9d3322563c7ce964835d0e720325ea00b193e2cbf6791760e0014710e2a79876165ab0daffa6d53d61b87a5034f956ba6e255b0144652c -CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/4e5e4b23dc87450738da33926a07511d -CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/fc09879d94b750e75775d8b64a41ab9924d675fb53c5700467604412928fe7f5cb21911da0f64898d2463fa77ffbaf4c96c397b9060f4746eec152747930cddc -CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/9a92138ed69aa317a932a615c6e62d69 -CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/0b7785379936a2a209b074177b1424dd7e00b29b5165f564e799b0aa4e06a582e9d616525d97274ba2507cb88192028f1ac485d3f99bdc7ee53fc63c1a7e85de -CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/8ffee3d6de5197c7a1f354d72c8238fa -CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/deadc4d7224c84f9b82dc956b69e815c44ae036802838365d870ab9f58c8bcf8ce0645f2f387c8ff344ac2108fc8e7e1ee907fa55e93c91aa5d9fd921bf3fdcb -CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/87449e72e3f33dbb69b7053cdc2649d4 -CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/5ce02ad10c6f4686a476eb2a5de2988cd8b482f5e693db2880c84ad1c82f468ef03fe01b9d0feefe5d4ee741d1d16643d36b144e6261ed32311b3b6f312fac2f -CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/0407cde92cfa42fa89ac83217ca0ec16 -CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/032c831f1166a336551138939ac40eb2c68a048ce786c0c1403b879a20c1b706caac16d22560b2c7f2b3d6373986c347188675674116005ca251336ee048d09f 
-CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/23418763b808371ee94772a90d501f4d -CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/7867b843551457b11bda7821dd384c1c1cf23b80a308b2058a693de7b7da099f0b37eb0a6de2b84c04b625a68c60eea55138e200d5d6ec6f6af09bd7ce406a96 -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/e3d33ae03c18affea74699bdc1fabb68 -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/42013f4921de5a69ad857195ce5c19ad1bca3c920d79699e5501f1f4534ab132fabd422362b2b5056f5d182215d6c069db5df460bafa700903faf962cc00f77b -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/d40c1e8c0393213c6057c53a12f44175 -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/fe7baa4de7490065ab7b953cc12f41462a24bcb49d0a4a64b23249e98e7569b19bb1cb455af2f76090e34066a7d3cdd7a48cae6515ce6c7a5c8486b0cacc5106 -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/48541b90f715c4c86ee4da0570275947 -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/7f2683fb98e80f12629f4ed3bea9fd59d32b7e7a9ed1699e782d8e238ff0915ecc61bf00adaf4597cfe41caf82cdca0f9be250f595f5f0bea6d8f77dba99eaf4 -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/4547059eb905995667be48bf85d49911 -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/7400fdabc924434ab4a4949248c3603887ac06ffd2f205ae33e14495d86cd4f816bbd1999eeafa0257f518df1e7f7c522f596e847a71dbfbfccff4859f50acc7 -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/46267543cad6584d7b7b9fcc8f18f21d -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/0353d7d724be48d4185d3c181692970b7996f53f6a01723072aa5c94b53a8c5055faeed30df51659c252a46f4b941dec0cb24569323e3c85c166f14c5b7c8e9e 
-CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/14dba2897a6e9d370fa9091c045375fc -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/10b79f9c059839f5b57fa8d2a381a034c4067262c4088bd354d14ea56bec097878069383aa9cfadaa09d73bd20fc348fb61662d863a8d62cb25d7af6b8e29858 -CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/eed836d1addeb10d0901f836724aff1e -CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/e33eca424d1529a1fb23ba9cf7fac345ed1cfc8073c975b6b31ca44d2e8c3f5083af65433df009b22483dceb2e43149f3c1e8433681fec5fb812e1d5b4243ce4 -CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/d5ae9f9519341fdaabf62267c89461d2 -CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/6421aa5d1bd6f08ad43f59ed4dc1bef8b9b598ebbbd3e48149730f3bec3471f8e2c02ffb338427326924290b8f52ef9e626e3313448bc931a61d866c5dc544ae -CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/fc1df521395362a5aaa2e2aeef707207 -CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/f2e5a08e3cae171242ae6a20d2d4838c1529ce042745dc466148b7bbc06896d94476fd05c7787e6e8641bea752dfc0e6b09e95b160bede600d20d2ad68e7705f -CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/2338f8aa2696935f7460454e708ce308 -CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/5a4b0e97928c26eee16bbec4c3e69e55fa9c768101257c3e2f161118809c778aa0feaf21307198822c3172a58ed12ca0a49285b2941ed0b8f2b367e64ca1c51a -CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/b393d2bf0d181d218130ac572c17d369 -CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/76e0f7caa24bb734c6f7542be9f834d5b912f082cb3c4c3c52a63e37d4b8c33dd94e576c43f4bee6c04bfb44af2f2b67ba70773fa52ad0de6c8c0059b3e51b83 
-CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/23db836e6e4142f621862971017fe61e -CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/c0b04f7fe5aabfe6af509c77a1f68e0bcfd14714758042fe502b968c4cc272156fc84c8b4c1ee574754bb2fddaa810f6a4215cbd164ddc11b697b3adaef09a81 +CompilerSupportLibraries.v1.0.5+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/20ebaad57850393b6ac9fa924e511fe4 +CompilerSupportLibraries.v1.0.5+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/020de4d8b0ff6bedbadaa305ff8445e6849f12053762ea4aa68412d1ec763dbd86f479587a2fbb862487f1feb04d976c38099ddf3887817a3d32b3f029cf85b1 +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/3908fa1a2f739b330e787468c9bfb5c8 +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/1741e3403ac7aa99e7cfd9a01222c4153ed300f47cc1b347e1af1a6cd07a82caaa54b9cfbebae8751440420551621cc6524504413446d104f9493dff2c081853 +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/2444dbb7637b32cf543675cc12330878 +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/8537f0b243df8544350c884021b21c585fd302e8dd462a30a6ee84c7a36a049133262e5d1bc362f972066b8e8d6a091c32c3b746bab1feb9fccf2e7cca65756c +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/d79c1434594c0c5e7d6be798bf52c99e +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/7e71accc401a45b51b298702fb4c79a2fc856c7b28f0935f6ad3a0db5381c55fe5432daff371842930d718024b7c6c1d80e2bd09d397145203673bebbe3496ae +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/f212059053d99558a9b0bf54b20180e1 +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/5c104b1282cec8a944e5d008f44a4d60f4394fd5d797fec7d1f487d13e7328cd9c88ec4916dabf18596d87160756bda914e4f8c5a356b5577f9349d0d9e976d6 
+CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/3e3b3795ee93ef317223050e803a9875 +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/85d3c955e15f66bfe8bfec2f28c9160bc03d4d531ea4ffe6bc6b51e0d69ccea3ab67a16ca752dabc870861c407381c4519d75c6be3832e8dccd6122ec8c6ed75 +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/cf2d1315f6a348af2e6c065e2a286e7a +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/58420377bc77aa7678034ee5f708eb6be7db359faef2c2638869765453633da9bf455512bd88e95b38ae0428ecc4053561517b176b2371129bdaef9d8d5dadfd +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/f5c09ed7e0eeb8d345d328f950582f26 +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c657f55c8fcdeb404be168a3a63a5e84304730fe34f25673d92cdae4b0a1fcc6a877ee1433f060e1be854c7811d66632e32510a2ed591d88330f1340b9c20de +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/c685518aca4721cd8621d510e2039683 +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b760468c6377dcd2b8dd50200daaabe604006afc070984d78152b2becd0680b59036c9a6e91dea490121bd85b58d285bfc1e1cf696d29af236528400101de36c +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/8faf5c8ad62ab10f71dd2ec9683053e2 +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/921239f241a5c89710cf07272d7f6c3f10201a7533068ed1e9643f9fb2f439e1bb765a4966d913829866ee0ce4f1589d30d06e4b5c1361e3c016a9473f087177 +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/b38fcb70691ac2621379d298eef8c79e +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/06c7f64257ce721f5941f6e50a0d2717cdc9394fc532ded19ce3eaacd5e92a416969534227562e4fee04d2b6340c650d8bc9779e14519b90038bc41e8d1f5ce3 
+CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/cdfab2c7bc41765caf4441c3caeed761 +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/7109d4a7b32c00309c42685f54a86fc2cc63c0c00f65584ad296b6e44ad3320eed1aaf49684a8831841cdffa5555d72f89272fb722a780596e27ef020528026b +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/441980ebd23d72772cbe603f1c275336 +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/e273d9f1af259a3080df8f173e1808a1ade976a943aba97216bf59a96178e7c052e7a048b0ceee53ab486ed577a2ecb92579857be2f7b29e76322ee1f13c9d76 +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/f5c09ed7e0eeb8d345d328f950582f26 +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c657f55c8fcdeb404be168a3a63a5e84304730fe34f25673d92cdae4b0a1fcc6a877ee1433f060e1be854c7811d66632e32510a2ed591d88330f1340b9c20de +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/c685518aca4721cd8621d510e2039683 +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b760468c6377dcd2b8dd50200daaabe604006afc070984d78152b2becd0680b59036c9a6e91dea490121bd85b58d285bfc1e1cf696d29af236528400101de36c +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/8faf5c8ad62ab10f71dd2ec9683053e2 +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/921239f241a5c89710cf07272d7f6c3f10201a7533068ed1e9643f9fb2f439e1bb765a4966d913829866ee0ce4f1589d30d06e4b5c1361e3c016a9473f087177 +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/b38fcb70691ac2621379d298eef8c79e +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/06c7f64257ce721f5941f6e50a0d2717cdc9394fc532ded19ce3eaacd5e92a416969534227562e4fee04d2b6340c650d8bc9779e14519b90038bc41e8d1f5ce3 
+CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/cdfab2c7bc41765caf4441c3caeed761 +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/7109d4a7b32c00309c42685f54a86fc2cc63c0c00f65584ad296b6e44ad3320eed1aaf49684a8831841cdffa5555d72f89272fb722a780596e27ef020528026b +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/441980ebd23d72772cbe603f1c275336 +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/e273d9f1af259a3080df8f173e1808a1ade976a943aba97216bf59a96178e7c052e7a048b0ceee53ab486ed577a2ecb92579857be2f7b29e76322ee1f13c9d76 +CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran3.tar.gz/md5/6decf8fd5afb50451771c761e63a8917 +CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/4984724bcc847724b1bc005b6f760a18b68147f7d5402d0faf4e28fc0d14fa10975368a951f9caf2a8856500046dec8343043274557d58269e77492b929a9e4b +CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran4.tar.gz/md5/39d1e8a3baa144c018d3eaf7f3806482 +CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/fc4d429279c5a93b6c28b6e911b1e7cfd1c1cfe46f11f2e901b3832ce90d45f49d3d29f0ef18518a94af6cc8651f67c4ed81672680f9281ada390440b172a2af +CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran5.tar.gz/md5/37dabd9cd224c9fed9633dedccb6c565 +CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/b253149e72eef9486888fbaace66e9b6945f4477f6b818f64f3047331165b0e2bc17aa6e3fc8c88686a72e478eb62c8f53883415d5419db448d8016fa3a1da5e +CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran3.tar.gz/md5/afdd32bfadd465848e6be458817a44ae +CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran3.tar.gz/sha512/eebd679c499143014514c7c9d1875dedbbab9e3af51526c4dd445a9e3dbade95d24522da8bbad0a50ab400755e47b018828b324c4ad7705e212ccd990e34439a 
+CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran4.tar.gz/md5/bc4a0f0b7cea328f7e8850583774496b +CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran4.tar.gz/sha512/82285b67946212b49cddf6259f2c60ff5469f8c5263ccefe44f1d93ace98ab68e2c152e1b54434b2f075fd8d192c06d5451bc8cca26d951ad15f3453102f02b5 +CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran5.tar.gz/md5/177f0232abce8d523882530ed7a93092 +CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran5.tar.gz/sha512/db80acf0f2434f28ee7680e1beb34f564940071815d1ad89fb5913cbd9ac24da528e826d0d54be6265a7340ebd661b6d308ed79d96b67fa5d8c98dc3f1bee8d6 +CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/f5795dada5360eb8422f45150b13bae9 +CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/6acd1bf7c81631cef9b8b0576ccece08723c5ae2f49de2487d3aefd25f9a0ad49df09e3782735267997d40687b04b85c89e00f6889b026af599bf1bbe91803a1 +CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/5e590f83161913f0145ba8d496b2504b +CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/4a3f36588afcdef26173764597054068e26f2376e6126a9a94c46b258b5d7a29951d47b5e1ba24df6c3d139bbc4decc5c501a266811692d7fadadc7bd7b6960d +CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/27da4a7c890fe1427c33fe214cc5feaf +CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/310ad00f053f9f3ec715ce2e8d20446f397728dff5acc787ea9c9332346607a3d42b678099c424e6d6e5294acddf2aa26051de657b48d34abfd04486951bf241 +CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/4e5e4b23dc87450738da33926a07511d +CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/fc09879d94b750e75775d8b64a41ab9924d675fb53c5700467604412928fe7f5cb21911da0f64898d2463fa77ffbaf4c96c397b9060f4746eec152747930cddc 
+CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/9a92138ed69aa317a932a615c6e62d69 +CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/0b7785379936a2a209b074177b1424dd7e00b29b5165f564e799b0aa4e06a582e9d616525d97274ba2507cb88192028f1ac485d3f99bdc7ee53fc63c1a7e85de +CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/8ffee3d6de5197c7a1f354d72c8238fa +CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/deadc4d7224c84f9b82dc956b69e815c44ae036802838365d870ab9f58c8bcf8ce0645f2f387c8ff344ac2108fc8e7e1ee907fa55e93c91aa5d9fd921bf3fdcb +CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/87449e72e3f33dbb69b7053cdc2649d4 +CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/5ce02ad10c6f4686a476eb2a5de2988cd8b482f5e693db2880c84ad1c82f468ef03fe01b9d0feefe5d4ee741d1d16643d36b144e6261ed32311b3b6f312fac2f +CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/0407cde92cfa42fa89ac83217ca0ec16 +CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/032c831f1166a336551138939ac40eb2c68a048ce786c0c1403b879a20c1b706caac16d22560b2c7f2b3d6373986c347188675674116005ca251336ee048d09f +CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/23418763b808371ee94772a90d501f4d +CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/7867b843551457b11bda7821dd384c1c1cf23b80a308b2058a693de7b7da099f0b37eb0a6de2b84c04b625a68c60eea55138e200d5d6ec6f6af09bd7ce406a96 +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/e3d33ae03c18affea74699bdc1fabb68 +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/42013f4921de5a69ad857195ce5c19ad1bca3c920d79699e5501f1f4534ab132fabd422362b2b5056f5d182215d6c069db5df460bafa700903faf962cc00f77b 
+CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/d40c1e8c0393213c6057c53a12f44175 +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/fe7baa4de7490065ab7b953cc12f41462a24bcb49d0a4a64b23249e98e7569b19bb1cb455af2f76090e34066a7d3cdd7a48cae6515ce6c7a5c8486b0cacc5106 +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/48541b90f715c4c86ee4da0570275947 +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/7f2683fb98e80f12629f4ed3bea9fd59d32b7e7a9ed1699e782d8e238ff0915ecc61bf00adaf4597cfe41caf82cdca0f9be250f595f5f0bea6d8f77dba99eaf4 +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/4547059eb905995667be48bf85d49911 +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/7400fdabc924434ab4a4949248c3603887ac06ffd2f205ae33e14495d86cd4f816bbd1999eeafa0257f518df1e7f7c522f596e847a71dbfbfccff4859f50acc7 +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/46267543cad6584d7b7b9fcc8f18f21d +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/0353d7d724be48d4185d3c181692970b7996f53f6a01723072aa5c94b53a8c5055faeed30df51659c252a46f4b941dec0cb24569323e3c85c166f14c5b7c8e9e +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/14dba2897a6e9d370fa9091c045375fc +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/10b79f9c059839f5b57fa8d2a381a034c4067262c4088bd354d14ea56bec097878069383aa9cfadaa09d73bd20fc348fb61662d863a8d62cb25d7af6b8e29858 +CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/eed836d1addeb10d0901f836724aff1e +CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/e33eca424d1529a1fb23ba9cf7fac345ed1cfc8073c975b6b31ca44d2e8c3f5083af65433df009b22483dceb2e43149f3c1e8433681fec5fb812e1d5b4243ce4 
+CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/d5ae9f9519341fdaabf62267c89461d2 +CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/6421aa5d1bd6f08ad43f59ed4dc1bef8b9b598ebbbd3e48149730f3bec3471f8e2c02ffb338427326924290b8f52ef9e626e3313448bc931a61d866c5dc544ae +CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/fc1df521395362a5aaa2e2aeef707207 +CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/f2e5a08e3cae171242ae6a20d2d4838c1529ce042745dc466148b7bbc06896d94476fd05c7787e6e8641bea752dfc0e6b09e95b160bede600d20d2ad68e7705f +CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/0c2fc6fae4ebe293a7f0dc1e91f6531a +CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/fdb0ad061cacad0557fde3ec216fd3666284f24ad6a86f4a4b6f946dccb112c9704f52edba86f3b17d84c824affbcfef740720348ef227380cf6017811bda80b +CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/005e608dbef2b5cdb7624702ccc426be +CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/8bb2bcd0a6b1901e8a9be20f505bead5c78ecafbe5a8271cd13385553e5744e0c7bff62976ac9e7d74b8f3bd467603d4c0f5658e6b120bb23066c15e0a644ed4 +CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/d6c2c7ad72bff7f7e5c43678d716a57a +CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/36f5eba1b0be440797467cb7104652b74709913d2bad1b08ee2dc70f450fb8eab81b28f2b0bc8dfc238b3c46982c69aac831b4fad5bcee4e9dd114852fcb4a0b diff --git a/deps/libgit2.mk b/deps/libgit2.mk index 9bd7bd555d89d..014fdc0108f7c 100644 --- a/deps/libgit2.mk +++ b/deps/libgit2.mk @@ -16,6 +16,11 @@ endif LIBGIT2_OPTS := $(CMAKE_COMMON) -DCMAKE_BUILD_TYPE=Release -DUSE_THREADS=ON -DUSE_BUNDLED_ZLIB=ON -DUSE_SSH=ON -DBUILD_CLI=OFF ifeq ($(OS),WINNT) LIBGIT2_OPTS += -DWIN32=ON -DMINGW=ON +ifeq ($(USE_SYSTEM_LIBSSH2), 0) 
+LIBGIT2_OPTS += -DLIBSSH2_LIBRARIES=libssh2.dll +LIBGIT2_OPTS += -DLIBSSH2_LIBRARY_DIRS=$(build_prefix)/lib +LIBGIT2_OPTS += -DLIBSSH2_INCLUDE_DIRS=$(build_prefix)/include +endif # USE_SYSTEM_LIBSSH2=0 ifneq ($(ARCH),x86_64) ifneq ($(USECLANG),1) LIBGIT2_OPTS += -DCMAKE_C_FLAGS="-mincoming-stack-boundary=2" diff --git a/deps/llvm-ver.make b/deps/llvm-ver.make index 3c498be6c2363..3777d5b37915a 100644 --- a/deps/llvm-ver.make +++ b/deps/llvm-ver.make @@ -17,3 +17,4 @@ LLVM_SHARED_LIB_VER_SUFFIX := $(LLVM_VER_MAJ)jl # e.g.: "libLLVM-14jl" LLVM_SHARED_LIB_NAME := libLLVM-$(LLVM_SHARED_LIB_VER_SUFFIX) LLVM_SHARED_LINK_FLAG := -lLLVM-$(LLVM_SHARED_LIB_VER_SUFFIX) +LLVM_SHLIB_SYMBOL_VERSION := JL_LLVM_$(LLVM_VER_MAJ).$(LLVM_VER_MIN) diff --git a/deps/llvm.mk b/deps/llvm.mk index 83b9a66ec608e..2a8365dd73e75 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -230,6 +230,8 @@ $$(LLVM_BUILDDIR_withtype)/build-compiled: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patc LLVM_PATCH_PREV := $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied endef +$(eval $(call LLVM_PATCH,llvm-ittapi-cmake)) + ifeq ($(USE_SYSTEM_ZLIB), 0) $(LLVM_BUILDDIR_withtype)/build-configured: | $(build_prefix)/manifest/zlib endif @@ -239,6 +241,21 @@ endif # declare that all patches must be applied before running ./configure $(LLVM_BUILDDIR_withtype)/build-configured: | $(LLVM_PATCH_PREV) +# Apply Julia's specific patches if requested, e.g. if not using Julia's fork of LLVM. +ifeq ($(LLVM_APPLY_JULIA_PATCHES), 1) +# Download Julia's patchset. +$(BUILDDIR)/julia-patches.patch: + $(JLDOWNLOAD) $@ $(LLVM_JULIA_DIFF_GITHUB_REPO)/compare/$(LLVM_BASE_REF)...$(LLVM_JULIA_REF).diff + +# Apply the patch. +$(SRCCACHE)/$(LLVM_SRC_DIR)/julia-patches.patch-applied: $(BUILDDIR)/julia-patches.patch $(SRCCACHE)/$(LLVM_SRC_DIR)/source-extracted + cd $(SRCCACHE)/$(LLVM_SRC_DIR) && patch -p1 < $(realpath $<) + echo 1 > $@ + +# Require application of Julia's patchset before configuring LLVM. 
+$(LLVM_BUILDDIR_withtype)/build-configured: | $(SRCCACHE)/$(LLVM_SRC_DIR)/julia-patches.patch-applied +endif + $(LLVM_BUILDDIR_withtype)/build-configured: $(SRCCACHE)/$(LLVM_SRC_DIR)/source-extracted mkdir -p $(dir $@) cd $(dir $@) && \ @@ -288,7 +305,7 @@ fastcheck-llvm: #none check-llvm: $(LLVM_BUILDDIR_withtype)/build-checked ifeq ($(USE_INTEL_JITEVENTS),1) -extract-llvm: $(SRCCACHE)/$(ITTAPI_SRC_DIR)/source-extracted +$(SRCCACHE)/$(LLVM_SRC_DIR)/source-extracted: $(SRCCACHE)/$(ITTAPI_SRC_DIR)/source-extracted endif #todo: LLVM make check target is broken on julia.mit.edu (and really slow elsewhere) diff --git a/deps/llvm.version b/deps/llvm.version index e35db3bd6aed2..4e7969994141e 100644 --- a/deps/llvm.version +++ b/deps/llvm.version @@ -1,7 +1,23 @@ +# -*- makefile -*- + ## jll artifact LLVM_JLL_NAME := libLLVM LLVM_ASSERT_JLL_VER := 15.0.7+5 ## source build +# Version number of LLVM LLVM_VER := 15.0.7 +# Git branch name in `LLVM_GIT_URL` repository LLVM_BRANCH=julia-15.0.7-5 +# Git ref in `LLVM_GIT_URL` repository LLVM_SHA1=julia-15.0.7-5 + +## Following options are used to automatically fetch patchset from Julia's fork. This is +## useful if you want to build an external LLVM while still applying Julia's patches. +# Set to 1 if you want to automatically apply Julia's patches to a different fork of LLVM. +LLVM_APPLY_JULIA_PATCHES := 0 +# GitHub repository to use for fetching the Julia patches to apply to LLVM source code. +LLVM_JULIA_DIFF_GITHUB_REPO := https://github.com/llvm/llvm-project +# Base GitHub ref for generating the diff. +LLVM_BASE_REF := llvm:llvmorg-15.0.7 +# Julia fork's GitHub ref for generating the diff. 
+LLVM_JULIA_REF := JuliaLang:julia-15.0.7-5 diff --git a/deps/patches/llvm-ittapi-cmake.patch b/deps/patches/llvm-ittapi-cmake.patch new file mode 100644 index 0000000000000..6746d21754283 --- /dev/null +++ b/deps/patches/llvm-ittapi-cmake.patch @@ -0,0 +1,47 @@ +diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt +index 0c5017c359d6..92777133e9de 100644 +--- a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt ++++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt +@@ -12,23 +12,23 @@ if(NOT DEFINED ITTAPI_SOURCE_DIR) + set(ITTAPI_SOURCE_DIR ${PROJECT_BINARY_DIR}) + endif() + +-if(NOT EXISTS ${ITTAPI_SOURCE_DIR}/ittapi) +- execute_process(COMMAND ${GIT_EXECUTABLE} clone ${ITTAPI_GIT_REPOSITORY} +- WORKING_DIRECTORY ${ITTAPI_SOURCE_DIR} ++if(NOT EXISTS ${ITTAPI_SOURCE_DIR}) ++ execute_process(COMMAND ${GIT_EXECUTABLE} clone ${ITTAPI_GIT_REPOSITORY} ${ITTAPI_SOURCE_DIR} ++ WORKING_DIRECTORY ${ITTAPI_SOURCE_DIR}/.. + RESULT_VARIABLE GIT_CLONE_RESULT) + if(NOT GIT_CLONE_RESULT EQUAL "0") + message(FATAL_ERROR "git clone ${ITTAPI_GIT_REPOSITORY} failed with ${GIT_CLONE_RESULT}, please clone ${ITTAPI_GIT_REPOSITORY}") + endif() +-endif() + +-execute_process(COMMAND ${GIT_EXECUTABLE} checkout ${ITTAPI_GIT_TAG} +- WORKING_DIRECTORY ${ITTAPI_SOURCE_DIR}/ittapi +- RESULT_VARIABLE GIT_CHECKOUT_RESULT) +-if(NOT GIT_CHECKOUT_RESULT EQUAL "0") +- message(FATAL_ERROR "git checkout ${ITTAPI_GIT_TAG} failed with ${GIT_CHECKOUT_RESULT}, please checkout ${ITTAPI_GIT_TAG} at ${ITTAPI_SOURCE_DIR}/ittapi") ++ execute_process(COMMAND ${GIT_EXECUTABLE} checkout ${ITTAPI_GIT_TAG} ++ WORKING_DIRECTORY ${ITTAPI_SOURCE_DIR} ++ RESULT_VARIABLE GIT_CHECKOUT_RESULT) ++ if(NOT GIT_CHECKOUT_RESULT EQUAL "0") ++ message(FATAL_ERROR "git checkout ${ITTAPI_GIT_TAG} failed with ${GIT_CHECKOUT_RESULT}, please checkout ${ITTAPI_GIT_TAG} at ${ITTAPI_SOURCE_DIR}") ++ endif() + endif() + +-include_directories( 
${ITTAPI_SOURCE_DIR}/ittapi/include/ ) ++include_directories( ${ITTAPI_SOURCE_DIR}/include/ ) + + if( HAVE_LIBDL ) + set(LLVM_INTEL_JIT_LIBS ${CMAKE_DL_LIBS}) +@@ -40,7 +40,7 @@ set(LLVM_INTEL_JIT_LIBS ${LLVM_PTHREAD_LIB} ${LLVM_INTEL_JIT_LIBS}) + add_llvm_component_library(LLVMIntelJITEvents + IntelJITEventListener.cpp + jitprofiling.c +- ${ITTAPI_SOURCE_DIR}/ittapi/src/ittnotify/ittnotify_static.c ++ ${ITTAPI_SOURCE_DIR}/src/ittnotify/ittnotify_static.c + + LINK_LIBS ${LLVM_INTEL_JIT_LIBS} + diff --git a/deps/sanitizers.mk b/deps/sanitizers.mk new file mode 100644 index 0000000000000..81db75a4ee63e --- /dev/null +++ b/deps/sanitizers.mk @@ -0,0 +1,29 @@ +# Interrogate the compiler about where it is keeping its sanitizer libraries +ifeq ($(USECLANG),1) +SANITIZER_LIB_PATH := $(shell LANG=C $(CC) -print-runtime-dir) +else +SANITIZER_LIB_PATH := $(dir $(shell LANG=C $(CC) -print-file-name=libasan.so)) +endif + +# Given a colon-separated list of paths in $(2), find the location of the library given in $(1) +define pathsearch +$(wildcard $(addsuffix /$(1),$(subst :, ,$(2)))) +endef + +define copy_sanitizer_lib +install-sanitizers: $$(addprefix $$(build_libdir)/, $$(notdir $$(call pathsearch,$(1),$$(SANITIZER_LIB_PATH)))) | $$(build_shlibdir) +$$(addprefix $$(build_shlibdir)/,$(2)): $$(addprefix $$(SANITIZER_LIB_PATH)/,$(2)) | $$(build_shlibdir) + -cp $$< $$@ +endef + +ifeq ($(USECLANG),1) + +## Clang libraries +$(eval $(call copy_sanitizer_lib,$(call versioned_libname,libclang_rt.asan-*),$(call versioned_libname,libclang_rt.asan-%))) + +endif + +get-sanitizers: +clean-sanitizers: + -rm -f $(build_shlibdir)/libclang_rt.asan*$(SHLIB_EXT)* +distclean-sanitizers: clean-sanitizers diff --git a/deps/tools/common.mk b/deps/tools/common.mk index c19886114c14e..3cefc253cec3d 100644 --- a/deps/tools/common.mk +++ b/deps/tools/common.mk @@ -15,11 +15,12 @@ CONFIGURE_COMMON += LDFLAGS="$(LDFLAGS) $(RPATH_ESCAPED_ORIGIN) $(SANITIZE_LDFLA endif CONFIGURE_COMMON += F77="$(FC)" 
CC="$(CC) $(SANITIZE_OPTS)" CXX="$(CXX) $(SANITIZE_OPTS)" LD="$(LD)" -CMAKE_CC_ARG := $(CC_ARG) -CMAKE_CXX_ARG := $(CXX_ARG) - CMAKE_COMMON := -DCMAKE_INSTALL_PREFIX:PATH=$(build_prefix) -DCMAKE_PREFIX_PATH=$(build_prefix) CMAKE_COMMON += -DLIB_INSTALL_DIR=$(build_shlibdir) +ifneq ($(OS),WINNT) +CMAKE_COMMON += -DCMAKE_INSTALL_LIBDIR=$(build_libdir) +endif + ifeq ($(OS), Darwin) CMAKE_COMMON += -DCMAKE_MACOSX_RPATH=1 endif @@ -27,12 +28,27 @@ endif ifneq ($(VERBOSE), 0) CMAKE_COMMON += -DCMAKE_VERBOSE_MAKEFILE=ON endif -# The call to which here is to work around https://cmake.org/Bug/view.php?id=14366 -CMAKE_COMMON += -DCMAKE_C_COMPILER="$$(which $(CC_BASE))" + +# The calls to `which` are to work around https://cmake.org/Bug/view.php?id=14366 +ifeq ($(USECCACHE), 1) +# `ccache` must be used as compiler launcher, not compiler itself. +CMAKE_COMMON += -DCMAKE_C_COMPILER_LAUNCHER=ccache +CMAKE_COMMON += -DCMAKE_CXX_COMPILER_LAUNCHER=ccache +CMAKE_CC := "$$(which $(shell echo $(CC_ARG) | cut -d' ' -f1))" +CMAKE_CXX := "$$(which $(shell echo $(CXX_ARG) | cut -d' ' -f1))" +CMAKE_CC_ARG := $(shell echo $(CC_ARG) | cut -d' ' -f2-) +CMAKE_CXX_ARG := $(shell echo $(CXX_ARG) | cut -d' ' -f2-) +else +CMAKE_CC := "$$(which $(CC_BASE))" +CMAKE_CXX := "$$(which $(CXX_BASE))" +CMAKE_CC_ARG := $(CC_ARG) +CMAKE_CXX_ARG := $(CXX_ARG) +endif +CMAKE_COMMON += -DCMAKE_C_COMPILER=$(CMAKE_CC) ifneq ($(strip $(CMAKE_CC_ARG)),) CMAKE_COMMON += -DCMAKE_C_COMPILER_ARG1="$(CMAKE_CC_ARG) $(SANITIZE_OPTS)" endif -CMAKE_COMMON += -DCMAKE_CXX_COMPILER="$(CXX_BASE)" +CMAKE_COMMON += -DCMAKE_CXX_COMPILER=$(CMAKE_CXX) ifneq ($(strip $(CMAKE_CXX_ARG)),) CMAKE_COMMON += -DCMAKE_CXX_COMPILER_ARG1="$(CMAKE_CXX_ARG) $(SANITIZE_OPTS)" endif diff --git a/doc/make.jl b/doc/make.jl index 3c69f4e6c47b5..a9343a3133a63 100644 --- a/doc/make.jl +++ b/doc/make.jl @@ -118,6 +118,7 @@ BaseDocs = [ "base/punctuation.md", "base/sort.md", "base/iterators.md", + "base/reflection.md", "base/c.md", "base/libc.md", 
"base/stacktraces.md", @@ -127,7 +128,6 @@ BaseDocs = [ StdlibDocs = [stdlib.targetfile for stdlib in STDLIB_DOCS] DevDocs = [ - "devdocs/reflection.md", "Documentation of Julia's Internals" => [ "devdocs/init.md", "devdocs/ast.md", @@ -265,12 +265,6 @@ DocMeta.setdocmeta!( maybe_revise(:(using Base.BinaryPlatforms)); recursive=true, warn=false, ) -DocMeta.setdocmeta!( - Pkg.LazilyInitializedFields, - :DocTestSetup, - maybe_revise(:(using Pkg.LazilyInitializedFields)); - recursive=true, warn=false, -) let r = r"buildroot=(.+)", i = findfirst(x -> occursin(r, x), ARGS) global const buildroot = i === nothing ? (@__DIR__) : first(match(r, ARGS[i]).captures) diff --git a/doc/src/assets/julialogoheaderimage_dark.svg b/doc/src/assets/julialogoheaderimage_dark.svg deleted file mode 100644 index 04e06d2665633..0000000000000 --- a/doc/src/assets/julialogoheaderimage_dark.svg +++ /dev/null @@ -1,209 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/doc/src/assets/julialogoheaderimage_light.svg b/doc/src/assets/julialogoheaderimage_light.svg deleted file mode 100644 index 892ca1bd08701..0000000000000 --- a/doc/src/assets/julialogoheaderimage_light.svg +++ /dev/null @@ -1,209 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
diff --git a/doc/src/base/base.md b/doc/src/base/base.md index 7e45e2176478d..81abc009f2fbc 100644 --- a/doc/src/base/base.md +++ b/doc/src/base/base.md @@ -141,10 +141,16 @@ Base.copy Base.deepcopy Base.getproperty Base.setproperty! +Base.replaceproperty! +Base.swapproperty! +Base.modifyproperty! Base.propertynames Base.hasproperty Core.getfield Core.setfield! +Core.modifyfield! +Core.replacefield! +Core.swapfield! Core.isdefined Core.getglobal Core.setglobal! @@ -234,6 +240,7 @@ Core.Tuple Core.NTuple Core.NamedTuple Base.@NamedTuple +Base.@Kwargs Base.Val Core.Vararg Core.Nothing @@ -285,6 +292,8 @@ Base.@inline Base.@noinline Base.@nospecialize Base.@specialize +Base.@nospecializeinfer +Base.@constprop Base.gensym Base.@gensym var"name" @@ -453,6 +462,7 @@ Base.identify_package Base.locate_package Base.require Base.compilecache +Base.isprecompiled ``` ## Internals diff --git a/doc/src/base/c.md b/doc/src/base/c.md index bdc64fa6d98b4..e221a6432542f 100644 --- a/doc/src/base/c.md +++ b/doc/src/base/c.md @@ -10,6 +10,9 @@ Base.unsafe_convert Base.cconvert Base.unsafe_load Base.unsafe_store! +Base.unsafe_modify! +Base.unsafe_replace! +Base.unsafe_swap! Base.unsafe_copyto!{T}(::Ptr{T}, ::Ptr{T}, ::Any) Base.unsafe_copyto!{T}(::Array{T}, ::Any, ::Array{T}, ::Any, ::Any) Base.copyto! diff --git a/doc/src/base/libc.md b/doc/src/base/libc.md index 0af1b74a79a71..08d2670123234 100644 --- a/doc/src/base/libc.md +++ b/doc/src/base/libc.md @@ -4,6 +4,10 @@ Base.Libc.malloc Base.Libc.calloc Base.Libc.realloc +Base.Libc.memcpy +Base.Libc.memmove +Base.Libc.memset +Base.Libc.memcmp Base.Libc.free Base.Libc.errno Base.Libc.strerror diff --git a/doc/src/base/multi-threading.md b/doc/src/base/multi-threading.md index fb75b21479707..45a60b14d541a 100644 --- a/doc/src/base/multi-threading.md +++ b/doc/src/base/multi-threading.md @@ -17,6 +17,10 @@ See also [Multi-Threading](@ref man-multithreading). 
## Atomic operations +```@docs +atomic +``` + ```@docs Base.@atomic Base.@atomicswap diff --git a/doc/src/devdocs/reflection.md b/doc/src/base/reflection.md similarity index 100% rename from doc/src/devdocs/reflection.md rename to doc/src/base/reflection.md diff --git a/doc/src/devdocs/ast.md b/doc/src/devdocs/ast.md index 9fd03ad9a667a..1a11a5918d091 100644 --- a/doc/src/devdocs/ast.md +++ b/doc/src/devdocs/ast.md @@ -438,10 +438,6 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form. Yields the caught exception inside a `catch` block, as returned by `jl_current_exception()`. - * `undefcheck` - - Temporary node inserted by the compiler and will be processed in `type_lift_pass!`. - * `enter` Enters an exception handler (`setjmp`). `args[1]` is the label of the catch block to jump to on diff --git a/doc/src/devdocs/boundscheck.md b/doc/src/devdocs/boundscheck.md index 0935257526885..7acd32f04dc75 100644 --- a/doc/src/devdocs/boundscheck.md +++ b/doc/src/devdocs/boundscheck.md @@ -28,10 +28,10 @@ end With a custom array-like type `MyArray` having: ```julia -@inline getindex(A::MyArray, i::Real) = (@boundscheck checkbounds(A,i); A.data[to_index(i)]) +@inline getindex(A::MyArray, i::Real) = (@boundscheck checkbounds(A, i); A.data[to_index(i)]) ``` -Then when `getindex` is inlined into `sum`, the call to `checkbounds(A,i)` will be elided. If +Then when `getindex` is inlined into `sum`, the call to `checkbounds(A, i)` will be elided. If your function contains multiple layers of inlining, only `@boundscheck` blocks at most one level of inlining deeper are eliminated. The rule prevents unintended changes in program behavior from code further up the stack. @@ -57,7 +57,7 @@ with [`OffsetArrays`](@ref man-custom-indices): ```julia-repl julia> using OffsetArrays -julia> sum(OffsetArray([1,2,3], -10)) +julia> sum(OffsetArray([1, 2, 3], -10)) 9164911648 # inconsistent results or segfault ``` @@ -123,4 +123,4 @@ the last argument). 
## Emit bounds checks -Julia can be launched with `--check-bounds={yes|no|auto}` to emit bounds checks always, never, or respect @inbounds declarations. +Julia can be launched with `--check-bounds={yes|no|auto}` to emit bounds checks always, never, or respect `@inbounds` declarations. diff --git a/doc/src/devdocs/build/arm.md b/doc/src/devdocs/build/arm.md index 9268da32d9c26..747ee25d22a04 100644 --- a/doc/src/devdocs/build/arm.md +++ b/doc/src/devdocs/build/arm.md @@ -68,6 +68,16 @@ Compilation on `ARMv8-A` requires that `Make.user` is configured as follows: MCPU=armv8-a ``` +Starting from Julia v1.10, [JITLink](https://llvm.org/docs/JITLink.html) is automatically enabled on this architecture for all operating systems when linking to LLVM 15 or later versions. +Due to a [bug in LLVM memory manager](https://github.com/llvm/llvm-project/issues/63236), non-trivial workloads may generate too many memory mappings that on Linux can exceed the limit of memory mappings (`mmap`) set in the file `/proc/sys/vm/max_map_count`, resulting in an error like +``` +JIT session error: Cannot allocate memory +``` +Should this happen, ask your system administrator to increase the limit of memory mappings for example with the command +``` +sysctl -w vm.max_map_count=262144 +``` + ### nVidia Jetson TX2 Julia builds and runs on the [nVidia Jetson TX2](https://www.nvidia.com/object/embedded-systems-dev-kits-modules.html) diff --git a/doc/src/devdocs/eval.md b/doc/src/devdocs/eval.md index 6a153c67daa13..8f2fd68159676 100644 --- a/doc/src/devdocs/eval.md +++ b/doc/src/devdocs/eval.md @@ -18,6 +18,9 @@ function, and primitive function, before turning into the desired result (hopefu Abstract Syntax Tree The AST is the digital representation of the code structure. In this form the code has been tokenized for meaning so that it is more suitable for manipulation and execution. 
+ +![Diagram of the compiler flow](./img/compiler_diagram.png) + ## Julia Execution The 10,000 foot view of the whole process is as follows: diff --git a/doc/src/devdocs/img/compiler_diagram.png b/doc/src/devdocs/img/compiler_diagram.png new file mode 100644 index 0000000000000..5c4b780bca455 Binary files /dev/null and b/doc/src/devdocs/img/compiler_diagram.png differ diff --git a/doc/src/devdocs/img/compiler_diagram.svg b/doc/src/devdocs/img/compiler_diagram.svg new file mode 100644 index 0000000000000..f8fb9172a788f --- /dev/null +++ b/doc/src/devdocs/img/compiler_diagram.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/doc/src/devdocs/jit.md b/doc/src/devdocs/jit.md new file mode 100644 index 0000000000000..f33b968ad3948 --- /dev/null +++ b/doc/src/devdocs/jit.md @@ -0,0 +1,78 @@ +# JIT Design and Implementation + +This document explains the design and implementation of Julia's JIT, after codegen has finished and unoptimized LLVM IR has been produced. The JIT is responsible for optimizing and compiling this IR to machine code, and for linking it into the current process and making the code available for execution. + +## Introduction + +The JIT is responsible for managing compilation resources, looking up previously compiled code, and compiling new code. It is primarily built on LLVM's [On-Request-Compilation](https://llvm.org/docs/ORCv2.html) (ORCv2) technology, which provides support for a number of useful features such as concurrent compilation, lazy compilation, and the ability to compile code in a separate process. Though LLVM provides a basic JIT compiler in the form of LLJIT, Julia uses many ORCv2 APIs directly to create its own custom JIT compiler. + +## Overview + +![Diagram of the compiler flow](./img/compiler_diagram.png) + +Codegen produces an LLVM module containing IR for one or more Julia functions from the original Julia SSA IR produced by type inference (labeled as translate on the compiler diagram above). 
It also produces a mapping of code-instance to LLVM function name. However, though some optimizations have been applied by the Julia-based compiler on Julia IR, the LLVM IR produced by codegen still contains many opportunities for optimization. Thus, the first step the JIT takes is to run a target-independent optimization pipeline[^tdp] on the LLVM module. Then, the JIT runs a target-dependent optimization pipeline, which includes target-specific optimizations and code generation, and outputs an object file. Finally, the JIT links the resulting object file into the current process and makes the code available for execution. All of this is controlled by code in `src/jitlayers.cpp`. + +[^tdp]: This is not a totally-target independent pipeline, as transformations such as vectorization rely upon target information such as vector register width and cost modeling. Additionally, codegen itself makes a few target-dependent assumptions, and the optimization pipeline will take advantage of that knowledge. + +Currently, only one thread at a time is permitted to enter the optimize-compile-link pipeline, due to restrictions imposed by one of our linkers (RuntimeDyld). However, the JIT is designed to support concurrent optimization and compilation, and the linker restriction is expected to be lifted in the future when RuntimeDyld has been fully superseded on all platforms. + +## Optimization Pipeline + +The optimization pipeline is based on LLVM's new pass manager, but the pipeline is customized for Julia's needs. The pipeline is defined in `src/pipeline.cpp`, and broadly proceeds through a number of stages as detailed below. + +1. Early Simplification + 1. These passes are mainly used to simplify the IR and canonicalize patterns so that later passes can identify those patterns more easily. Additionally, various intrinsic calls such as branch prediction hints and annotations are lowered into other metadata or other IR features. 
[`SimplifyCFG`](https://llvm.org/docs/Passes.html#simplifycfg-simplify-the-cfg) (simplify control flow graph), [`DCE`](https://llvm.org/docs/Passes.html#dce-dead-code-elimination) (dead code elimination), and [`SROA`](https://llvm.org/docs/Passes.html#sroa-scalar-replacement-of-aggregates) (scalar replacement of aggregates) are some of the key players here. +2. Early Optimization + 1. These passes are typically cheap and are primarily focused around reducing the number of instructions in the IR and propagating knowledge to other instructions. For example, [`EarlyCSE`](https://en.wikipedia.org/wiki/Common_subexpression_elimination) is used to perform common subexpression elimination, and [`InstCombine`](https://llvm.org/docs/Passes.html#instcombine-combine-redundant-instructions) and [`InstSimplify`](https://llvm.org/doxygen/classllvm_1_1InstSimplifyPass.html#details) perform a number of small peephole optimizations to make operations less expensive. +3. Loop Optimization + 1. These passes canonicalize and simplify loops. Loops are often hot code, which makes loop optimization extremely important for performance. Key players here include [`LoopRotate`](https://llvm.org/docs/Passes.html#loop-rotate-rotate-loops), [`LICM`](https://llvm.org/docs/Passes.html#licm-loop-invariant-code-motion), and [`LoopFullUnroll`](https://llvm.org/docs/Passes.html#loop-unroll-unroll-loops). Some bounds check elimination also happens here, as a result of the [`IRCE`](https://llvm.org/doxygen/InductiveRangeCheckElimination_8cpp_source.html) pass which can prove certain bounds are never exceeded. +4. Scalar Optimization + 1. 
The scalar optimization pipeline contains a number of more expensive, but more powerful passes such as [`GVN`](https://llvm.org/docs/Passes.html#gvn-global-value-numbering) (global value numbering), [`SCCP`](https://llvm.org/docs/Passes.html#sccp-sparse-conditional-constant-propagation) (sparse conditional constant propagation), and another round of bounds check elimination. These passes are expensive, but they can often remove large amounts of code and make vectorization much more successful and effective. Several other simplification and optimization passes intersperse the more expensive ones to reduce the amount of work they have to do. +5. Vectorization + 1. [Automatic vectorization](https://en.wikipedia.org/wiki/Automatic_vectorization) is an extremely powerful transformation for CPU-intensive code. Briefly, vectorization allows execution of a [single instruction on multiple data](https://en.wikipedia.org/wiki/Single_instruction,_multiple_data) (SIMD), e.g. performing 8 addition operations at the same time. However, proving code to be both capable of vectorization and profitable to vectorize is difficult, and this relies heavily on the prior optimization passes to massage the IR into a state where vectorization is worth it. +6. Intrinsic Lowering + 1. Julia inserts a number of custom intrinsics, for reasons such as object allocation, garbage collection, and exception handling. These intrinsics were originally placed to make optimization opportunities more obvious, but they are now lowered into LLVM IR to enable the IR to be emitted as machine code. +7. Cleanup + 1. These passes are last-chance optimizations, and perform small optimizations such as fused multiply-add propagation and division-remainder simplification. Additionally, targets that do not support half-precision floating point numbers will have their half-precision instructions lowered into single-precision instructions here, and passes are added to provide sanitizer support. 
+ +## Target-Dependent Optimization and Code Generation + +LLVM provides target-dependent optimization and machine code generation in the same pipeline, located in the TargetMachine for a given platform. These passes include instruction selection, instruction scheduling, register allocation, and machine code emission. The LLVM documentation provides a good overview of the process, and the LLVM source code is the best place to look for details on the pipeline and passes. + +## Linking + +Currently, Julia is transitioning between two linkers: the older RuntimeDyld linker, and the newer [JITLink](https://llvm.org/docs/JITLink.html) linker. JITLink contains a number of features that RuntimeDyld does not have, such as concurrent and reentrant linking, but currently lacks good support for profiling integrations and does not yet support all of the platforms that RuntimeDyld supports. Over time, JITLink is expected to replace RuntimeDyld entirely. Further details on JITLink can be found in the LLVM documentation. + +## Execution + +Once the code has been linked into the current process, it is available for execution. This fact is made known to the generating codeinst by updating the `invoke`, `specsigflags`, and `specptr` fields appropriately. Codeinsts support upgrading `invoke`, `specsigflags`, and `specptr` fields, so long as every combination of these fields that exists at any given point in time is valid to be called. This allows the JIT to update these fields without invalidating existing codeinsts, supporting a potential future concurrent JIT. Specifically, the following states may be valid: +1. `invoke` is NULL, `specsigflags` is 0b00, `specptr` is NULL + 1. This is the initial state of a codeinst, and indicates that the codeinst has not yet been compiled. +2. `invoke` is non-null, `specsigflags` is 0b00, `specptr` is NULL + 1. This indicates that the codeinst was not compiled with any specialization, and that the codeinst should be invoked directly. 
Note that in this instance, `invoke` does not read either the `specsigflags` or `specptr` fields, and therefore they may be modified without invalidating the `invoke` pointer. +3. `invoke` is non-null, `specsigflags` is 0b10, `specptr` is non-null + 1. This indicates that the codeinst was compiled, but a specialized function signature was deemed unnecessary by codegen. +4. `invoke` is non-null, `specsigflags` is 0b11, `specptr` is non-null + 1. This indicates that the codeinst was compiled, and a specialized function signature was deemed necessary by codegen. The `specptr` field contains a pointer to the specialized function signature. The `invoke` pointer is permitted to read both `specsigflags` and `specptr` fields. + +In addition, there are a number of different transitional states that occur during the update process. To account for these potential situations, the following write and read patterns should be used when dealing with these codeinst fields. + +1. When writing `invoke`, `specsigflags`, and `specptr`: + 1. Perform an atomic compare-exchange operation of specptr assuming the old value was NULL. This compare-exchange operation should have at least acquire-release ordering, to provide ordering guarantees of the remaining memory operations in the write. + 2. If `specptr` was non-null, cease the write operation and wait for bit 0b10 of `specsigflags` to be written. + 3. Write the new low bit of `specsigflags` to its final value. This may be a relaxed write. + 4. Write the new `invoke` pointer to its final value. This must have at least a release memory ordering to synchronize with reads of `invoke`. + 5. Set the second bit of `specsigflags` to 1. This must be at least a release memory ordering to synchronize with reads of `specsigflags`. This step completes the write operation and announces to all other threads that all fields have been set. +2. When reading all of `invoke`, `specsigflags`, and `specptr`: + 1. 
Read the `invoke` field with at least an acquire memory ordering. This load will be referred to as `initial_invoke`. + 2. If `initial_invoke` is NULL, the codeinst is not yet executable. `invoke` is NULL, `specsigflags` may be treated as 0b00, `specptr` may be treated as NULL. + 3. Read the `specptr` field with at least an acquire memory ordering. + 4. If `specptr` is NULL, then the `initial_invoke` pointer must not be relying on `specptr` to guarantee correct execution. Therefore, `invoke` is non-null, `specsigflags` may be treated as 0b00, `specptr` may be treated as NULL. + 5. If `specptr` is non-null, then `initial_invoke` might not be the final `invoke` field that uses `specptr`. This can occur if `specptr` has been written, but `invoke` has not yet been written. Therefore, spin on the second bit of `specsigflags` until it is set to 1 with at least acquire memory ordering. + 6. Re-read the `invoke` field with at least an acquire memory ordering. This load will be referred to as `final_invoke`. + 7. Read the `specsigflags` field with any memory ordering. + 8. `invoke` is `final_invoke`, `specsigflags` is the value read in step 7, `specptr` is the value read in step 3. +3. When updating a `specptr` to a different but equivalent function pointer: + 1. Perform a release store of the new function pointer to `specptr`. Races here must be benign, as the old function pointer is required to still be valid, and any new ones are also required to be valid as well. Once a pointer has been written to `specptr`, it must always be callable whether or not it is later overwritten. + +Although these write, read, and update steps are complicated, they ensure that the JIT can update codeinsts without invalidating existing codeinsts, and that the JIT can update codeinsts without invalidating existing `invoke` pointers. 
This allows the JIT to potentially reoptimize functions at higher optimization levels in the future, and also will allow the JIT to support concurrent compilation of functions in the future. diff --git a/doc/src/devdocs/llvm.md b/doc/src/devdocs/llvm.md index 93b241d703714..4e5e90d7cdbc6 100644 --- a/doc/src/devdocs/llvm.md +++ b/doc/src/devdocs/llvm.md @@ -82,9 +82,39 @@ Here are example settings using `bash` syntax: * `export JULIA_LLVM_ARGS=-debug-only=loop-vectorize` dumps LLVM `DEBUG(...)` diagnostics for loop vectorizer. If you get warnings about "Unknown command line argument", rebuild LLVM with `LLVM_ASSERTIONS = 1`. - * `export JULIA_LLVM_ARGS=-help` shows a list of available options. + * `export JULIA_LLVM_ARGS=-help` shows a list of available options. `export JULIA_LLVM_ARGS=-help-hidden` shows even more. * `export JULIA_LLVM_ARGS="-fatal-warnings -print-options"` is an example how to use multiple options. +### Useful `JULIA_LLVM_ARGS` parameters + * `-print-after=PASS`: prints the IR after any execution of `PASS`, useful for checking changes done by a pass. + * `-print-before=PASS`: prints the IR before any execution of `PASS`, useful for checking the input to a pass. + * `-print-changed`: prints the IR whenever a pass changes the IR, useful for narrowing down which passes are causing problems. + * `-print-(before|after)=MARKER-PASS`: the Julia pipeline ships with a number of marker passes in the pipeline, which can be used to identify where problems or optimizations are occurring. A marker pass is defined as a pass which appears once in the pipeline and performs no transformations on the IR, and is only useful for targeting print-before/print-after. 
Currently, the following marker passes exist in the pipeline: + * BeforeOptimization + * BeforeEarlySimplification + * AfterEarlySimplification + * BeforeEarlyOptimization + * AfterEarlyOptimization + * BeforeLoopOptimization + * BeforeLICM + * AfterLICM + * BeforeLoopSimplification + * AfterLoopSimplification + * AfterLoopOptimization + * BeforeScalarOptimization + * AfterScalarOptimization + * BeforeVectorization + * AfterVectorization + * BeforeIntrinsicLowering + * AfterIntrinsicLowering + * BeforeCleanup + * AfterCleanup + * AfterOptimization + * `-time-passes`: prints the time spent in each pass, useful for identifying which passes are taking a long time. + * `-print-module-scope`: used in conjunction with `-print-(before|after)`, gets the entire module rather than the IR unit received by the pass + * `-debug`: prints out a lot of debugging information throughout LLVM + * `-debug-only=NAME`, prints out debugging statements from files with `DEBUG_TYPE` defined to `NAME`, useful for getting additional context about a problem + ## Debugging LLVM transformations in isolation On occasion, it can be useful to debug LLVM's transformations in isolation from diff --git a/doc/src/devdocs/pkgimg.md b/doc/src/devdocs/pkgimg.md index f97fc36750f18..d9fc1a33a4d24 100644 --- a/doc/src/devdocs/pkgimg.md +++ b/doc/src/devdocs/pkgimg.md @@ -1,4 +1,4 @@ -# Package Images +# [Package Images](@id pkgimages) Julia package images provide object (native code) caches for Julia packages. They are similar to Julia's [system image](@ref dev-sysimg) and support many of the same features. @@ -32,7 +32,7 @@ To avoid having to deal with `link.exe` we use `-flavor gnu`, effectively turnin Dynamic libraries on macOS need to link against `-lSystem`. On recent macOS versions, `-lSystem` is only available for linking when Xcode is available. To that effect we link with `-undefined dynamic_lookup`. 
-## Package images optimized for multiple microarchitectures +## [Package images optimized for multiple microarchitectures](@id pkgimgs-multi-versioning) Similar to [multi-versioning](@ref sysimg-multi-versioning) for system images, package images support multi-versioning. If you are in a heterogenous environment, with a unified cache, you can set the environment variable `JULIA_CPU_TARGET=generic` to multi-version the object caches. diff --git a/doc/src/devdocs/sysimg.md b/doc/src/devdocs/sysimg.md index 6706e30ce97b1..40fcd3fa602f8 100644 --- a/doc/src/devdocs/sysimg.md +++ b/doc/src/devdocs/sysimg.md @@ -42,6 +42,9 @@ All features supported by LLVM are supported and a feature can be disabled with (`+` prefix is also allowed and ignored to be consistent with LLVM syntax). Additionally, a few special features are supported to control the function cloning behavior. +!!! note + It is good practice to specify either `clone_all` or `base()` for every target apart from the first one. This makes it explicit which targets have all functions cloned, and which targets are based on other targets. If this is not done, the default behavior is to not clone every function, and to use the first target's function definition as the fallback when not cloning a function. + 1. `clone_all` By default, only functions that are the most likely to benefit from diff --git a/doc/src/manual/arrays.md b/doc/src/manual/arrays.md index f9e60d83ff052..0b4532e1b423d 100644 --- a/doc/src/manual/arrays.md +++ b/doc/src/manual/arrays.md @@ -107,7 +107,7 @@ where no arguments are given. [Array literal can be typed](@ref man-array-typed- the syntax `T[A, B, C, ...]` where `T` is a type. 
```jldoctest -julia> [1,2,3] # An array of `Int`s +julia> [1, 2, 3] # An array of `Int`s 3-element Vector{Int64}: 1 2 @@ -326,8 +326,8 @@ These syntaxes are shorthands for function calls that themselves are convenience | Syntax | Function | Description | |:---------------------- |:---------------- |:---------------------------------------------------------------------------------------------------------- | | | [`cat`](@ref) | concatenate input arrays along dimension(s) `k` | -| `[A; B; C; ...]` | [`vcat`](@ref) | shorthand for `cat(A...; dims=1) | -| `[A B C ...]` | [`hcat`](@ref) | shorthand for `cat(A...; dims=2) | +| `[A; B; C; ...]` | [`vcat`](@ref) | shorthand for `cat(A...; dims=1)` | +| `[A B C ...]` | [`hcat`](@ref) | shorthand for `cat(A...; dims=2)` | | `[A B; C D; ...]` | [`hvcat`](@ref) | simultaneous vertical and horizontal concatenation | | `[A; C;; B; D;;; ...]` | [`hvncat`](@ref) | simultaneous n-dimensional concatenation, where number of semicolons indicate the dimension to concatenate | @@ -356,7 +356,7 @@ Comprehensions provide a general and powerful way to construct arrays. Comprehen similar to set construction notation in mathematics: ``` -A = [ F(x,y,...) for x=rx, y=ry, ... ] +A = [ F(x, y, ...) for x=rx, y=ry, ... ] ``` The meaning of this form is that `F(x,y,...)` is evaluated with the variables `x`, `y`, etc. taking @@ -440,7 +440,7 @@ Ranges in generators and comprehensions can depend on previous ranges by writing keywords: ```jldoctest -julia> [(i,j) for i=1:3 for j=1:i] +julia> [(i, j) for i=1:3 for j=1:i] 6-element Vector{Tuple{Int64, Int64}}: (1, 1) (2, 1) @@ -455,7 +455,7 @@ In such cases, the result is always 1-d. Generated values can be filtered using the `if` keyword: ```jldoctest -julia> [(i,j) for i=1:3 for j=1:i if i+j == 4] +julia> [(i, j) for i=1:3 for j=1:i if i+j == 4] 2-element Vector{Tuple{Int64, Int64}}: (2, 2) (3, 1) @@ -740,17 +740,17 @@ that is sometimes referred to as pointwise indexing. 
For example, it enables accessing the diagonal elements from the first "page" of `A` from above: ```jldoctest cartesianindex -julia> page = A[:,:,1] +julia> page = A[:, :, 1] 4×4 Matrix{Int64}: 1 5 9 13 2 6 10 14 3 7 11 15 4 8 12 16 -julia> page[[CartesianIndex(1,1), - CartesianIndex(2,2), - CartesianIndex(3,3), - CartesianIndex(4,4)]] +julia> page[[CartesianIndex(1, 1), + CartesianIndex(2, 2), + CartesianIndex(3, 3), + CartesianIndex(4, 4)]] 4-element Vector{Int64}: 1 6 @@ -964,7 +964,7 @@ construct, `i` will be an `Int` if `A` is an array type with fast linear indexin it will be a `CartesianIndex`: ```jldoctest -julia> A = rand(4,3); +julia> A = rand(4, 3); julia> B = view(A, 1:3, 2:3); @@ -1029,9 +1029,9 @@ sizes, such as adding a vector to each column of a matrix. An inefficient way to be to replicate the vector to the size of the matrix: ```julia-repl -julia> a = rand(2,1); A = rand(2,3); +julia> a = rand(2, 1); A = rand(2, 3); -julia> repeat(a,1,3)+A +julia> repeat(a, 1, 3) + A 2×3 Array{Float64,2}: 1.20813 1.82068 1.25387 1.56851 1.86401 1.67846 @@ -1153,9 +1153,9 @@ arranged contiguously in column major order. 
This means that the stride of the f dimension — the spacing between elements in the same column — is `1`: ```julia-repl -julia> A = rand(5,7,2); +julia> A = rand(5, 7, 2); -julia> stride(A,1) +julia> stride(A, 1) 1 ``` diff --git a/doc/src/manual/calling-c-and-fortran-code.md b/doc/src/manual/calling-c-and-fortran-code.md index eab901adc2043..7b889589c592d 100644 --- a/doc/src/manual/calling-c-and-fortran-code.md +++ b/doc/src/manual/calling-c-and-fortran-code.md @@ -1000,7 +1000,7 @@ A table of translations between the macro and function interfaces is given below ## [Calling Convention](@id calling-convention) -The second argument to `ccall` (immediatel preceding return type) can optionally +The second argument to `ccall` (immediately preceding return type) can optionally be a calling convention specifier (the `@ccall` macro currently does not support giving a calling convention). Without any specifier, the platform-default C calling convention is used. Other supported conventions are: `stdcall`, `cdecl`, diff --git a/doc/src/manual/code-loading.md b/doc/src/manual/code-loading.md index d3806ee180f32..743ee83c333a4 100644 --- a/doc/src/manual/code-loading.md +++ b/doc/src/manual/code-loading.md @@ -370,7 +370,7 @@ FooExt = "ExtDep" ... ``` -The keys under `extensions` are the name of the extensions. +The keys under `extensions` are the names of the extensions. They are loaded when all the packages on the right hand side (the extension dependencies) of that extension are loaded. If an extension only has one extension dependency the list of extension dependencies can be written as just a string for brevity. 
The location for the entry point of the extension is either in `ext/FooExt.jl` or `ext/FooExt/FooExt.jl` for diff --git a/doc/src/manual/command-line-interface.md b/doc/src/manual/command-line-interface.md index 8164299f01250..e1651c61a3ec3 100644 --- a/doc/src/manual/command-line-interface.md +++ b/doc/src/manual/command-line-interface.md @@ -1,4 +1,4 @@ -# Command-line Interface +# [Command-line Interface](@id cli) ## Using arguments inside scripts diff --git a/doc/src/manual/environment-variables.md b/doc/src/manual/environment-variables.md index ac5a6fad6cc08..eb26063a5e61e 100644 --- a/doc/src/manual/environment-variables.md +++ b/doc/src/manual/environment-variables.md @@ -376,6 +376,53 @@ should have at the terminal. The formatting `Base.answer_color()` (default: normal, `"\033[0m"`) that output should have at the terminal. +## System and Package Image Building + +### `JULIA_CPU_TARGET` + +Modify the target machine architecture for (pre)compiling +[system](@ref sysimg-multi-versioning) and [package images](@ref pkgimgs-multi-versioning). +`JULIA_CPU_TARGET` only affects machine code image generation being output to a disk cache. +Unlike the `--cpu-target`, or `-C`, [command line option](@ref cli), it does not influence +just-in-time (JIT) code generation within a Julia session where machine code is only +stored in memory. + +Valid values for `JULIA_CPU_TARGET` can be obtained by executing `julia -C help`. + +Setting `JULIA_CPU_TARGET` is important for heterogeneous compute systems where processors of +distinct types or features may be present. This is commonly encountered in high performance +computing (HPC) clusters since the component nodes may be using distinct processors. + +The CPU target string is a list of strings separated by `;` each string starts with a CPU +or architecture name and followed by an optional list of features separated by `,`. 
+A `generic` or empty CPU name means the basic required feature set of the target ISA +which is at least the architecture the C/C++ runtime is compiled with. Each string +is interpreted by LLVM. + +A few special features are supported: +1. `clone_all` + + This forces the target to have all functions in sysimg cloned. + When used in negative form (i.e. `-clone_all`), this disables full clone that's + enabled by default for certain targets. + +2. `base([0-9]*)` + + This specifies the (0-based) base target index. The base target is the target + that the current target is based on, i.e. the functions that are not being cloned + will use the version in the base target. This option causes the base target to be + fully cloned (as if `clone_all` is specified for it) if it is not the default target (0). + The index can only be smaller than the current index. + +3. `opt_size` + + Optimize for size with minimum performance impact. Clang/GCC's `-Os`. + +4. `min_size` + + Optimize only for size. Clang's `-Oz`. + + ## Debugging and profiling ### `JULIA_DEBUG` diff --git a/doc/src/manual/faq.md index e3960ee1a4690..bdecb5ecf106f 100644 --- a/doc/src/manual/faq.md +++ b/doc/src/manual/faq.md @@ -22,11 +22,11 @@ On the other hand, language *interoperability* is extremely useful: we want to e ### How does Julia define its public API? -The only interfaces that are stable with respect to [SemVer](https://semver.org/) of `julia` -version are the Julia `Base` and standard libraries interfaces described in -[the documentation](https://docs.julialang.org/) and not marked as unstable (e.g., -experimental and internal). Functions, types, and constants are not part of the public -API if they are not included in the documentation, _even if they have docstrings_. +Julia `Base` and standard library functionality described in +[the documentation](https://docs.julialang.org/) that is not marked as unstable +(e.g. 
experimental and internal) is covered by [SemVer](https://semver.org/). +Functions, types, and constants are not part of the public API if they are not +included in the documentation, _even if they have docstrings_. ### There is a useful undocumented function/type/constant. Can I use it? @@ -36,8 +36,8 @@ a complex non-public API, especially when using it from a stable package, it is to open an [issue](https://github.com/JuliaLang/julia/issues) or [pull request](https://github.com/JuliaLang/julia/pulls) to start a discussion for turning it into a public API. However, we do not discourage the attempt to create packages that expose -stable public interfaces while relying on non-public implementation details of `julia` and -buffering the differences across different `julia` versions. +stable public interfaces while relying on non-public implementation details of Julia and +buffering the differences across different Julia versions. ### The documentation is not accurate enough. Can I rely on the existing behavior? @@ -94,6 +94,9 @@ When a file is run as the main script using `julia file.jl` one might want to ac functionality like command line argument handling. A way to determine that a file is run in this fashion is to check if `abspath(PROGRAM_FILE) == @__FILE__` is `true`. +However, it is recommended to not write files that double as a script and as an importable library. +If one needs functionality both available as a library and a script, it is better to write it as a library, then import the functionality into a distinct script. 
+ ### [How do I catch CTRL-C in a script?](@id catch-ctrl-c) Running a Julia script using `julia file.jl` does not throw diff --git a/doc/src/manual/getting-started.md b/doc/src/manual/getting-started.md index 16dab24afecf9..e972788022de6 100644 --- a/doc/src/manual/getting-started.md +++ b/doc/src/manual/getting-started.md @@ -34,7 +34,7 @@ command: $ julia script.jl ``` -You can pass additional arguments to Julia, and to your program `script.jl`. A detailed list of all the available options can be found under [Command-line Interface](@ref). +You can pass additional arguments to Julia, and to your program `script.jl`. A detailed list of all the available options can be found under [Command-line Interface](@ref cli). ## Resources diff --git a/doc/src/manual/multi-threading.md b/doc/src/manual/multi-threading.md index be64390e473f2..056ceb1363fd7 100644 --- a/doc/src/manual/multi-threading.md +++ b/doc/src/manual/multi-threading.md @@ -239,6 +239,68 @@ julia> a Note that [`Threads.@threads`](@ref) does not have an optional reduction parameter like [`@distributed`](@ref). +### Using `@threads` without data races + +Taking the example of a naive sum + +```julia-repl +julia> function sum_single(a) + s = 0 + for i in a + s += i + end + s + end +sum_single (generic function with 1 method) + +julia> sum_single(1:1_000_000) +500000500000 +``` + +Simply adding `@threads` exposes a data race with multiple threads reading and writing `s` at the same time. +```julia-repl +julia> function sum_multi_bad(a) + s = 0 + Threads.@threads for i in a + s += i + end + s + end +sum_multi_bad (generic function with 1 method) + +julia> sum_multi_bad(1:1_000_000) +70140554652 +``` + +Note that the result is not `500000500000` as it should be, and will most likely change each evaluation. + +To fix this, buffers that are specific to the task may be used to segment the sum into chunks that are race-free. 
+Here `sum_single` is reused, with its own internal buffer `s`, and vector `a` is split into `nthreads()` +chunks for parallel work via `nthreads()` `@spawn`-ed tasks. + +```julia-repl +julia> function sum_multi_good(a) + chunks = Iterators.partition(a, length(a) ÷ Threads.nthreads()) + tasks = map(chunks) do chunk + Threads.@spawn sum_single(chunk) + end + chunk_sums = fetch.(tasks) + return sum_single(chunk_sums) + end +sum_multi_good (generic function with 1 method) + +julia> sum_multi_good(1:1_000_000) +500000500000 +``` +!!! Note + Buffers should not be managed based on `threadid()` i.e. `buffers = zeros(Threads.nthreads())` because concurrent tasks + can yield, meaning multiple concurrent tasks may use the same buffer on a given thread, introducing risk of data races. + Further, when more than one thread is available tasks may change thread at yield points, which is known as + [task migration](@ref man-task-migration). + +Another option is the use of atomic operations on variables shared across tasks/threads, which may be more performant +depending on the characteristics of the operations. + ## Atomic Operations Julia supports accessing and modifying values *atomically*, that is, in a thread-safe way to avoid @@ -388,6 +450,20 @@ threads in Julia: This may require some transitional work across the ecosystem before threading can be widely adopted with confidence. See the next section for further details. +## [Task Migration](@id man-task-migration) + +After a task starts running on a certain thread it may move to a different thread if the task yields. + +Such tasks may have been started with [`@spawn`](@ref Threads.@spawn) or [`@threads`](@ref Threads.@threads), +although the `:static` schedule option for `@threads` does freeze the threadid. + +This means that in most cases [`threadid()`](@ref Threads.threadid) should not be treated as constant within a task, +and therefore should not be used to index into a vector of buffers or stateful objects. + +!!! 
compat "Julia 1.7" + Task migration was introduced in Julia 1.7. Before this, tasks always remained on the same thread that they were + started on. + ## Safe use of Finalizers Because finalizers can interrupt any code, they must be very careful in how diff --git a/doc/src/manual/networking-and-streams.md index 1ee2f33de5c23..00a10177b2155 100644 --- a/doc/src/manual/networking-and-streams.md +++ b/doc/src/manual/networking-and-streams.md @@ -120,7 +120,28 @@ of common properties. ## Working with Files -Like many other environments, Julia has an [`open`](@ref) function, which takes a filename and +You can write content to a file with the `write(filename::String, content)` method: + +```julia-repl +julia> write("hello.txt", "Hello, World!") +13 +``` + +_(`13` is the number of bytes written.)_ + +You can read the contents of a file with the `read(filename::String)` method, or `read(filename::String, String)` +to read the contents as a string: + +```julia-repl +julia> read("hello.txt", String) +"Hello, World!" +``` + + +### Advanced: streaming files + +The `read` and `write` methods above allow you to read and write file contents. Like many other +environments, Julia also has an [`open`](@ref) function, which takes a filename and returns an [`IOStream`](@ref) object that you can use to read and write things from the file. For example, if we have a file, `hello.txt`, whose contents are `Hello, World!`: diff --git a/doc/src/manual/performance-tips.md index ffb84333e8e78..c86630ce2a8f1 100644 --- a/doc/src/manual/performance-tips.md +++ b/doc/src/manual/performance-tips.md @@ -1631,3 +1631,32 @@ will not require this degree of programmer annotation to attain performance. In the mean time, some user-contributed packages like [FastClosures](https://github.com/c42f/FastClosures.jl) automate the insertion of `let` statements as in `abmult3`. 
+ +## [Multithreading and linear algebra](@id man-multithreading-linear-algebra) + +This section applies to multithreaded Julia code which, in each thread, performs linear algebra operations. +Indeed, these linear algebra operations involve BLAS / LAPACK calls, which are themselves multithreaded. +In this case, one must ensure that cores aren't oversubscribed due to the two different types of multithreading. + +Julia compiles and uses its own copy of OpenBLAS for linear algebra, whose number of threads is controlled by the environment variable `OPENBLAS_NUM_THREADS`. +It can either be set as a command line option when launching Julia, or modified during the Julia session with `BLAS.set_num_threads(N)` (the submodule `BLAS` is exported by `using LinearAlgebra`). +Its current value can be accessed with `BLAS.get_num_threads()`. + +When the user does not specify anything, Julia tries to choose a reasonable value for the number of OpenBLAS threads (e.g. based on the platform, the Julia version, etc.). +However, it is generally recommended to check and set the value manually. +The OpenBLAS behavior is as follows: + +* If `OPENBLAS_NUM_THREADS=1`, OpenBLAS uses the calling Julia thread(s), i.e. it "lives in" the Julia thread that runs the computation. +* If `OPENBLAS_NUM_THREADS=N>1`, OpenBLAS creates and manages its own pool of threads (`N` in total). There is just one OpenBLAS thread pool shared among all Julia threads. + +When you start Julia in multithreaded mode with `JULIA_NUM_THREADS=X`, it is generally recommended to set `OPENBLAS_NUM_THREADS=1`. 
+Given the behavior described above, increasing the number of BLAS threads to `N>1` can very easily lead to worse performance, in particular when `N<'$@' -e "s/@JULIA_SHLIB_SYMBOL_VERSION@/JL_LIBJULIA_$(SOMAJOR)/" \ + -e "s/@LLVM_SHLIB_SYMBOL_VERSION@/$(LLVM_SHLIB_SYMBOL_VERSION)/" + +$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(OBJS) $(MMTK_OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV) + @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(MMTK_OBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-internal.$(SHLIB_EXT) $@ $(DSYMUTIL) $@ -$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(DOBJS) $(MMTK_DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV) - @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(MMTK_DOBJS) $(RPATH_LIB) -o $@ \ +$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(DOBJS) $(MMTK_DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV) + @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(MMTK_DOBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@ $(DSYMUTIL) $@ @@ -417,13 +426,13 @@ libjulia-internal-release: $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_ libjulia-internal-debug: $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT) libjulia-internal-debug libjulia-internal-release: $(PUBLIC_HEADER_TARGETS) 
-$(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(CODEGEN_OBJS) $(BUILDDIR)/support/libsupport.a $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT) +$(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(CODEGEN_OBJS) $(BUILDDIR)/support/libsupport.a $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT) @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(CODEGEN_OBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(JLIBLDFLAGS) $(CG_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-codegen.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-codegen.$(SHLIB_EXT) $@ $(DSYMUTIL) $@ -$(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(CODEGEN_DOBJS) $(BUILDDIR)/support/libsupport-debug.a $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT) +$(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(CODEGEN_DOBJS) $(BUILDDIR)/support/libsupport-debug.a $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT) @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(CODEGEN_DOBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(JLIBLDFLAGS) $(CG_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-codegen-debug.$(SHLIB_EXT) $@ @@ -450,9 +459,10 @@ $(CODEGEN_OBJS): SHIPFLAGS += -DJL_LIBRARY_EXPORTS_CODEGEN $(CODEGEN_DOBJS): DEBUGFLAGS += -DJL_LIBRARY_EXPORTS_CODEGEN clean: - -rm -fr $(build_shlibdir)/libjulia-internal* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest* - -rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc - -rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a + -rm -fr $(build_shlibdir)/libjulia-internal* 
$(build_shlibdir)/libjulia-codegen* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest* + -rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc $(BUILDDIR)/jl_internal_funcs.inc + -rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a $(BUILDDIR)/*.h.gen + -rm -f $(BUILDDIR)/julia.expmap -rm -f $(BUILDDIR)/julia_version.h $(MMTK_OBJS) $(MMTK_DOBJS) clean-flisp: @@ -507,36 +517,36 @@ clang-sagc-%: $(SRCDIR)/%.c $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) .F @$(call PRINT_ANALYZE, $(build_depsbindir)/clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text --analyzer-no-default-checks \ -Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker \ $(SA_EXCEPTIONS-$(notdir $<)) \ - $(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(JL_CFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -x c $<) + $(CLANGSA_FLAGS) $(JCPPFLAGS_CLANG) $(JCFLAGS_CLANG) $(JL_CFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -x c $<) clang-sagc-%: $(SRCDIR)/%.cpp $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check @$(call PRINT_ANALYZE, $(build_depsbindir)/clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text --analyzer-no-default-checks \ -Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker \ $(SA_EXCEPTIONS-$(notdir $<)) \ - $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(JL_CXXFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -x c++ $<) + $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS_CLANG) $(JCXXFLAGS_CLANG) $(JL_CXXFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -x c++ $<) clang-sa-%: JL_CXXFLAGS += -UNDEBUG clang-sa-%: $(SRCDIR)/%.c .FORCE | analyzegc-deps-check @$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze 
-Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text \ -Xanalyzer -analyzer-disable-checker=deadcode.DeadStores \ $(SA_EXCEPTIONS-$(notdir $<)) \ - $(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(JL_CFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -Werror -x c $<) + $(CLANGSA_FLAGS) $(JCPPFLAGS_CLANG) $(JCFLAGS_CLANG) $(JL_CFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -Werror -x c $<) clang-sa-%: $(SRCDIR)/%.cpp .FORCE | analyzegc-deps-check @$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text \ -Xanalyzer -analyzer-disable-checker=deadcode.DeadStores \ $(SA_EXCEPTIONS-$(notdir $<)) \ - $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(JL_CXXFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -Werror -x c++ $<) + $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS_CLANG) $(JCXXFLAGS_CLANG) $(JL_CXXFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -Werror -x c++ $<) clang-tidy-%: $(SRCDIR)/%.c $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check @$(call PRINT_ANALYZE, $(build_depsbindir)/clang-tidy $< -header-filter='.*' --quiet \ -load $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) --checks='-clang-analyzer-*$(COMMA)-clang-diagnostic-*$(COMMA)concurrency-implicit-atomics' --warnings-as-errors='*' \ - -- $(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(JL_CFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -fno-caret-diagnostics -x c) + -- $(CLANGSA_FLAGS) $(JCPPFLAGS_CLANG) $(JCFLAGS_CLANG) $(JL_CFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -fno-caret-diagnostics -x c) clang-tidy-%: $(SRCDIR)/%.cpp $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check @$(call PRINT_ANALYZE, $(build_depsbindir)/clang-tidy $< -header-filter='.*' --quiet \ -load $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) --checks='-clang-analyzer-*$(COMMA)-clang-diagnostic-*$(COMMA)concurrency-implicit-atomics' 
--warnings-as-errors='*' \ - -- $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(JL_CXXFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics --system-header-prefix=llvm -Wno-deprecated-declarations -fno-caret-diagnostics -x c++) + -- $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS_CLANG) $(JCXXFLAGS_CLANG) $(JL_CXXFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics --system-header-prefix=llvm -Wno-deprecated-declarations -fno-caret-diagnostics -x c++) # set the exports for the source files based on where they are getting linked -clang-sa-% clang-sagc-% clang-tidy-%: DEBUGFLAGS += -DJL_LIBRARY_EXPORTS +clang-sa-% clang-sagc-% clang-tidy-%: DEBUGFLAGS_CLANG += -DJL_LIBRARY_EXPORTS # Add C files as a target of `analyzesrc` and `analyzegc` and `tidysrc` tidysrc: $(addprefix clang-tidy-,$(filter-out $(basename $(SKIP_IMPLICIT_ATOMICS)),$(CODEGEN_SRCS) $(SRCS))) diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index cf6378b4f926b..1f02a014175b4 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -305,6 +305,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm jl_codegen_params_t params(ctxt, std::move(target_info.first), std::move(target_info.second)); params.params = cgparams; params.imaging = imaging; + params.debug_level = jl_options.debug_level; params.external_linkage = _external_linkage; size_t compile_for[] = { jl_typeinf_world, _world }; for (int worlds = 0; worlds < 2; worlds++) { @@ -951,10 +952,15 @@ struct ShardTimers { void emitFloat16Wrappers(Module &M, bool external); +struct AOTOutputs { + SmallVector unopt, opt, obj, asm_; +}; + // Perform the actual optimization and emission of the output files -static void add_output_impl(Module &M, TargetMachine &SourceTM, std::string *outputs, const std::string *names, - NewArchiveMember *unopt, NewArchiveMember *opt, NewArchiveMember *obj, NewArchiveMember *asm_, - ShardTimers &timers, unsigned shardidx) { +static AOTOutputs 
add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimers &timers, + bool unopt, bool opt, bool obj, bool asm_) { + assert((unopt || opt || obj || asm_) && "no output requested"); + AOTOutputs out; auto TM = std::unique_ptr( SourceTM.getTarget().createTargetMachine( SourceTM.getTargetTriple().str(), @@ -967,17 +973,16 @@ static void add_output_impl(Module &M, TargetMachine &SourceTM, std::string *out if (unopt) { timers.unopt.startTimer(); - raw_string_ostream OS(*outputs); + raw_svector_ostream OS(out.unopt); PassBuilder PB; AnalysisManagers AM{*TM, PB, OptimizationLevel::O0}; ModulePassManager MPM; MPM.addPass(BitcodeWriterPass(OS)); MPM.run(M, AM.MAM); - *unopt = NewArchiveMember(MemoryBufferRef(*outputs++, *names++)); timers.unopt.stopTimer(); } if (!opt && !obj && !asm_) { - return; + return out; } assert(!verifyModule(M, &errs())); @@ -1036,43 +1041,38 @@ static void add_output_impl(Module &M, TargetMachine &SourceTM, std::string *out if (opt) { timers.opt.startTimer(); - raw_string_ostream OS(*outputs); + raw_svector_ostream OS(out.opt); PassBuilder PB; AnalysisManagers AM{*TM, PB, OptimizationLevel::O0}; ModulePassManager MPM; MPM.addPass(BitcodeWriterPass(OS)); MPM.run(M, AM.MAM); - *opt = NewArchiveMember(MemoryBufferRef(*outputs++, *names++)); timers.opt.stopTimer(); } if (obj) { timers.obj.startTimer(); - SmallVector Buffer; - raw_svector_ostream OS(Buffer); + raw_svector_ostream OS(out.obj); legacy::PassManager emitter; addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis()); if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_ObjectFile, false)) jl_safe_printf("ERROR: target does not support generation of object files\n"); emitter.run(M); - *outputs = { Buffer.data(), Buffer.size() }; - *obj = NewArchiveMember(MemoryBufferRef(*outputs++, *names++)); timers.obj.stopTimer(); } if (asm_) { timers.asm_.startTimer(); - SmallVector Buffer; - raw_svector_ostream OS(Buffer); + raw_svector_ostream OS(out.asm_); legacy::PassManager 
emitter; addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis()); if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_AssemblyFile, false)) jl_safe_printf("ERROR: target does not support generation of assembly files\n"); emitter.run(M); - *outputs = { Buffer.data(), Buffer.size() }; - *asm_ = NewArchiveMember(MemoryBufferRef(*outputs++, *names++)); timers.asm_.stopTimer(); } + + return out; } // serialize module to bitcode @@ -1232,20 +1232,12 @@ static void dropUnusedGlobals(Module &M) { // Entrypoint to optionally-multithreaded image compilation. This handles global coordination of the threading, // as well as partitioning, serialization, and deserialization. -static void add_output(Module &M, TargetMachine &TM, std::vector &outputs, StringRef name, - std::vector &unopt, std::vector &opt, - std::vector &obj, std::vector &asm_, - bool unopt_out, bool opt_out, bool obj_out, bool asm_out, - unsigned threads, ModuleInfo module_info) { - unsigned outcount = unopt_out + opt_out + obj_out + asm_out; - assert(outcount); - outputs.resize(outputs.size() + outcount * threads * 2); - auto names_start = outputs.data() + outputs.size() - outcount * threads * 2; - auto outputs_start = names_start + outcount * threads; - unopt.resize(unopt.size() + unopt_out * threads); - opt.resize(opt.size() + opt_out * threads); - obj.resize(obj.size() + obj_out * threads); - asm_.resize(asm_.size() + asm_out * threads); +template +static SmallVector add_output(Module &M, TargetMachine &TM, StringRef name, unsigned threads, + bool unopt_out, bool opt_out, bool obj_out, bool asm_out, ModuleReleasedFunc module_released) { + SmallVector outputs(threads); + assert(threads); + assert(unopt_out || opt_out || obj_out || asm_out); // Timers for timing purposes TimerGroup timer_group("add_output", ("Time to optimize and emit LLVM module " + name).str()); SmallVector timers(threads); @@ -1281,28 +1273,13 @@ static void add_output(Module &M, TargetMachine &TM, std::vector &o 
errs() << "WARNING: Invalid value for JULIA_IMAGE_TIMINGS: " << env << "\n"; } } - for (unsigned i = 0; i < threads; ++i) { - auto start = names_start + i * outcount; - auto istr = std::to_string(i); - if (unopt_out) - *start++ = (name + "_unopt#" + istr + ".bc").str(); - if (opt_out) - *start++ = (name + "_opt#" + istr + ".bc").str(); - if (obj_out) - *start++ = (name + "#" + istr + ".o").str(); - if (asm_out) - *start++ = (name + "#" + istr + ".s").str(); - } // Single-threaded case if (threads == 1) { output_timer.startTimer(); - add_output_impl(M, TM, outputs_start, names_start, - unopt_out ? unopt.data() + unopt.size() - 1 : nullptr, - opt_out ? opt.data() + opt.size() - 1 : nullptr, - obj_out ? obj.data() + obj.size() - 1 : nullptr, - asm_out ? asm_.data() + asm_.size() - 1 : nullptr, - timers[0], 0); + outputs[0] = add_output_impl(M, TM, timers[0], unopt_out, opt_out, obj_out, asm_out); output_timer.stopTimer(); + // Don't need M anymore + module_released(M); if (!report_timings) { timer_group.clear(); @@ -1312,7 +1289,7 @@ static void add_output(Module &M, TargetMachine &TM, std::vector &o t.print(dbgs(), true); } } - return; + return outputs; } partition_timer.startTimer(); @@ -1331,17 +1308,15 @@ static void add_output(Module &M, TargetMachine &TM, std::vector &o auto serialized = serializeModule(M); serialize_timer.stopTimer(); - output_timer.startTimer(); + // Don't need M anymore, since we'll only read from serialized from now on + module_released(M); - auto unoptstart = unopt_out ? unopt.data() + unopt.size() - threads : nullptr; - auto optstart = opt_out ? opt.data() + opt.size() - threads : nullptr; - auto objstart = obj_out ? obj.data() + obj.size() - threads : nullptr; - auto asmstart = asm_out ? 
asm_.data() + asm_.size() - threads : nullptr; + output_timer.startTimer(); // Start all of the worker threads std::vector workers(threads); for (unsigned i = 0; i < threads; i++) { - workers[i] = std::thread([&, i](){ + workers[i] = std::thread([&, i]() { LLVMContext ctx; // Lazily deserialize the entire module timers[i].deserialize.startTimer(); @@ -1366,12 +1341,7 @@ static void add_output(Module &M, TargetMachine &TM, std::vector &o dropUnusedGlobals(*M); timers[i].deletion.stopTimer(); - add_output_impl(*M, TM, outputs_start + i * outcount, names_start + i * outcount, - unoptstart ? unoptstart + i : nullptr, - optstart ? optstart + i : nullptr, - objstart ? objstart + i : nullptr, - asmstart ? asmstart + i : nullptr, - timers[i], i); + outputs[i] = add_output_impl(*M, TM, timers[i], unopt_out, opt_out, obj_out, asm_out); }); } @@ -1399,6 +1369,7 @@ static void add_output(Module &M, TargetMachine &TM, std::vector &o } dbgs() << "]\n"; } + return outputs; } static unsigned compute_image_thread_count(const ModuleInfo &info) { @@ -1462,7 +1433,7 @@ extern "C" JL_DLLEXPORT_CODEGEN void jl_dump_native_impl(void *native_code, const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, - const char *sysimg_data, size_t sysimg_len, ios_t *s) + ios_t *z, ios_t *s) { JL_TIMING(NATIVE_AOT, NATIVE_Dump); jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; @@ -1471,14 +1442,11 @@ void jl_dump_native_impl(void *native_code, delete data; return; } - auto TSCtx = data->M.getContext(); - auto lock = TSCtx.getLock(); - LLVMContext &Context = *TSCtx.getContext(); // We don't want to use MCJIT's target machine because // it uses the large code model and we may potentially // want less optimizations there. 
// make sure to emit the native object format, even if FORCE_ELF was set in codegen - Triple TheTriple(data->M.getModuleUnlocked()->getTargetTriple()); + Triple TheTriple(data->M.withModuleDo([](Module &M) { return M.getTargetTriple(); })); if (TheTriple.isOSWindows()) { TheTriple.setObjectFormat(Triple::COFF); } else if (TheTriple.isOSDarwin()) { @@ -1504,21 +1472,48 @@ void jl_dump_native_impl(void *native_code, CMModel, CodeGenOpt::Aggressive // -O3 TODO: respect command -O0 flag? )); + auto DL = jl_create_datalayout(*SourceTM); + std::string StackProtectorGuard; + unsigned OverrideStackAlignment; + data->M.withModuleDo([&](Module &M) { + StackProtectorGuard = M.getStackProtectorGuard().str(); + OverrideStackAlignment = M.getOverrideStackAlignment(); + }); + auto compile = [&](Module &M, StringRef name, unsigned threads, auto module_released) { + return add_output(M, *SourceTM, name, threads, !!unopt_bc_fname, !!bc_fname, !!obj_fname, !!asm_fname, module_released); + }; - std::vector bc_Archive; - std::vector obj_Archive; - std::vector asm_Archive; - std::vector unopt_bc_Archive; - std::vector outputs; - - // Reset the target triple to make sure it matches the new target machine - auto dataM = data->M.getModuleUnlocked(); - dataM->setTargetTriple(SourceTM->getTargetTriple().str()); - dataM->setDataLayout(jl_create_datalayout(*SourceTM)); - - Type *T_size = dataM->getDataLayout().getIntPtrType(Context); - Type *T_psize = T_size->getPointerTo(); + SmallVector sysimg_outputs; + SmallVector data_outputs; + SmallVector metadata_outputs; + if (z) { + LLVMContext Context; + Module sysimgM("sysimg", Context); + sysimgM.setTargetTriple(TheTriple.str()); + sysimgM.setDataLayout(DL); + sysimgM.setStackProtectorGuard(StackProtectorGuard); + sysimgM.setOverrideStackAlignment(OverrideStackAlignment); + Constant *data = ConstantDataArray::get(Context, + ArrayRef((const unsigned char*)z->buf, z->size)); + auto sysdata = new GlobalVariable(sysimgM, data->getType(), false, + 
GlobalVariable::ExternalLinkage, + data, "jl_system_image_data"); + sysdata->setAlignment(Align(64)); + addComdat(sysdata, TheTriple); + Constant *len = ConstantInt::get(sysimgM.getDataLayout().getIntPtrType(Context), z->size); + addComdat(new GlobalVariable(sysimgM, len->getType(), true, + GlobalVariable::ExternalLinkage, + len, "jl_system_image_size"), TheTriple); + // Free z here, since we've copied out everything into data + // Results in serious memory savings + ios_close(z); + free(z); + // Note that we don't set z to null, this allows the check in WRITE_ARCHIVE + // to function as expected + // no need to free the module/context, destructor handles that + sysimg_outputs = compile(sysimgM, "sysimg", 1, [](Module &) {}); + } bool imaging_mode = imaging_default() || jl_options.outputo; @@ -1526,191 +1521,201 @@ void jl_dump_native_impl(void *native_code, unsigned nfvars = 0; unsigned ngvars = 0; - ModuleInfo module_info = compute_module_info(*dataM); - LLVM_DEBUG(dbgs() - << "Dumping module with stats:\n" - << " globals: " << module_info.globals << "\n" - << " functions: " << module_info.funcs << "\n" - << " basic blocks: " << module_info.bbs << "\n" - << " instructions: " << module_info.insts << "\n" - << " clones: " << module_info.clones << "\n" - << " weight: " << module_info.weight << "\n" - ); - - // add metadata information - if (imaging_mode) { - multiversioning_preannotate(*dataM); - { - DenseSet fvars(data->jl_sysimg_fvars.begin(), data->jl_sysimg_fvars.end()); - for (auto &F : *dataM) { - if (F.hasFnAttribute("julia.mv.reloc") || F.hasFnAttribute("julia.mv.fvar")) { - if (fvars.insert(&F).second) { - data->jl_sysimg_fvars.push_back(&F); + // Reset the target triple to make sure it matches the new target machine + + bool has_veccall = false; + + data->M.withModuleDo([&](Module &dataM) { + dataM.setTargetTriple(TheTriple.str()); + dataM.setDataLayout(DL); + auto &Context = dataM.getContext(); + + Type *T_psize = 
dataM.getDataLayout().getIntPtrType(Context)->getPointerTo(); + + // add metadata information + if (imaging_mode) { + multiversioning_preannotate(dataM); + { + DenseSet fvars(data->jl_sysimg_fvars.begin(), data->jl_sysimg_fvars.end()); + for (auto &F : dataM) { + if (F.hasFnAttribute("julia.mv.reloc") || F.hasFnAttribute("julia.mv.fvar")) { + if (fvars.insert(&F).second) { + data->jl_sysimg_fvars.push_back(&F); + } } } } - } - threads = compute_image_thread_count(module_info); - LLVM_DEBUG(dbgs() << "Using " << threads << " to emit aot image\n"); - nfvars = data->jl_sysimg_fvars.size(); - ngvars = data->jl_sysimg_gvars.size(); - emit_offset_table(*dataM, data->jl_sysimg_gvars, "jl_gvars", T_psize); - emit_offset_table(*dataM, data->jl_sysimg_fvars, "jl_fvars", T_psize); - std::vector idxs; - idxs.resize(data->jl_sysimg_gvars.size()); - std::iota(idxs.begin(), idxs.end(), 0); - auto gidxs = ConstantDataArray::get(Context, idxs); - auto gidxs_var = new GlobalVariable(*dataM, gidxs->getType(), true, - GlobalVariable::ExternalLinkage, - gidxs, "jl_gvar_idxs"); - gidxs_var->setVisibility(GlobalValue::HiddenVisibility); - gidxs_var->setDSOLocal(true); - idxs.clear(); - idxs.resize(data->jl_sysimg_fvars.size()); - std::iota(idxs.begin(), idxs.end(), 0); - auto fidxs = ConstantDataArray::get(Context, idxs); - auto fidxs_var = new GlobalVariable(*dataM, fidxs->getType(), true, - GlobalVariable::ExternalLinkage, - fidxs, "jl_fvar_idxs"); - fidxs_var->setVisibility(GlobalValue::HiddenVisibility); - fidxs_var->setDSOLocal(true); - dataM->addModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(Context, "_0")); - - // reflect the address of the jl_RTLD_DEFAULT_handle variable - // back to the caller, so that we can check for consistency issues - GlobalValue *jlRTLD_DEFAULT_var = jl_emit_RTLD_DEFAULT_var(dataM); - addComdat(new GlobalVariable(*dataM, - jlRTLD_DEFAULT_var->getType(), - true, - GlobalVariable::ExternalLinkage, - jlRTLD_DEFAULT_var, - 
"jl_RTLD_DEFAULT_handle_pointer"), TheTriple); - // let the compiler know we are going to internalize a copy of this, - // if it has a current usage with ExternalLinkage - auto small_typeof_copy = dataM->getGlobalVariable("small_typeof"); - if (small_typeof_copy) { - small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility); - small_typeof_copy->setDSOLocal(true); + ModuleInfo module_info = compute_module_info(dataM); + LLVM_DEBUG(dbgs() + << "Dumping module with stats:\n" + << " globals: " << module_info.globals << "\n" + << " functions: " << module_info.funcs << "\n" + << " basic blocks: " << module_info.bbs << "\n" + << " instructions: " << module_info.insts << "\n" + << " clones: " << module_info.clones << "\n" + << " weight: " << module_info.weight << "\n" + ); + threads = compute_image_thread_count(module_info); + LLVM_DEBUG(dbgs() << "Using " << threads << " to emit aot image\n"); + nfvars = data->jl_sysimg_fvars.size(); + ngvars = data->jl_sysimg_gvars.size(); + emit_offset_table(dataM, data->jl_sysimg_gvars, "jl_gvars", T_psize); + emit_offset_table(dataM, data->jl_sysimg_fvars, "jl_fvars", T_psize); + std::vector idxs; + idxs.resize(data->jl_sysimg_gvars.size()); + std::iota(idxs.begin(), idxs.end(), 0); + auto gidxs = ConstantDataArray::get(Context, idxs); + auto gidxs_var = new GlobalVariable(dataM, gidxs->getType(), true, + GlobalVariable::ExternalLinkage, + gidxs, "jl_gvar_idxs"); + gidxs_var->setVisibility(GlobalValue::HiddenVisibility); + gidxs_var->setDSOLocal(true); + idxs.clear(); + idxs.resize(data->jl_sysimg_fvars.size()); + std::iota(idxs.begin(), idxs.end(), 0); + auto fidxs = ConstantDataArray::get(Context, idxs); + auto fidxs_var = new GlobalVariable(dataM, fidxs->getType(), true, + GlobalVariable::ExternalLinkage, + fidxs, "jl_fvar_idxs"); + fidxs_var->setVisibility(GlobalValue::HiddenVisibility); + fidxs_var->setDSOLocal(true); + dataM.addModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(Context, "_0")); + + // reflect the 
address of the jl_RTLD_DEFAULT_handle variable + // back to the caller, so that we can check for consistency issues + GlobalValue *jlRTLD_DEFAULT_var = jl_emit_RTLD_DEFAULT_var(&dataM); + addComdat(new GlobalVariable(dataM, + jlRTLD_DEFAULT_var->getType(), + true, + GlobalVariable::ExternalLinkage, + jlRTLD_DEFAULT_var, + "jl_RTLD_DEFAULT_handle_pointer"), TheTriple); + + // let the compiler know we are going to internalize a copy of this, + // if it has a current usage with ExternalLinkage + auto small_typeof_copy = dataM.getGlobalVariable("small_typeof"); + if (small_typeof_copy) { + small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility); + small_typeof_copy->setDSOLocal(true); + } } - } - - // Reserve space for the output files and names - // DO NOT DELETE, this is necessary to ensure memorybuffers - // have a stable backing store for both their object files and - // their names - outputs.reserve((threads + 1) * (!!unopt_bc_fname + !!bc_fname + !!obj_fname + !!asm_fname) * 2); - - auto compile = [&](Module &M, StringRef name, unsigned threads) { add_output( - M, *SourceTM, outputs, name, - unopt_bc_Archive, bc_Archive, obj_Archive, asm_Archive, - !!unopt_bc_fname, !!bc_fname, !!obj_fname, !!asm_fname, - threads, module_info - ); }; - compile(*dataM, "text", threads); - - auto sysimageM = std::make_unique("sysimage", Context); - sysimageM->setTargetTriple(dataM->getTargetTriple()); - sysimageM->setDataLayout(dataM->getDataLayout()); -#if JL_LLVM_VERSION >= 130000 - sysimageM->setStackProtectorGuard(dataM->getStackProtectorGuard()); - sysimageM->setOverrideStackAlignment(dataM->getOverrideStackAlignment()); -#endif + has_veccall = !!dataM.getModuleFlag("julia.mv.veccall"); + }); - if (TheTriple.isOSWindows()) { - // Windows expect that the function `_DllMainStartup` is present in an dll. - // Normal compilers use something like Zig's crtdll.c instead we provide a - // a stub implementation. 
- auto T_pvoid = Type::getInt8Ty(Context)->getPointerTo(); - auto T_int32 = Type::getInt32Ty(Context); - auto FT = FunctionType::get(T_int32, {T_pvoid, T_int32, T_pvoid}, false); - auto F = Function::Create(FT, Function::ExternalLinkage, "_DllMainCRTStartup", *sysimageM); - F->setCallingConv(CallingConv::X86_StdCall); - - llvm::IRBuilder<> builder(BasicBlock::Create(Context, "top", F)); - builder.CreateRet(ConstantInt::get(T_int32, 1)); - } - bool has_veccall = dataM->getModuleFlag("julia.mv.veccall"); - data->M = orc::ThreadSafeModule(); // free memory for data->M - - if (sysimg_data) { - Constant *data = ConstantDataArray::get(Context, - ArrayRef((const unsigned char*)sysimg_data, sysimg_len)); - auto sysdata = new GlobalVariable(*sysimageM, data->getType(), false, - GlobalVariable::ExternalLinkage, - data, "jl_system_image_data"); - sysdata->setAlignment(Align(64)); - addComdat(sysdata, TheTriple); - Constant *len = ConstantInt::get(T_size, sysimg_len); - addComdat(new GlobalVariable(*sysimageM, len->getType(), true, - GlobalVariable::ExternalLinkage, - len, "jl_system_image_size"), TheTriple); - } - if (imaging_mode) { - auto specs = jl_get_llvm_clone_targets(); - const uint32_t base_flags = has_veccall ? 
JL_TARGET_VEC_CALL : 0; - std::vector data; - auto push_i32 = [&] (uint32_t v) { - uint8_t buff[4]; - memcpy(buff, &v, 4); - data.insert(data.end(), buff, buff + 4); - }; - push_i32(specs.size()); - for (uint32_t i = 0; i < specs.size(); i++) { - push_i32(base_flags | (specs[i].flags & JL_TARGET_UNKNOWN_NAME)); - auto &specdata = specs[i].data; - data.insert(data.end(), specdata.begin(), specdata.end()); + { + // Don't use withModuleDo here since we delete the TSM midway through + auto TSCtx = data->M.getContext(); + auto lock = TSCtx.getLock(); + auto dataM = data->M.getModuleUnlocked(); + + // Delete data when add_output thinks it's done with it + // Saves memory for use when multithreading + data_outputs = compile(*dataM, "text", threads, [data](Module &) { delete data; }); + } + + { + LLVMContext Context; + Module metadataM("metadata", Context); + metadataM.setTargetTriple(TheTriple.str()); + metadataM.setDataLayout(DL); + metadataM.setStackProtectorGuard(StackProtectorGuard); + metadataM.setOverrideStackAlignment(OverrideStackAlignment); + + Type *T_size = DL.getIntPtrType(Context); + Type *T_psize = T_size->getPointerTo(); + + if (TheTriple.isOSWindows()) { + // Windows expect that the function `_DllMainStartup` is present in an dll. + // Normal compilers use something like Zig's crtdll.c instead we provide a + // a stub implementation. 
+ auto T_pvoid = Type::getInt8Ty(Context)->getPointerTo(); + auto T_int32 = Type::getInt32Ty(Context); + auto FT = FunctionType::get(T_int32, {T_pvoid, T_int32, T_pvoid}, false); + auto F = Function::Create(FT, Function::ExternalLinkage, "_DllMainCRTStartup", metadataM); + F->setCallingConv(CallingConv::X86_StdCall); + + llvm::IRBuilder<> builder(BasicBlock::Create(Context, "top", F)); + builder.CreateRet(ConstantInt::get(T_int32, 1)); } - auto value = ConstantDataArray::get(Context, data); - auto target_ids = new GlobalVariable(*sysimageM, value->getType(), true, - GlobalVariable::InternalLinkage, - value, "jl_dispatch_target_ids"); - auto shards = emit_shard_table(*sysimageM, T_size, T_psize, threads); - auto ptls = emit_ptls_table(*sysimageM, T_size, T_psize); - auto header = emit_image_header(*sysimageM, threads, nfvars, ngvars); - auto AT = ArrayType::get(T_size, sizeof(small_typeof) / sizeof(void*)); - auto small_typeof_copy = new GlobalVariable(*sysimageM, AT, false, - GlobalVariable::ExternalLinkage, - Constant::getNullValue(AT), - "small_typeof"); - small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility); - small_typeof_copy->setDSOLocal(true); - AT = ArrayType::get(T_psize, 5); - auto pointers = new GlobalVariable(*sysimageM, AT, false, - GlobalVariable::ExternalLinkage, - ConstantArray::get(AT, { - ConstantExpr::getBitCast(header, T_psize), - ConstantExpr::getBitCast(shards, T_psize), - ConstantExpr::getBitCast(ptls, T_psize), - ConstantExpr::getBitCast(small_typeof_copy, T_psize), - ConstantExpr::getBitCast(target_ids, T_psize) - }), - "jl_image_pointers"); - addComdat(pointers, TheTriple); - if (s) { - write_int32(s, data.size()); - ios_write(s, (const char *)data.data(), data.size()); + if (imaging_mode) { + auto specs = jl_get_llvm_clone_targets(); + const uint32_t base_flags = has_veccall ? 
JL_TARGET_VEC_CALL : 0; + std::vector data; + auto push_i32 = [&] (uint32_t v) { + uint8_t buff[4]; + memcpy(buff, &v, 4); + data.insert(data.end(), buff, buff + 4); + }; + push_i32(specs.size()); + for (uint32_t i = 0; i < specs.size(); i++) { + push_i32(base_flags | (specs[i].flags & JL_TARGET_UNKNOWN_NAME)); + auto &specdata = specs[i].data; + data.insert(data.end(), specdata.begin(), specdata.end()); + } + auto value = ConstantDataArray::get(Context, data); + auto target_ids = new GlobalVariable(metadataM, value->getType(), true, + GlobalVariable::InternalLinkage, + value, "jl_dispatch_target_ids"); + auto shards = emit_shard_table(metadataM, T_size, T_psize, threads); + auto ptls = emit_ptls_table(metadataM, T_size, T_psize); + auto header = emit_image_header(metadataM, threads, nfvars, ngvars); + auto AT = ArrayType::get(T_size, sizeof(small_typeof) / sizeof(void*)); + auto small_typeof_copy = new GlobalVariable(metadataM, AT, false, + GlobalVariable::ExternalLinkage, + Constant::getNullValue(AT), + "small_typeof"); + small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility); + small_typeof_copy->setDSOLocal(true); + AT = ArrayType::get(T_psize, 5); + auto pointers = new GlobalVariable(metadataM, AT, false, + GlobalVariable::ExternalLinkage, + ConstantArray::get(AT, { + ConstantExpr::getBitCast(header, T_psize), + ConstantExpr::getBitCast(shards, T_psize), + ConstantExpr::getBitCast(ptls, T_psize), + ConstantExpr::getBitCast(small_typeof_copy, T_psize), + ConstantExpr::getBitCast(target_ids, T_psize) + }), + "jl_image_pointers"); + addComdat(pointers, TheTriple); + if (s) { + write_int32(s, data.size()); + ios_write(s, (const char *)data.data(), data.size()); + } } - } - compile(*sysimageM, "data", 1); + // no need to free module/context, destructor handles that + metadata_outputs = compile(metadataM, "data", 1, [](Module &) {}); + } object::Archive::Kind Kind = getDefaultForHost(TheTriple); - if (unopt_bc_fname) - 
handleAllErrors(writeArchive(unopt_bc_fname, unopt_bc_Archive, true, - Kind, true, false), reportWriterError); - if (bc_fname) - handleAllErrors(writeArchive(bc_fname, bc_Archive, true, - Kind, true, false), reportWriterError); - if (obj_fname) - handleAllErrors(writeArchive(obj_fname, obj_Archive, true, - Kind, true, false), reportWriterError); - if (asm_fname) - handleAllErrors(writeArchive(asm_fname, asm_Archive, true, - Kind, true, false), reportWriterError); - - delete data; +#define WRITE_ARCHIVE(fname, field, prefix, suffix) \ + if (fname) {\ + std::vector archive; \ + SmallVector filenames; \ + SmallVector buffers; \ + for (size_t i = 0; i < threads; i++) { \ + filenames.push_back((StringRef("text") + prefix + "#" + Twine(i) + suffix).str()); \ + buffers.push_back(StringRef(data_outputs[i].field.data(), data_outputs[i].field.size())); \ + } \ + filenames.push_back("metadata" prefix suffix); \ + buffers.push_back(StringRef(metadata_outputs[0].field.data(), metadata_outputs[0].field.size())); \ + if (z) { \ + filenames.push_back("sysimg" prefix suffix); \ + buffers.push_back(StringRef(sysimg_outputs[0].field.data(), sysimg_outputs[0].field.size())); \ + } \ + for (size_t i = 0; i < filenames.size(); i++) { \ + archive.push_back(NewArchiveMember(MemoryBufferRef(buffers[i], filenames[i]))); \ + } \ + handleAllErrors(writeArchive(fname, archive, true, Kind, true, false), reportWriterError); \ + } + + WRITE_ARCHIVE(unopt_bc_fname, unopt, "_unopt", ".bc"); + WRITE_ARCHIVE(bc_fname, opt, "_opt", ".bc"); + WRITE_ARCHIVE(obj_fname, obj, "", ".o"); + WRITE_ARCHIVE(asm_fname, asm_, "", ".s"); } void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis) @@ -2082,6 +2087,16 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz jl_codegen_params_t output(*ctx, std::move(target_info.first), std::move(target_info.second)); output.world = world; output.params = ¶ms; + output.imaging = imaging_default(); + 
// This would be nice, but currently it causes some assembly regressions that make printed output + // differ very significantly from the actual non-imaging mode code. + // // Force imaging mode for names of pointers + // output.imaging = true; + // This would also be nice, but it seems to cause OOMs on the windows32 builder + // // Force at least medium debug info for introspection + // No debug info = no variable names, + // max debug info = llvm.dbg.declare/value intrinsics which clutter IR output + output.debug_level = jl_options.debug_level; auto decls = jl_emit_code(m, mi, src, jlrettype, output); JL_UNLOCK(&jl_codegen_lock); // Might GC diff --git a/src/ast.c b/src/ast.c index 97bbc6e8227ba..06727b453d6a3 100644 --- a/src/ast.c +++ b/src/ast.c @@ -83,6 +83,7 @@ JL_DLLEXPORT jl_sym_t *jl_aggressive_constprop_sym; JL_DLLEXPORT jl_sym_t *jl_no_constprop_sym; JL_DLLEXPORT jl_sym_t *jl_purity_sym; JL_DLLEXPORT jl_sym_t *jl_nospecialize_sym; +JL_DLLEXPORT jl_sym_t *jl_nospecializeinfer_sym; JL_DLLEXPORT jl_sym_t *jl_macrocall_sym; JL_DLLEXPORT jl_sym_t *jl_colon_sym; JL_DLLEXPORT jl_sym_t *jl_hygienicscope_sym; @@ -342,6 +343,7 @@ void jl_init_common_symbols(void) jl_isdefined_sym = jl_symbol("isdefined"); jl_nospecialize_sym = jl_symbol("nospecialize"); jl_specialize_sym = jl_symbol("specialize"); + jl_nospecializeinfer_sym = jl_symbol("nospecializeinfer"); jl_optlevel_sym = jl_symbol("optlevel"); jl_compile_sym = jl_symbol("compile"); jl_force_compile_sym = jl_symbol("force_compile"); @@ -434,6 +436,8 @@ static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mo } JL_CATCH { // if expression cannot be converted, replace with error expr + //jl_(jl_current_exception()); + //jlbacktrace(); jl_expr_t *ex = jl_exprn(jl_error_sym, 1); v = (jl_value_t*)ex; jl_array_ptr_set(ex->args, 0, jl_cstr_to_string("invalid AST")); @@ -745,7 +749,7 @@ static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v, int check_vali // GC Note: jl_fieldref(v, 
0) allocates for GotoNode // but we don't need a GC root here because julia_to_list2_noalloc // shouldn't allocate in this case. - if (jl_typetagis(v, jl_linenumbernode_type)) { + if (jl_is_linenode(v)) { jl_value_t *file = jl_fieldref_noalloc(v,1); jl_value_t *line = jl_fieldref(v,0); value_t args = julia_to_list2_noalloc(fl_ctx, line, file, check_valid); @@ -784,7 +788,7 @@ JL_DLLEXPORT jl_value_t *jl_fl_parse(const char *text, size_t text_len, size_t offset, jl_value_t *options) { JL_TIMING(PARSING, PARSING); - jl_timing_show_filename(jl_string_data(filename), JL_TIMING_CURRENT_BLOCK); + jl_timing_show_filename(jl_string_data(filename), JL_TIMING_DEFAULT_BLOCK); if (offset > text_len) { jl_value_t *textstr = jl_pchar_to_string(text, text_len); JL_GC_PUSH1(&textstr); @@ -834,7 +838,7 @@ JL_DLLEXPORT jl_value_t *jl_fl_parse(const char *text, size_t text_len, } // returns either an expression or a thunk -jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module_t *inmodule) +static jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module_t *inmodule) { jl_ast_context_t *ctx = jl_ast_ctx_enter(inmodule); fl_context_t *fl_ctx = &ctx->fl; @@ -847,8 +851,8 @@ jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module return result; } -static jl_value_t *jl_call_scm_on_ast_and_loc(const char *funcname, jl_value_t *expr, - jl_module_t *inmodule, const char *file, int line) +jl_value_t *jl_call_scm_on_ast_and_loc(const char *funcname, jl_value_t *expr, + jl_module_t *inmodule, const char *file, int line) { jl_ast_context_t *ctx = jl_ast_ctx_enter(inmodule); fl_context_t *fl_ctx = &ctx->fl; @@ -998,7 +1002,59 @@ int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT return 0; } -static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule, jl_module_t **ctx, size_t world, int throw_load_error) +// Utility function to return whether `e` is any of the special AST types or +// 
will always evaluate to itself exactly unchanged. This corresponds to +// `is_self_quoting` in Core.Compiler utilities. +int jl_is_ast_node(jl_value_t *e) JL_NOTSAFEPOINT +{ + return jl_is_newvarnode(e) + || jl_is_code_info(e) + || jl_is_linenode(e) + || jl_is_gotonode(e) + || jl_is_gotoifnot(e) + || jl_is_returnnode(e) + || jl_is_ssavalue(e) + || jl_is_slotnumber(e) + || jl_is_argument(e) + || jl_is_quotenode(e) + || jl_is_globalref(e) + || jl_is_symbol(e) + || jl_is_pinode(e) + || jl_is_phinode(e) + || jl_is_phicnode(e) + || jl_is_upsilonnode(e) + || jl_is_expr(e); +} + +static int is_self_quoting_expr(jl_expr_t *e) JL_NOTSAFEPOINT +{ + return (e->head == jl_inert_sym || + e->head == jl_core_sym || + e->head == jl_line_sym || + e->head == jl_lineinfo_sym || + e->head == jl_meta_sym || + e->head == jl_boundscheck_sym || + e->head == jl_inline_sym || + e->head == jl_noinline_sym); +} + +// any AST, except those that cannot contain symbols +// and have no side effects +int need_esc_node(jl_value_t *e) JL_NOTSAFEPOINT +{ + if (jl_is_linenode(e) + || jl_is_ssavalue(e) + || jl_is_slotnumber(e) + || jl_is_argument(e) + || jl_is_quotenode(e)) + return 0; + if (jl_is_expr(e)) + return !is_self_quoting_expr((jl_expr_t*)e); + // note: jl_is_globalref(e) is not included here, since we care a little about about having a line number for it + return jl_is_ast_node(e); +} + +static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule, jl_module_t **ctx, jl_value_t **lineinfo, size_t world, int throw_load_error) { jl_task_t *ct = jl_current_task; JL_TIMING(MACRO_INVOCATION, MACRO_INVOCATION); @@ -1010,10 +1066,9 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule margs[0] = jl_array_ptr_ref(args, 0); // __source__ argument jl_value_t *lno = jl_array_ptr_ref(args, 1); + if (!jl_is_linenode(lno)) + lno = jl_new_struct(jl_linenumbernode_type, jl_box_long(0), jl_nothing); margs[1] = lno; - if (!jl_typetagis(lno, 
jl_linenumbernode_type)) { - margs[1] = jl_new_struct(jl_linenumbernode_type, jl_box_long(0), jl_nothing); - } margs[2] = (jl_value_t*)inmodule; for (i = 3; i < nargs; i++) margs[i] = jl_array_ptr_ref(args, i - 1); @@ -1031,6 +1086,7 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule jl_method_error(margs[0], &margs[1], nargs, ct->world_age); // unreachable } + jl_timing_show_macro(mfunc, margs[1], inmodule, JL_TIMING_DEFAULT_BLOCK); *ctx = mfunc->def.method->module; result = jl_invoke(margs[0], &margs[1], nargs - 1, mfunc); } @@ -1051,6 +1107,7 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule } } ct->world_age = last_age; + *lineinfo = margs[1]; JL_GC_POP(); return result; } @@ -1073,14 +1130,18 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str JL_GC_POP(); return expr; } - if (e->head == jl_hygienicscope_sym && jl_expr_nargs(e) == 2) { + if (e->head == jl_hygienicscope_sym && jl_expr_nargs(e) >= 2) { struct macroctx_stack newctx; newctx.m = (jl_module_t*)jl_exprarg(e, 1); JL_TYPECHK(hygienic-scope, module, (jl_value_t*)newctx.m); newctx.parent = macroctx; jl_value_t *a = jl_exprarg(e, 0); jl_value_t *a2 = jl_expand_macros(a, inmodule, &newctx, onelevel, world, throw_load_error); - if (a != a2) + if (jl_is_expr(a2) && ((jl_expr_t*)a2)->head == jl_escape_sym && !need_esc_node(jl_exprarg(a2, 0))) + expr = jl_exprarg(a2, 0); + else if (!need_esc_node(a2)) + expr = a2; + else if (a != a2) jl_array_ptr_set(e->args, 0, a2); return expr; } @@ -1088,21 +1149,28 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str struct macroctx_stack newctx; newctx.m = macroctx ? 
macroctx->m : inmodule; newctx.parent = macroctx; - jl_value_t *result = jl_invoke_julia_macro(e->args, inmodule, &newctx.m, world, throw_load_error); + jl_value_t *lineinfo = NULL; + jl_value_t *result = jl_invoke_julia_macro(e->args, inmodule, &newctx.m, &lineinfo, world, throw_load_error); + if (!need_esc_node(result)) + return result; jl_value_t *wrap = NULL; - JL_GC_PUSH3(&result, &wrap, &newctx.m); + JL_GC_PUSH4(&result, &wrap, &newctx.m, &lineinfo); // copy and wrap the result in `(hygienic-scope ,result ,newctx) if (jl_is_expr(result) && ((jl_expr_t*)result)->head == jl_escape_sym) result = jl_exprarg(result, 0); else - wrap = (jl_value_t*)jl_exprn(jl_hygienicscope_sym, 2); + wrap = (jl_value_t*)jl_exprn(jl_hygienicscope_sym, 3); result = jl_copy_ast(result); if (!onelevel) result = jl_expand_macros(result, inmodule, wrap ? &newctx : macroctx, onelevel, world, throw_load_error); - if (wrap) { + if (wrap && need_esc_node(result)) { jl_exprargset(wrap, 0, result); jl_exprargset(wrap, 1, newctx.m); - result = wrap; + jl_exprargset(wrap, 2, lineinfo); + if (jl_is_expr(result) && ((jl_expr_t*)result)->head == jl_escape_sym) + result = jl_exprarg(result, 0); + else + result = wrap; } JL_GC_POP(); return result; @@ -1178,6 +1246,7 @@ JL_DLLEXPORT jl_value_t *jl_expand_in_world(jl_value_t *expr, jl_module_t *inmod const char *file, int line, size_t world) { JL_TIMING(LOWERING, LOWERING); + jl_timing_show_location(file, line, inmodule, JL_TIMING_DEFAULT_BLOCK); JL_GC_PUSH1(&expr); expr = jl_copy_ast(expr); expr = jl_expand_macros(expr, inmodule, NULL, 0, world, 1); @@ -1191,6 +1260,7 @@ JL_DLLEXPORT jl_value_t *jl_expand_with_loc_warn(jl_value_t *expr, jl_module_t * const char *file, int line) { JL_TIMING(LOWERING, LOWERING); + jl_timing_show_location(file, line, inmodule, JL_TIMING_DEFAULT_BLOCK); jl_array_t *kwargs = NULL; JL_GC_PUSH2(&expr, &kwargs); expr = jl_copy_ast(expr); @@ -1278,8 +1348,8 @@ jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t 
*filename, jl_svecset(args[1], 0, jl_box_uint8pointer((uint8_t*)text)); jl_svecset(args[1], 1, jl_box_long(text_len)); args[2] = filename; - args[3] = jl_box_ulong(lineno); - args[4] = jl_box_ulong(offset); + args[3] = jl_box_long(lineno); + args[4] = jl_box_long(offset); args[5] = options; jl_task_t *ct = jl_current_task; size_t last_age = ct->world_age; diff --git a/src/ast.scm b/src/ast.scm index 88220c03a7aa6..87db8449b3992 100644 --- a/src/ast.scm +++ b/src/ast.scm @@ -479,12 +479,13 @@ (define (eq-sym? a b) (or (eq? a b) (and (ssavalue? a) (ssavalue? b) (eqv? (cdr a) (cdr b))))) -(define (blockify e) +(define (blockify e (lno #f)) + (set! lno (if lno (list lno) '())) (if (and (pair? e) (eq? (car e) 'block)) (if (null? (cdr e)) - `(block (null)) - e) - `(block ,e))) + `(block ,@lno (null)) + (if (null? lno) e `(block ,@lno ,@(cdr e)))) + `(block ,@lno ,e))) (define (make-var-info name) (list name '(core Any) 0)) (define vinfo:name car) diff --git a/src/builtins.c b/src/builtins.c index a6c904c851c95..b664b8d73710f 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -1363,11 +1363,11 @@ JL_CALLABLE(jl_f_apply_type) jl_vararg_t *vm = (jl_vararg_t*)args[0]; if (!vm->T) { JL_NARGS(apply_type, 2, 3); - return (jl_value_t*)jl_wrap_vararg(args[1], nargs == 3 ? args[2] : NULL); + return (jl_value_t*)jl_wrap_vararg(args[1], nargs == 3 ? 
args[2] : NULL, 1); } else if (!vm->N) { JL_NARGS(apply_type, 2, 2); - return (jl_value_t*)jl_wrap_vararg(vm->T, args[1]); + return (jl_value_t*)jl_wrap_vararg(vm->T, args[1], 1); } } else if (jl_is_unionall(args[0])) { @@ -2060,7 +2060,7 @@ void jl_init_primitives(void) JL_GC_DISABLED add_builtin("Tuple", (jl_value_t*)jl_anytuple_type); add_builtin("TypeofVararg", (jl_value_t*)jl_vararg_type); add_builtin("SimpleVector", (jl_value_t*)jl_simplevector_type); - add_builtin("Vararg", (jl_value_t*)jl_wrap_vararg(NULL, NULL)); + add_builtin("Vararg", (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0)); add_builtin("Module", (jl_value_t*)jl_module_type); add_builtin("MethodTable", (jl_value_t*)jl_methtable_type); diff --git a/src/ccall.cpp b/src/ccall.cpp index 90f7417c03524..47496a3a91ba6 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -135,6 +135,7 @@ static Value *runtime_sym_lookup( BasicBlock *ccall_bb = BasicBlock::Create(irbuilder.getContext(), "ccall"); Constant *initnul = ConstantPointerNull::get(T_pvoidfunc); LoadInst *llvmf_orig = irbuilder.CreateAlignedLoad(T_pvoidfunc, llvmgv, Align(sizeof(void*))); + setName(emission_context, llvmf_orig, f_name + StringRef(".cached")); // This in principle needs a consume ordering so that load from // this pointer sees a valid value. However, this is not supported by // LLVM (or agreed on in the C/C++ standard FWIW) and should be @@ -143,8 +144,10 @@ static Value *runtime_sym_lookup( // invalid load from the `cglobal` but doesn't depend on the `cglobal` // value for this to happen. 
llvmf_orig->setAtomic(AtomicOrdering::Unordered); + auto nonnull = irbuilder.CreateICmpNE(llvmf_orig, initnul); + setName(emission_context, nonnull, "is_cached"); irbuilder.CreateCondBr( - irbuilder.CreateICmpNE(llvmf_orig, initnul), + nonnull, ccall_bb, dlsym_lookup); @@ -170,6 +173,7 @@ static Value *runtime_sym_lookup( llvmf = irbuilder.CreateCall(prepare_call_in(jl_builderModule(irbuilder), jldlsym_func), { libname, nameval, libptrgv }); } + setName(emission_context, llvmf, f_name + StringRef(".found")); StoreInst *store = irbuilder.CreateAlignedStore(llvmf, llvmgv, Align(sizeof(void*))); store->setAtomic(AtomicOrdering::Release); irbuilder.CreateBr(ccall_bb); @@ -179,6 +183,7 @@ static Value *runtime_sym_lookup( PHINode *p = irbuilder.CreatePHI(T_pvoidfunc, 2); p->addIncoming(llvmf_orig, enter_bb); p->addIncoming(llvmf, llvmf->getParent()); + setName(emission_context, p, f_name); return irbuilder.CreateBitCast(p, funcptype); } @@ -320,6 +325,7 @@ static Value *emit_plt( } GlobalVariable *got = prepare_global_in(jl_Module, sharedgot); LoadInst *got_val = ctx.builder.CreateAlignedLoad(got->getValueType(), got, Align(sizeof(void*))); + setName(ctx.emission_context, got_val, f_name); // See comment in `runtime_sym_lookup` above. This in principle needs a // consume ordering too. 
This is even less likely to cause issues though // since the only thing we do to this loaded pointer is to call it @@ -442,16 +448,20 @@ static Value *llvm_type_rewrite( unsigned align = std::max(DL.getPrefTypeAlignment(target_type), DL.getPrefTypeAlignment(from_type)); if (DL.getTypeAllocSize(target_type) >= DL.getTypeAllocSize(from_type)) { to = emit_static_alloca(ctx, target_type); + setName(ctx.emission_context, to, "type_rewrite_buffer"); cast(to)->setAlignment(Align(align)); from = emit_bitcast(ctx, to, from_type->getPointerTo()); } else { from = emit_static_alloca(ctx, from_type); + setName(ctx.emission_context, from, "type_rewrite_buffer"); cast(from)->setAlignment(Align(align)); to = emit_bitcast(ctx, from, target_type->getPointerTo()); } ctx.builder.CreateAlignedStore(v, from, Align(align)); - return ctx.builder.CreateAlignedLoad(target_type, to, Align(align)); + auto pun = ctx.builder.CreateAlignedLoad(target_type, to, Align(align)); + setName(ctx.emission_context, pun, "type_rewrite"); + return pun; } // --- argument passing and scratch space utilities --- @@ -508,6 +518,7 @@ static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_val Value *istype = ctx.builder.CreateICmpNE( ctx.builder.CreateCall(prepare_call(jlisa_func), { vx, boxed(ctx, jlto_runtime) }), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)); + setName(ctx.emission_context, istype, "istype"); BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f); BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "pass", ctx.f); ctx.builder.CreateCondBr(istype, passBB, failBB); @@ -545,6 +556,7 @@ static Value *julia_to_native( // pass the address of an alloca'd thing, not a box // since those are immutable. 
Value *slot = emit_static_alloca(ctx, to); + setName(ctx.emission_context, slot, "native_convert_buffer"); if (!jvinfo.ispointer()) { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa); ai.decorateInst(ctx.builder.CreateStore(emit_unbox(ctx, to, jvinfo, jlto), slot)); @@ -1009,6 +1021,7 @@ static Value *box_ccall_result(jl_codectx_t &ctx, Value *result, Value *runtime_ unsigned nb = DL.getTypeStoreSize(result->getType()); MDNode *tbaa = jl_is_mutable(rt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut; Value *strct = emit_allocobj(ctx, nb, runtime_dt); + setName(ctx.emission_context, strct, "ccall_result_box"); init_bits_value(ctx, strct, result, tbaa); return strct; } @@ -1253,6 +1266,7 @@ static bool verify_ref_type(jl_codectx_t &ctx, jl_value_t* ref, jl_unionall_t *u Value *notany = ctx.builder.CreateICmpNE( boxed(ctx, runtime_sp), track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jl_any_type))); + setName(ctx.emission_context, notany, "any_type.not"); error_unless(ctx, notany, make_errmsg(fname, n, rt_err_msg_notany)); always_error = false; } @@ -1587,7 +1601,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) Value *ptask_i16 = emit_bitcast(ctx, get_current_task(ctx), getInt16PtrTy(ctx.builder.getContext())); const int tid_offset = offsetof(jl_task_t, tid); Value *ptid = ctx.builder.CreateInBoundsGEP(getInt16Ty(ctx.builder.getContext()), ptask_i16, ConstantInt::get(ctx.types().T_size, tid_offset / sizeof(int16_t))); + setName(ctx.emission_context, ptid, "thread_id_ptr"); LoadInst *tid = ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), ptid, Align(sizeof(int16_t))); + setName(ctx.emission_context, tid, "thread_id"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); ai.decorateInst(tid); return mark_or_box_ccall_result(ctx, tid, retboxed, rt, unionall, static_rt); @@ -1601,15 +1617,19 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t 
nargs) Value *ptls_i32 = emit_bitcast(ctx, get_current_ptls(ctx), getInt32PtrTy(ctx.builder.getContext())); const int finh_offset = offsetof(jl_tls_states_t, finalizers_inhibited); Value *pfinh = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), ptls_i32, ConstantInt::get(ctx.types().T_size, finh_offset / 4)); + setName(ctx.emission_context, pfinh, "finalizers_inhibited_ptr"); LoadInst *finh = ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), pfinh, Align(sizeof(int32_t))); + setName(ctx.emission_context, finh, "finalizers_inhibited"); Value *newval; if (is_libjulia_func(jl_gc_disable_finalizers_internal)) { newval = ctx.builder.CreateAdd(finh, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 1)); + setName(ctx.emission_context, newval, "finalizers_inhibited_inc"); } else { newval = ctx.builder.CreateSelect(ctx.builder.CreateICmpEQ(finh, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0), ctx.builder.CreateSub(finh, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 1))); + setName(ctx.emission_context, newval, "finalizers_inhibited_dec"); } ctx.builder.CreateStore(newval, pfinh); return ghostValue(ctx, jl_nothing_type); @@ -1630,6 +1650,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) Value *ptls_pv = emit_bitcast(ctx, get_current_ptls(ctx), ctx.types().T_ppjlvalue); const int nt_offset = offsetof(jl_tls_states_t, next_task); Value *pnt = ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, ptls_pv, ConstantInt::get(ctx.types().T_size, nt_offset / sizeof(void*))); + setName(ctx.emission_context, pnt, "next_task_ptr"); ctx.builder.CreateStore(emit_pointer_from_objref(ctx, boxed(ctx, argv[0])), pnt); return ghostValue(ctx, jl_nothing_type); } @@ -1640,8 +1661,11 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) JL_GC_POP(); ctx.builder.CreateCall(prepare_call(gcroot_flush_func)); 
Value *pdefer_sig = emit_defer_signal(ctx); + setName(ctx.emission_context, pdefer_sig, "defer_signal_ptr"); Value *defer_sig = ctx.builder.CreateLoad(ctx.types().T_sigatomic, pdefer_sig); + setName(ctx.emission_context, defer_sig, "defer_signal"); defer_sig = ctx.builder.CreateAdd(defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 1)); + setName(ctx.emission_context, defer_sig, "defer_signal_inc"); ctx.builder.CreateStore(defer_sig, pdefer_sig); emit_signal_fence(ctx); return ghostValue(ctx, jl_nothing_type); @@ -1653,7 +1677,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) JL_GC_POP(); ctx.builder.CreateCall(prepare_call(gcroot_flush_func)); Value *pdefer_sig = emit_defer_signal(ctx); + setName(ctx.emission_context, pdefer_sig, "defer_signal_ptr"); Value *defer_sig = ctx.builder.CreateLoad(ctx.types().T_sigatomic, pdefer_sig); + setName(ctx.emission_context, defer_sig, "defer_signal"); emit_signal_fence(ctx); error_unless(ctx, ctx.builder.CreateICmpNE(defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 0)), @@ -1661,19 +1687,23 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) defer_sig = ctx.builder.CreateSub( defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 1)); + setName(ctx.emission_context, defer_sig, "defer_signal_dec"); ctx.builder.CreateStore(defer_sig, pdefer_sig); BasicBlock *checkBB = BasicBlock::Create(ctx.builder.getContext(), "check", ctx.f); BasicBlock *contBB = BasicBlock::Create(ctx.builder.getContext(), "cont"); + auto not_deferred = ctx.builder.CreateICmpEQ(defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 0)); + setName(ctx.emission_context, not_deferred, "deferred.not"); ctx.builder.CreateCondBr( - ctx.builder.CreateICmpEQ(defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 0)), + not_deferred, checkBB, contBB); ctx.builder.SetInsertPoint(checkBB); - ctx.builder.CreateLoad( + auto signal_page_load = ctx.builder.CreateLoad( ctx.types().T_size, 
ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size, get_current_signal_page_from_ptls(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const), -1), true); + setName(ctx.emission_context, signal_page_load, "signal_page_load"); ctx.builder.CreateBr(contBB); ctx.f->getBasicBlockList().push_back(contBB); ctx.builder.SetInsertPoint(contBB); @@ -1690,7 +1720,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) } else { auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), ctx.types().T_size->getPointerTo()); + setName(ctx.emission_context, ptr, "svec_len_ptr"); len = ctx.builder.CreateAlignedLoad(ctx.types().T_size, ptr, ctx.types().alignof_ptr); + setName(ctx.emission_context, len, "svec_len"); // Only mark with TBAA if we are sure about the type. // This could otherwise be in a dead branch if (svecv.typ == (jl_value_t*)jl_simplevector_type) { @@ -1713,11 +1745,15 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) const jl_cgval_t &idxv = argv[1]; Value *idx = emit_unbox(ctx, ctx.types().T_size, idxv, (jl_value_t*)jl_long_type); idx = ctx.builder.CreateAdd(idx, ConstantInt::get(ctx.types().T_size, 1)); + setName(ctx.emission_context, idx, "svec_idx"); auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), ctx.types().T_pprjlvalue); + setName(ctx.emission_context, ptr, "svec_data_ptr"); Value *slot_addr = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, decay_derived(ctx, ptr), idx); + setName(ctx.emission_context, slot_addr, "svec_slot_addr"); LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, slot_addr, Align(sizeof(void*))); + setName(ctx.emission_context, load, "svec_slot"); load->setAtomic(AtomicOrdering::Unordered); // Only mark with TBAA if we are sure about the type. 
// This could otherwise be in a dead branch @@ -1754,9 +1790,12 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) if (stride != 1) idx = ctx.builder.CreateMul(idx, ConstantInt::get(ctx.types().T_size, stride)); idx = ctx.builder.CreateAdd(idx, ConstantInt::get(ctx.types().T_size, ((jl_datatype_t*)ety)->layout->first_ptr)); + setName(ctx.emission_context, idx, "array_idx"); } Value *slot_addr = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, arrayptr, idx); + setName(ctx.emission_context, slot_addr, "array_slot_addr"); LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, slot_addr, Align(sizeof(void*))); + setName(ctx.emission_context, load, "array_slot"); load->setAtomic(AtomicOrdering::Unordered); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_ptrarraybuf); ai.decorateInst(load); @@ -1776,6 +1815,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) // a null pointer. auto strp = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, obj, 1); strp = ctx.builder.CreatePtrToInt(strp, ctx.types().T_size); + setName(ctx.emission_context, strp, "string_ptr"); JL_GC_POP(); return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt); } @@ -1790,6 +1830,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) auto strp = ctx.builder.CreateConstInBoundsGEP1_32( ctx.types().T_prjlvalue, obj, (sizeof(jl_sym_t) + sizeof(void*) - 1) / sizeof(void*)); strp = ctx.builder.CreatePtrToInt(strp, ctx.types().T_size); + setName(ctx.emission_context, strp, "symbol_name"); JL_GC_POP(); return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt); } @@ -1860,7 +1901,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) const int hash_offset = offsetof(jl_sym_t, hash); Value *ph1 = emit_bitcast(ctx, decay_derived(ctx, boxed(ctx, val)), ctx.types().T_size->getPointerTo()); 
Value *ph2 = ctx.builder.CreateInBoundsGEP(ctx.types().T_size, ph1, ConstantInt::get(ctx.types().T_size, hash_offset / ctx.types().sizeof_ptr)); + setName(ctx.emission_context, ph2, "object_id_ptr"); LoadInst *hashval = ctx.builder.CreateAlignedLoad(ctx.types().T_size, ph2, ctx.types().alignof_ptr); + setName(ctx.emission_context, hashval, "object_id"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); ai.decorateInst(hashval); return mark_or_box_ccall_result(ctx, hashval, retboxed, rt, unionall, static_rt); @@ -1879,6 +1922,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) T_pint8_derived) }; Value *ret = ctx.builder.CreateCall(prepare_call(jl_object_id__func), makeArrayRef(args)); + setName(ctx.emission_context, ret, "object_id"); JL_GC_POP(); return mark_or_box_ccall_result(ctx, ret, retboxed, rt, unionall, static_rt); } @@ -1967,6 +2011,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( assert(!retboxed && jl_is_datatype(rt) && "sret return type invalid"); if (jl_is_pointerfree(rt)) { result = emit_static_alloca(ctx, lrt); + setName(ctx.emission_context, result, "ccall_sret"); sretty = lrt; argvals[0] = ctx.builder.CreateBitCast(result, fargt_sig.at(0)); } @@ -1975,6 +2020,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( // and has incorrect write barriers. 
// instead this code path should behave like `unsafe_load` result = emit_allocobj(ctx, (jl_datatype_t*)rt); + setName(ctx.emission_context, result, "ccall_sret_box"); sretty = ctx.types().T_jlvalue; sretboxed = true; gc_uses.push_back(result); @@ -2123,6 +2169,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( return mark_julia_slot(result, rt, NULL, ctx.tbaa().tbaa_stack); ++SRetCCalls; result = ctx.builder.CreateLoad(sretty, result); + setName(ctx.emission_context, result, "returned"); } } else { @@ -2137,6 +2184,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( assert(jl_is_datatype(rt)); if (static_rt) { Value *strct = emit_allocobj(ctx, (jl_datatype_t*)rt); + setName(ctx.emission_context, strct, "ccall_ret_box"); MDNode *tbaa = jl_is_mutable(rt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut; int boxalign = julia_alignment(rt); // copy the data from the return value to the new struct @@ -2147,6 +2195,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( // ARM and AArch64 can use a LLVM type larger than the julia type. // When this happens, cast through memory. 
auto slot = emit_static_alloca(ctx, resultTy); + setName(ctx.emission_context, slot, "type_pun_slot"); slot->setAlignment(Align(boxalign)); ctx.builder.CreateAlignedStore(result, slot, Align(boxalign)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 9e42a6b246e9b..8442ba99bb411 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -119,10 +119,12 @@ static Value *stringConstPtr( GlobalVariable *gv = get_pointer_to_constant(emission_context, Data, "_j_str", *M); Value *zero = ConstantInt::get(Type::getInt32Ty(irbuilder.getContext()), 0); Value *Args[] = { zero, zero }; - return irbuilder.CreateInBoundsGEP(gv->getValueType(), + auto gep = irbuilder.CreateInBoundsGEP(gv->getValueType(), // Addrspacecast in case globals are in non-0 AS irbuilder.CreateAddrSpaceCast(gv, gv->getValueType()->getPointerTo(0)), Args); + setName(emission_context, gep, "string_const_ptr"); + return gep; } @@ -333,7 +335,8 @@ static Constant *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr) StringRef localname; std::string gvname; if (!gv) { - raw_string_ostream(gvname) << cname << ctx.global_targets.size(); + uint64_t id = ctx.emission_context.imaging ? 
jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1) : ctx.global_targets.size(); + raw_string_ostream(gvname) << cname << id; localname = StringRef(gvname); } else { @@ -514,9 +517,11 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p) return literal_static_pointer_val(p, ctx.types().T_pjlvalue); Value *pgv = literal_pointer_val_slot(ctx, p); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - return ai.decorateInst(maybe_mark_load_dereferenceable( + auto load = ai.decorateInst(maybe_mark_load_dereferenceable( ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*))), false, jl_typeof(p))); + setName(ctx.emission_context, load, pgv->getName()); + return load; } // Returns ctx.types().T_pjlvalue @@ -531,9 +536,11 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_binding_t *p) jl_globalref_t *gr = p->globalref; Value *pgv = gr ? julia_pgv(ctx, "jl_bnd#", gr->name, gr->mod, p) : julia_pgv(ctx, "jl_bnd#", p); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - return ai.decorateInst(maybe_mark_load_dereferenceable( + auto load = ai.decorateInst(maybe_mark_load_dereferenceable( ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*))), false, sizeof(jl_binding_t), alignof(jl_binding_t))); + setName(ctx.emission_context, load, pgv->getName()); + return load; } // bitcast a value, but preserve its address space when dealing with pointer types @@ -572,7 +579,9 @@ static Value *julia_binding_gv(jl_codectx_t &ctx, jl_binding_t *b) jl_globalref_t *gr = b->globalref; Value *pgv = gr ? 
julia_pgv(ctx, "*", gr->name, gr->mod, b) : julia_pgv(ctx, "*jl_bnd#", b); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - return ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*)))); + auto load = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*)))); + setName(ctx.emission_context, load, pgv->getName()); + return load; } else { return literal_static_pointer_val(b, ctx.types().T_pjlvalue); @@ -975,6 +984,10 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const src = emit_bitcast(ctx, src, dstty); } if (directel) { + if (isa(src) && !src->hasName()) + setName(ctx.emission_context, src, "memcpy_refined_src"); + if (isa(dst) && !dst->hasName()) + setName(ctx.emission_context, dst, "memcpy_refined_dst"); auto val = src_ai.decorateInst(ctx.builder.CreateAlignedLoad(directel, src, Align(align), is_volatile)); dst_ai.decorateInst(ctx.builder.CreateAlignedStore(val, dst, Align(align), is_volatile)); ++SkippedMemcpys; @@ -1034,6 +1047,7 @@ static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDN ctx.types().T_prjlvalue, emit_bitcast(ctx, maybe_decay_tracked(ctx, v), ctx.types().T_pprjlvalue), idx); + setName(ctx.emission_context, vptr, "arraysize_ptr"); LoadInst *load = ctx.builder.CreateLoad(type, emit_bitcast(ctx, vptr, PointerType::get(type, 0))); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.decorateInst(load); @@ -1044,7 +1058,9 @@ static Value *emit_tagfrom(jl_codectx_t &ctx, jl_datatype_t *dt) { if (dt->smalltag) return ConstantInt::get(ctx.types().T_size, dt->smalltag << 4); - return ctx.builder.CreatePtrToInt(literal_pointer_val(ctx, (jl_value_t*)dt), ctx.types().T_size); + auto tag = ctx.builder.CreatePtrToInt(literal_pointer_val(ctx, (jl_value_t*)dt), ctx.types().T_size); + setName(ctx.emission_context, tag, jl_symbol_name(dt->name->name)); + return tag; } // Returns justtag ? 
ctx.types.T_size : ctx.types().T_prjlvalue @@ -1109,6 +1125,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull else ptr = ConstantExpr::getAddrSpaceCast(literal_static_pointer_val((jl_value_t*)jt, ctx.types().T_pjlvalue), expr_type); datatype_or_p = ctx.builder.CreateSelect(cmp, ptr, datatype_or_p); + setName(ctx.emission_context, datatype_or_p, "typetag_ptr"); }, p.typ, counter); @@ -1116,6 +1133,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); if (ctx.emission_context.imaging) { Value *datatype = ai.decorateInst(ctx.builder.CreateAlignedLoad(expr_type, datatype_or_p, Align(sizeof(void*)))); + setName(ctx.emission_context, datatype, "typetag"); return justtag ? datatype : track_pjlvalue(ctx, datatype); } return datatype_or_p; @@ -1123,6 +1141,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull Value *res; if (!allunboxed) { Value *isnull = ctx.builder.CreateIsNull(datatype_or_p); + setName(ctx.emission_context, isnull, "typetag_isnull"); BasicBlock *boxBB = BasicBlock::Create(ctx.builder.getContext(), "boxed", ctx.f); BasicBlock *unboxBB = BasicBlock::Create(ctx.builder.getContext(), "unboxed", ctx.f); BasicBlock *mergeBB = BasicBlock::Create(ctx.builder.getContext(), "merge", ctx.f); @@ -1140,6 +1159,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull phi->addIncoming(boxTy, boxBB); phi->addIncoming(unboxTy, unboxBB); res = phi; + setName(ctx.emission_context, res, "typetag"); } else { res = emit_unboxty(); @@ -1154,15 +1174,19 @@ static Value *emit_datatype_types(jl_codectx_t &ctx, Value *dt) Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), ctx.types().T_ppjlvalue); Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, types) / sizeof(void*)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - return 
ai.decorateInst(ctx.builder.CreateAlignedLoad( + auto types = ai.decorateInst(ctx.builder.CreateAlignedLoad( ctx.types().T_pjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, Ptr, Idx), Align(sizeof(void*)))); + setName(ctx.emission_context, types, "datatype_types"); + return types; } static Value *emit_datatype_nfields(jl_codectx_t &ctx, Value *dt) { Value *type_svec = emit_bitcast(ctx, emit_datatype_types(ctx, dt), ctx.types().T_size->getPointerTo()); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - return ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_size, type_svec, Align(sizeof(void*)))); + auto nfields = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_size, type_svec, Align(sizeof(void*)))); + setName(ctx.emission_context, nfields, "datatype_nfields"); + return nfields; } static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt) @@ -1174,7 +1198,9 @@ static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt) Ptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32PtrTy(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t*)))); Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_layout_t, size) / sizeof(int32_t)); Ptr = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), Ptr, Idx); - return ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t)))); + auto Size = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t)))); + setName(ctx.emission_context, Size, "datatype_size"); + return Size; } /* this is valid code, it's simply unused @@ -1249,6 +1275,7 @@ static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *typ) isprimitive = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isprimitive, Align(1))); isprimitive = ctx.builder.CreateLShr(isprimitive, 7); isprimitive = 
ctx.builder.CreateTrunc(isprimitive, getInt1Ty(ctx.builder.getContext())); + setName(ctx.emission_context, isprimitive, "datatype_isprimitive"); return isprimitive; } @@ -1260,7 +1287,9 @@ static Value *emit_datatype_name(jl_codectx_t &ctx, Value *dt) emit_bitcast(ctx, maybe_decay_tracked(ctx, dt), ctx.types().T_ppjlvalue), ConstantInt::get(ctx.types().T_size, n)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - return ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, vptr, Align(sizeof(void*)))); + auto name = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, vptr, Align(sizeof(void*)))); + setName(ctx.emission_context, name, "datatype_name"); + return name; } // --- generating various error checks --- @@ -1369,6 +1398,7 @@ static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval, PHINode *phi = ctx.builder.CreatePHI(defval->getType(), 2); phi->addIncoming(defval, currBB); phi->addIncoming(res, passBB); + setName(ctx.emission_context, phi, "guard_res"); return phi; } @@ -1413,7 +1443,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull, bool just assert(v != NULL && !isa(v) && "expected a conditionally boxed value"); Value *nonnull = maybenull ? null_pointer_cmp(ctx, v) : ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); Function *typeof = prepare_call(jl_typeof_func); - return emit_guarded_test(ctx, nonnull, Constant::getNullValue(justtag ? ctx.types().T_size : typeof->getReturnType()), [&] { + auto val = emit_guarded_test(ctx, nonnull, Constant::getNullValue(justtag ? ctx.types().T_size : typeof->getReturnType()), [&] { // e.g. 
emit_typeof(ctx, v) Value *typetag = ctx.builder.CreateCall(typeof, {v}); if (notag) @@ -1434,6 +1464,8 @@ static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull, bool just return ai.decorateInst(small); }); }); + setName(ctx.emission_context, val, "typeof"); + return val; } static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v, bool is_promotable=false); @@ -1492,11 +1524,14 @@ static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_data if (tindex > 0) { // optimize more when we know that this is a split union-type where tindex = 0 is invalid Value *xtindex = ctx.builder.CreateAnd(arg.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f)); - return ctx.builder.CreateICmpEQ(xtindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), tindex)); + auto isa = ctx.builder.CreateICmpEQ(xtindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), tindex)); + setName(ctx.emission_context, isa, "exactly_isa"); + return isa; } else if (arg.Vboxed) { // test for (arg.TIndex == 0x80 && typeof(arg.V) == type) Value *isboxed = ctx.builder.CreateICmpEQ(arg.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); + setName(ctx.emission_context, isboxed, "isboxed"); BasicBlock *currBB = ctx.builder.GetInsertBlock(); BasicBlock *isaBB = BasicBlock::Create(ctx.builder.getContext(), "isa", ctx.f); BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_isa", ctx.f); @@ -1509,13 +1544,16 @@ static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_data PHINode *istype = ctx.builder.CreatePHI(getInt1Ty(ctx.builder.getContext()), 2); istype->addIncoming(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0), currBB); istype->addIncoming(istype_boxed, isaBB); + setName(ctx.emission_context, istype, "exactly_isa"); return istype; } else { // handle the case where we know that `arg` is unboxed (but of unknown type), but that concrete type `type` cannot be unboxed return 
ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); } } - return ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg, false, true), emit_tagfrom(ctx, dt)); + auto isa = ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg, false, true), emit_tagfrom(ctx, dt)); + setName(ctx.emission_context, isa, "exactly_isa"); + return isa; } static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, @@ -1585,6 +1623,7 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, ctx.builder.CreateOr( ctx.builder.CreateICmpEQ(typ, emit_tagfrom(ctx, jl_unionall_type)), ctx.builder.CreateICmpEQ(typ, emit_tagfrom(ctx, jl_typeofbottom_type)))); + setName(ctx.emission_context, val, "is_kind"); return std::make_pair(val, false); } // intersection with Type needs to be handled specially @@ -1685,6 +1724,7 @@ static Value *emit_isconcrete(jl_codectx_t &ctx, Value *typ) isconcrete = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isconcrete, Align(1))); isconcrete = ctx.builder.CreateLShr(isconcrete, 1); isconcrete = ctx.builder.CreateTrunc(isconcrete, getInt1Ty(ctx.builder.getContext())); + setName(ctx.emission_context, isconcrete, "isconcrete"); return isconcrete; } @@ -1718,6 +1758,7 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v if (bounds_check_enabled(ctx, boundscheck)) { ++EmittedBoundschecks; Value *ok = ctx.builder.CreateICmpULT(im1, len); + setName(ctx.emission_context, ok, "boundscheck"); BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f); BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "pass"); ctx.builder.CreateCondBr(ok, passBB, failBB); @@ -1736,6 +1777,7 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v else if (!ainfo.ispointer()) { // CreateAlloca is OK here since we are on an error branch Value *tempSpace = ctx.builder.CreateAlloca(a->getType()); + setName(ctx.emission_context, tempSpace, "errorbox"); 
ctx.builder.CreateStore(a, tempSpace); a = tempSpace; } @@ -1827,12 +1869,15 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j // note that nb == jl_Module->getDataLayout().getTypeAllocSize(elty) or getTypeStoreSize, depending on whether it is a struct or primitive type AllocaInst *intcast = NULL; if (Order == AtomicOrdering::NotAtomic) { - if (!isboxed && !aliasscope && elty->isAggregateType() && !CountTrackedPointers(elty).count) + if (!isboxed && !aliasscope && elty->isAggregateType() && !CountTrackedPointers(elty).count) { intcast = emit_static_alloca(ctx, elty); + setName(ctx.emission_context, intcast, "aggregate_load_box"); + } } else { if (!isboxed && !elty->isIntOrPtrTy()) { intcast = emit_static_alloca(ctx, elty); + setName(ctx.emission_context, intcast, "atomic_load_box"); elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); } } @@ -1948,8 +1993,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, unsigned nb = isboxed ? sizeof(void*) : jl_datatype_size(jltype); AllocaInst *intcast = nullptr; if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) { - if (!issetfield) + if (!issetfield) { intcast = emit_static_alloca(ctx, elty); + setName(ctx.emission_context, intcast, "atomic_store_box"); + } elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); } Type *realelty = elty; @@ -1987,6 +2034,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, if (issetfield || (Order == AtomicOrdering::NotAtomic && isswapfield)) { if (isswapfield) { auto *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment)); + setName(ctx.emission_context, load, "swapfield_load"); if (isboxed) load->setOrdering(AtomicOrdering::Unordered); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); @@ -2007,11 +2055,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, emit_unbox_store(ctx, rhs, ptr, tbaa, alignment); } } - else if (isswapfield && !isboxed) { + else if (isswapfield && isStrongerThanMonotonic(Order)) 
{ assert(Order != AtomicOrdering::NotAtomic && r); - // we can't handle isboxed here as a workaround for really bad LLVM - // design issue: plain Xchg only works with integers auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Align(alignment), Order); + setName(ctx.emission_context, store, "swapfield_atomicrmw"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); ai.decorateInst(store); @@ -2036,6 +2083,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, ctx.builder.CreateCondBr(SameType, BB, SkipBB); ctx.builder.SetInsertPoint(SkipBB); LoadInst *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment)); + setName(ctx.emission_context, load, "atomic_replacefield_initial"); load->setOrdering(FailOrder == AtomicOrdering::NotAtomic && isboxed ? AtomicOrdering::Monotonic : FailOrder); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); @@ -2066,6 +2114,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, else { // swap or modify LoadInst *Current = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment)); Current->setOrdering(Order == AtomicOrdering::NotAtomic && !isboxed ? 
Order : AtomicOrdering::Monotonic); + setName(ctx.emission_context, Current, "atomic_initial"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); Compare = ai.decorateInst(Current); @@ -2268,8 +2317,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, // Returns ctx.types().T_pjlvalue static Value *julia_bool(jl_codectx_t &ctx, Value *cond) { - return ctx.builder.CreateSelect(cond, literal_pointer_val(ctx, jl_true), + auto boolean = ctx.builder.CreateSelect(cond, literal_pointer_val(ctx, jl_true), literal_pointer_val(ctx, jl_false)); + setName(ctx.emission_context, boolean, "bool"); + return boolean; } // --- accessing the representations of built-in data types --- @@ -2355,6 +2406,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, ctx.builder.CreateExtractValue(strct.V, makeArrayRef(i)), fld); } + setName(ctx.emission_context, fld, "getfield"); jl_value_t *jft = issame ? jl_svecref(types, 0) : (jl_value_t*)jl_any_type; if (isboxed && maybe_null) null_pointer_check(ctx, fld); @@ -2388,7 +2440,9 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, ctx.types().T_prjlvalue, emit_bitcast(ctx, data_pointer(ctx, strct), ctx.types().T_pprjlvalue), idx0()); + setName(ctx.emission_context, fldptr, "getfield_ptr"); LoadInst *fld = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, fldptr, Align(sizeof(void*))); + setName(ctx.emission_context, fld, "getfield"); fld->setOrdering(AtomicOrdering::Unordered); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, strct.tbaa); ai.decorateInst(fld); @@ -2440,6 +2494,7 @@ static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex, // move value to an immutable stack slot (excluding tindex) Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (fsz + al - 1) / al); AllocaInst *lv = emit_static_alloca(ctx, AT); + setName(ctx.emission_context, lv, "immutable_union"); if (al > 1) lv->setAlignment(Align(al)); 
jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); @@ -2510,7 +2565,9 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st addr = ctx.builder.CreateConstInBoundsGEP2_32(lt, staddr, 0, idx); } if (jl_field_isptr(jt, idx)) { + setName(ctx.emission_context, addr, "getfield_addr"); LoadInst *Load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, maybe_bitcast(ctx, addr, ctx.types().T_pprjlvalue), Align(sizeof(void*))); + setName(ctx.emission_context, Load, "getfield"); Load->setOrdering(order <= jl_memory_order_notatomic ? AtomicOrdering::Unordered : get_llvm_atomic_order(order)); maybe_mark_load_dereferenceable(Load, maybe_null, jl_field_type(jt, idx)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); @@ -2573,6 +2630,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st IntegerType *ET = cast(T->getStructElementType(st_idx)); unsigned align = (ET->getBitWidth() + 7) / 8; lv = emit_static_alloca(ctx, ET); + setName(ctx.emission_context, lv, "union_split"); lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz + align - 1) / align)); // emit all of the align-sized words unsigned i = 0; @@ -2716,6 +2774,7 @@ static Value *emit_arraysize(jl_codectx_t &ctx, const jl_cgval_t &tinfo, Value * t, ctx.builder.CreateAdd(dim, ConstantInt::get(dim->getType(), o)), tbaa, ctx.types().T_size); + setName(ctx.emission_context, load, "arraysize"); MDBuilder MDB(ctx.builder.getContext()); auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, arraytype_maxsize(tinfo.typ))); load->setMetadata(LLVMContext::MD_range, rng); @@ -2751,7 +2810,9 @@ static Value *emit_arraylen_prim(jl_codectx_t &ctx, const jl_cgval_t &tinfo) Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray, emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), 1); //index (not offset) of length field in ctx.types().T_pjlarray + 
setName(ctx.emission_context, addr, "arraylen_ptr"); LoadInst *len = ctx.builder.CreateAlignedLoad(ctx.types().T_size, addr, ctx.types().alignof_ptr); + setName(ctx.emission_context, len, "arraylen"); len->setOrdering(AtomicOrdering::NotAtomic); MDBuilder MDB(ctx.builder.getContext()); auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, arraytype_maxsize(tinfo.typ))); @@ -2770,6 +2831,7 @@ static Value *emit_arrayptr_internal(jl_codectx_t &ctx, const jl_cgval_t &tinfo, ++EmittedArrayptr; Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray, emit_bitcast(ctx, t, ctx.types().T_pjlarray), 0); + setName(ctx.emission_context, addr, "arrayptr_ptr"); // Normally allocated array of 0 dimension always have a inline pointer. // However, we can't rely on that here since arrays can also be constructed from C pointers. PointerType *PT = cast(addr->getType()); @@ -2788,6 +2850,7 @@ static Value *emit_arrayptr_internal(jl_codectx_t &ctx, const jl_cgval_t &tinfo, } LoadInst *LI = ctx.builder.CreateAlignedLoad(LoadT, addr, Align(sizeof(char *))); + setName(ctx.emission_context, LI, "arrayptr"); LI->setOrdering(AtomicOrdering::NotAtomic); LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(ctx.builder.getContext(), None)); jl_aliasinfo_t aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, arraytype_constshape(tinfo.typ) ? 
ctx.tbaa().tbaa_const : ctx.tbaa().tbaa_arrayptr); @@ -2828,8 +2891,11 @@ static Value *emit_arrayflags(jl_codectx_t &ctx, const jl_cgval_t &tinfo) ctx.types().T_jlarray, emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), arrayflag_field); + setName(ctx.emission_context, addr, "arrayflags_ptr"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_arrayflags); - return ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), addr, Align(sizeof(int16_t)))); + auto flags = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), addr, Align(sizeof(int16_t)))); + setName(ctx.emission_context, flags, "arrayflags"); + return flags; } static Value *emit_arrayndims(jl_codectx_t &ctx, const jl_cgval_t &ary) @@ -2839,6 +2905,7 @@ static Value *emit_arrayndims(jl_codectx_t &ctx, const jl_cgval_t &ary) cast(flags)->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(ctx.builder.getContext(), None)); flags = ctx.builder.CreateLShr(flags, 2); flags = ctx.builder.CreateAnd(flags, 0x1FF); // (1<<9) - 1 + setName(ctx.emission_context, flags, "arrayndims"); return flags; } @@ -2850,8 +2917,11 @@ static Value *emit_arrayelsize(jl_codectx_t &ctx, const jl_cgval_t &tinfo) Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray, emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), elsize_field); + setName(ctx.emission_context, addr, "arrayelsize_ptr"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - return ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), addr, Align(sizeof(int16_t)))); + auto elsize = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), addr, Align(sizeof(int16_t)))); + setName(ctx.emission_context, elsize, "arrayelsize"); + return elsize; } static Value *emit_arrayoffset(jl_codectx_t &ctx, const jl_cgval_t &tinfo, int nd) @@ -2866,8 +2936,11 @@ static Value 
*emit_arrayoffset(jl_codectx_t &ctx, const jl_cgval_t &tinfo, int n ctx.types().T_jlarray, emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), offset_field); + setName(ctx.emission_context, addr, "arrayoffset_ptr"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_arrayoffset); - return ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), addr, Align(sizeof(int32_t)))); + auto offset = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), addr, Align(sizeof(int32_t)))); + setName(ctx.emission_context, offset, "arrayoffset"); + return offset; } // Returns the size of the array represented by `tinfo` for the given dimension `dim` if @@ -2910,12 +2983,15 @@ static Value *emit_array_nd_index( if (bc) { BasicBlock *okBB = BasicBlock::Create(ctx.builder.getContext(), "ib"); // if !(i < d) goto error - ctx.builder.CreateCondBr(ctx.builder.CreateICmpULT(ii, d), okBB, failBB); + auto bc = ctx.builder.CreateICmpULT(ii, d); + setName(ctx.emission_context, bc, "inbounds"); + ctx.builder.CreateCondBr(bc, okBB, failBB); ctx.f->getBasicBlockList().push_back(okBB); ctx.builder.SetInsertPoint(okBB); } #endif stride = ctx.builder.CreateMul(stride, d); + setName(ctx.emission_context, stride, "stride"); } } #if CHECK_BOUNDS==1 @@ -2925,20 +3001,26 @@ static Value *emit_array_nd_index( if (nidxs == 1) { // Linear indexing: Check against the entire linear span of the array Value *alen = emit_arraylen(ctx, ainfo); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpULT(i, alen), endBB, failBB); + auto bc = ctx.builder.CreateICmpULT(i, alen); + setName(ctx.emission_context, bc, "inbounds"); + ctx.builder.CreateCondBr(bc, endBB, failBB); } else if (nidxs >= (size_t)nd){ // No dimensions were omitted; just check the last remaining index assert(nd >= 0); Value *last_index = ii; Value *last_dimension = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nidxs, nd); - 
ctx.builder.CreateCondBr(ctx.builder.CreateICmpULT(last_index, last_dimension), endBB, failBB); + auto bc = ctx.builder.CreateICmpULT(last_index, last_dimension); + setName(ctx.emission_context, bc, "inbounds"); + ctx.builder.CreateCondBr(bc, endBB, failBB); } else { // There were fewer indices than dimensions; check the last remaining index BasicBlock *checktrailingdimsBB = BasicBlock::Create(ctx.builder.getContext(), "dimsib"); assert(nd >= 0); Value *last_index = ii; Value *last_dimension = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nidxs, nd); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpULT(last_index, last_dimension), checktrailingdimsBB, failBB); + auto bc = ctx.builder.CreateICmpULT(last_index, last_dimension); + setName(ctx.emission_context, bc, "inbounds"); + ctx.builder.CreateCondBr(bc, checktrailingdimsBB, failBB); ctx.f->getBasicBlockList().push_back(checktrailingdimsBB); ctx.builder.SetInsertPoint(checktrailingdimsBB); // And then also make sure that all dimensions that weren't explicitly @@ -2946,18 +3028,23 @@ static Value *emit_array_nd_index( for (size_t k = nidxs+1; k < (size_t)nd; k++) { BasicBlock *dimsokBB = BasicBlock::Create(ctx.builder.getContext(), "dimsok"); Value *dim = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, k, nd); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(dim, ConstantInt::get(ctx.types().T_size, 1)), dimsokBB, failBB); + auto bc = ctx.builder.CreateICmpEQ(dim, ConstantInt::get(ctx.types().T_size, 1)); + setName(ctx.emission_context, bc, "inbounds"); + ctx.builder.CreateCondBr(bc, dimsokBB, failBB); ctx.f->getBasicBlockList().push_back(dimsokBB); ctx.builder.SetInsertPoint(dimsokBB); } Value *dim = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nd, nd); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(dim, ConstantInt::get(ctx.types().T_size, 1)), endBB, failBB); + auto bc2 = ctx.builder.CreateICmpEQ(dim, ConstantInt::get(ctx.types().T_size, 1)); + setName(ctx.emission_context, bc2, "inbounds"); + 
ctx.builder.CreateCondBr(bc2, endBB, failBB); } ctx.f->getBasicBlockList().push_back(failBB); ctx.builder.SetInsertPoint(failBB); // CreateAlloca is OK here since we are on an error branch Value *tmp = ctx.builder.CreateAlloca(ctx.types().T_size, ConstantInt::get(ctx.types().T_size, nidxs)); + setName(ctx.emission_context, tmp, "errorbox"); for (size_t k = 0; k < nidxs; k++) { ctx.builder.CreateAlignedStore(idxs[k], ctx.builder.CreateInBoundsGEP(ctx.types().T_size, tmp, ConstantInt::get(ctx.types().T_size, k)), ctx.types().alignof_ptr); } @@ -3175,6 +3262,7 @@ static Value *compute_box_tindex(jl_codectx_t &ctx, Value *datatype_tag, jl_valu }, ut, counter); + setName(ctx.emission_context, tindex, "tindex"); return tindex; } @@ -3225,6 +3313,7 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, // try to pick an Integer type size such that SROA will emit reasonable code Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * min_align), (nbytes + min_align - 1) / min_align); AllocaInst *lv = emit_static_alloca(ctx, AT); + setName(ctx.emission_context, lv, "unionalloca"); if (align > 1) lv->setAlignment(Align(align)); return lv; @@ -3281,6 +3370,7 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB box = _boxed_special(ctx, vinfo_r, t); if (!box) { box = emit_allocobj(ctx, jt); + setName(ctx.emission_context, box, "unionbox"); init_bits_cgval(ctx, box, vinfo_r, jl_is_mutable(jt) ? 
ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut); } } @@ -3409,6 +3499,7 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab box = emit_allocobj(ctx, (jl_datatype_t*)jt); Value *decayed = decay_derived(ctx, box); AllocaInst *originalAlloca = cast(vinfo.V); + box->takeName(originalAlloca); decayed = maybe_bitcast(ctx, decayed, PointerType::getWithSamePointeeType(originalAlloca->getType(), AddressSpace::Derived)); // Warning: Very illegal IR here temporarily originalAlloca->mutateType(decayed->getType()); @@ -3419,6 +3510,7 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab ctx.builder.restoreIP(IP); } else { box = emit_allocobj(ctx, (jl_datatype_t*)jt); + setName(ctx.emission_context, box, "box"); init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut); } } @@ -3529,6 +3621,7 @@ static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std Value *istype = ctx.builder.CreateICmpEQ(emit_datatype_name(ctx, t), literal_pointer_val(ctx, (jl_value_t*)jl_pointer_typename)); + setName(ctx.emission_context, istype, "istype"); BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f); BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "pass"); ctx.builder.CreateCondBr(istype, passBB, failBB); @@ -3770,6 +3863,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg } else { strct = emit_static_alloca(ctx, lt); + setName(ctx.emission_context, strct, "newstruct"); if (tracked.count) undef_derived_strct(ctx, strct, sty, ctx.tbaa().tbaa_stack); } @@ -3848,6 +3942,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg Type *ET = IntegerType::get(ctx.builder.getContext(), 8 * al); assert(lt->getStructElementType(llvm_idx) == ET); AllocaInst *lv = emit_static_alloca(ctx, ET); + setName(ctx.emission_context, lv, "unioninit"); lv->setOperand(0, 
ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz + al - 1) / al)); emit_unionmove(ctx, lv, ctx.tbaa().tbaa_stack, fval_info, nullptr); // emit all of the align-sized words @@ -3938,6 +4033,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg } } Value *strct = emit_allocobj(ctx, sty); + setName(ctx.emission_context, strct, "newstruct"); jl_cgval_t strctinfo = mark_julia_type(ctx, strct, true, ty); strct = decay_derived(ctx, strct); undef_derived_strct(ctx, strct, sty, strctinfo.tbaa); diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c index 1c52f969a11f7..de5f2a2770c04 100644 --- a/src/codegen-stubs.c +++ b/src/codegen-stubs.c @@ -12,13 +12,13 @@ JL_DLLEXPORT void jl_dump_native_fallback(void *native_code, const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, - const char *sysimg_data, size_t sysimg_len, ios_t *s) UNAVAILABLE + ios_t *z, ios_t *s) UNAVAILABLE JL_DLLEXPORT void jl_get_llvm_gvs_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE JL_DLLEXPORT void jl_get_llvm_external_fns_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE JL_DLLEXPORT void jl_extern_c_fallback(jl_function_t *f, jl_value_t *rt, jl_value_t *argt, char *name) UNAVAILABLE JL_DLLEXPORT jl_value_t *jl_dump_method_asm_fallback(jl_method_instance_t *linfo, size_t world, - char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE + char emit_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE JL_DLLEXPORT jl_value_t *jl_dump_function_ir_fallback(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo) UNAVAILABLE JL_DLLEXPORT void jl_get_llvmf_defn_fallback(jl_llvmf_dump_t *dump, jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params) UNAVAILABLE @@ -83,9 +83,9 @@ JL_DLLEXPORT void jl_dump_llvm_opt_fallback(void *s) { } 
-JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm_fallback(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE +JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm_fallback(uint64_t fptr, char emit_mc, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE -JL_DLLEXPORT jl_value_t *jl_dump_function_asm_fallback(jl_llvmf_dump_t* dump, char raw_mc, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE +JL_DLLEXPORT jl_value_t *jl_dump_function_asm_fallback(jl_llvmf_dump_t* dump, char emit_mc, const char* asm_variant, const char *debuginfo, char binary, char raw) UNAVAILABLE JL_DLLEXPORT void jl_get_function_id_fallback(void *native_code, jl_code_instance_t *ncode, int32_t *func_idx, int32_t *specfunc_idx) UNAVAILABLE @@ -109,6 +109,11 @@ JL_DLLEXPORT uint64_t jl_getUnwindInfo_fallback(uint64_t dwAddr) JL_DLLEXPORT void jl_add_optimization_passes_fallback(void *PM, int opt_level, int lower_intrinsics) UNAVAILABLE +JL_DLLEXPORT void jl_build_newpm_pipeline_fallback(void *MPM, void *PB, int Speedup, int Size, + int lower_intrinsics, int dump_native, int external_use, int llvm_only) UNAVAILABLE + +JL_DLLEXPORT void jl_register_passbuilder_callbacks_fallback(void *PB) { } + JL_DLLEXPORT void LLVMExtraAddLowerSimdLoopPass_fallback(void *PM) UNAVAILABLE JL_DLLEXPORT void LLVMExtraAddFinalLowerGCPass_fallback(void *PM) UNAVAILABLE @@ -138,3 +143,42 @@ JL_DLLEXPORT void LLVMExtraAddGCInvariantVerifierPass_fallback(void *PM, bool_t JL_DLLEXPORT void LLVMExtraAddDemoteFloat16Pass_fallback(void *PM) UNAVAILABLE JL_DLLEXPORT void LLVMExtraAddCPUFeaturesPass_fallback(void *PM) UNAVAILABLE + +#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ + JL_DLLEXPORT void LLVMExtraMPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE +#define CGSCC_PASS(NAME, CLASS, CREATE_PASS) \ + JL_DLLEXPORT void LLVMExtraCGPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE +#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ + JL_DLLEXPORT void 
LLVMExtraFPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE +#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \ + JL_DLLEXPORT void LLVMExtraLPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE + +#include "llvm-julia-passes.inc" + +#undef MODULE_PASS +#undef CGSCC_PASS +#undef FUNCTION_PASS +#undef LOOP_PASS + +//LLVM C api to the julia JIT +JL_DLLEXPORT void* JLJITGetLLVMOrcExecutionSession_fallback(void* JIT) UNAVAILABLE + +JL_DLLEXPORT void* JLJITGetJuliaOJIT_fallback(void) UNAVAILABLE + +JL_DLLEXPORT void* JLJITGetExternalJITDylib_fallback(void* JIT) UNAVAILABLE + +JL_DLLEXPORT void* JLJITAddObjectFile_fallback(void* JIT, void* JD, void* ObjBuffer) UNAVAILABLE + +JL_DLLEXPORT void* JLJITAddLLVMIRModule_fallback(void* JIT, void* JD, void* TSM) UNAVAILABLE + +JL_DLLEXPORT void* JLJITLookup_fallback(void* JIT, void* Result, const char *Name) UNAVAILABLE + +JL_DLLEXPORT void* JLJITMangleAndIntern_fallback(void* JIT, const char *Name) UNAVAILABLE + +JL_DLLEXPORT const char *JLJITGetTripleString_fallback(void* JIT) UNAVAILABLE + +JL_DLLEXPORT const char JLJITGetGlobalPrefix_fallback(void* JIT) UNAVAILABLE + +JL_DLLEXPORT const char *JLJITGetDataLayoutString_fallback(void* JIT) UNAVAILABLE + +JL_DLLEXPORT void* JLJITGetIRCompileLayer_fallback(void* JIT) UNAVAILABLE diff --git a/src/codegen.cpp b/src/codegen.cpp index a5d54f16ed2e6..122170ae3fa97 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -167,6 +167,16 @@ typedef Instruction TerminatorInst; #undef DEBUG_TYPE //LLVM occasionally likes to set DEBUG_TYPE in a header... 
#define DEBUG_TYPE "julia_irgen_codegen" +void setName(jl_codegen_params_t ¶ms, Value *V, const Twine &Name) +{ + // we do the constant check again later, duplicating it here just makes sure the assertion + // fires on debug builds even if debug info is not enabled + assert((isa(V) || isa(V)) && "Should only set names on instructions!"); + if (params.debug_level && !isa(V)) { + V->setName(Name); + } +} + STATISTIC(EmittedAllocas, "Number of allocas emitted"); STATISTIC(EmittedIntToPtrs, "Number of inttoptrs emitted"); STATISTIC(ModulesCreated, "Number of LLVM Modules created"); @@ -1284,7 +1294,7 @@ static const auto &builtin_func_map() { static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction<>{XSTR(jl_new_opaque_closure_jlcall), get_func_sig, get_func_attrs}; -static _Atomic(int) globalUniqueGeneratedNames{1}; +static _Atomic(uint64_t) globalUniqueGeneratedNames{1}; // --- code generation --- extern "C" { @@ -1296,6 +1306,7 @@ extern "C" { #endif (int) DICompileUnit::DebugEmissionKind::FullDebug, 1, + 1, jl_rettype_inferred_addr, NULL }; } @@ -1603,7 +1614,6 @@ class jl_codectx_t { Value *pgcstack = NULL; Instruction *topalloca = NULL; - bool debug_enabled = false; bool use_cache = false; bool external_linkage = false; const jl_cgparams_t *params = NULL; @@ -1719,7 +1729,7 @@ jl_aliasinfo_t jl_aliasinfo_t::fromTBAA(jl_codectx_t &ctx, MDNode *tbaa) { } static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed = NULL); -static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure); +static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg); static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval = -1); static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, 
jl_sym_t *s, jl_binding_t **pbnd, bool assign); @@ -1852,6 +1862,7 @@ static inline jl_cgval_t ghostValue(jl_codectx_t &ctx, jl_value_t *typ) typ = (jl_value_t*)jl_typeofbottom_type->super; } if (jl_is_type_type(typ)) { + assert(is_uniquerep_Type(typ)); // replace T::Type{T} with T, by assuming that T must be a leaftype of some sort jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ)); constant.constant = jl_tparam0(typ); @@ -1923,16 +1934,14 @@ static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, const jl_cgval_t &v static inline jl_cgval_t mark_julia_type(jl_codectx_t &ctx, Value *v, bool isboxed, jl_value_t *typ) { - if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) { - // no need to explicitly load/store a constant/ghost value - return ghostValue(ctx, typ); - } if (jl_is_type_type(typ)) { - jl_value_t *tp0 = jl_tparam0(typ); - if (jl_is_concrete_type(tp0) || tp0 == jl_bottom_type) { + if (is_uniquerep_Type(typ)) { // replace T::Type{T} with T return ghostValue(ctx, typ); } + } else if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) { + // no need to explicitly load/store a constant/ghost value + return ghostValue(ctx, typ); } Type *T = julia_type_to_llvm(ctx, typ); if (type_is_ghost(T)) { @@ -2023,6 +2032,7 @@ static void alloc_def_flag(jl_codectx_t &ctx, jl_varinfo_t& vi) assert((!vi.boxroot || vi.pTIndex) && "undef check is null pointer for boxed things"); if (vi.usedUndef) { vi.defFlag = emit_static_alloca(ctx, getInt1Ty(ctx.builder.getContext())); + setName(ctx.emission_context, vi.defFlag, "isdefined"); store_def_flag(ctx, vi, false); } } @@ -2121,6 +2131,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & v.typ, counter); } + setName(ctx.emission_context, new_tindex, "tindex"); // some of the values are still unboxed if (!isa(new_tindex)) { @@ -2135,6 +2146,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & wasboxed 
= ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); new_tindex = ctx.builder.CreateOr(wasboxed, new_tindex); wasboxed = ctx.builder.CreateICmpNE(wasboxed, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); + setName(ctx.emission_context, wasboxed, "wasboxed"); BasicBlock *currBB = ctx.builder.GetInsertBlock(); @@ -2171,6 +2183,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & }, typ, counter); + setName(ctx.emission_context, union_box_tindex, "union_box_tindex"); if (union_box_dt) { BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_union_isa", ctx.f); ctx.builder.CreateBr(postBB); @@ -2182,6 +2195,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & tindex_phi->addIncoming(new_tindex, currBB); tindex_phi->addIncoming(union_box_tindex, post_union_isaBB); new_tindex = tindex_phi; + setName(ctx.emission_context, new_tindex, "tindex"); } } if (!skip_box.all()) { @@ -2340,6 +2354,21 @@ std::unique_ptr jl_create_llvm_module(StringRef name, LLVMContext &conte return m; } +static void jl_name_jlfunc_args(jl_codegen_params_t ¶ms, Function *F) { + assert(F->arg_size() == 3); + F->getArg(0)->setName("function"); + F->getArg(1)->setName("args"); + F->getArg(2)->setName("nargs"); +} + +static void jl_name_jlfuncparams_args(jl_codegen_params_t ¶ms, Function *F) { + assert(F->arg_size() == 4); + F->getArg(0)->setName("function"); + F->getArg(1)->setName("args"); + F->getArg(2)->setName("nargs"); + F->getArg(3)->setName("sparams"); +} + static void jl_init_function(Function *F, const Triple &TT) { // set any attributes that *must* be set on all functions @@ -2821,6 +2850,7 @@ static jl_cgval_t emit_globalref(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t * if (bnd->constp) return mark_julia_const(ctx, v); LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))); + setName(ctx.emission_context, v, 
jl_symbol_name(name)); v->setOrdering(order); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_binding); ai.decorateInst(v); @@ -2895,6 +2925,7 @@ static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, Value *tindex2 = arg2.TIndex; tindex2 = ctx.builder.CreateAnd(tindex2, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f)); Value *typeeq = ctx.builder.CreateICmpEQ(tindex, tindex2); + setName(ctx.emission_context, typeeq, "typematch"); tindex = ctx.builder.CreateSelect(typeeq, tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x00)); BasicBlock *defaultBB = BasicBlock::Create(ctx.builder.getContext(), "unionbits_is_boxed", ctx.f); SwitchInst *switchInst = ctx.builder.CreateSwitch(tindex, defaultBB); @@ -2926,6 +2957,7 @@ static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, ctx.builder.CreateCall(trap_func); ctx.builder.CreateUnreachable(); ctx.builder.SetInsertPoint(postBB); + setName(ctx.emission_context, phi, "unionbits_is"); return phi; } @@ -3409,14 +3441,16 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } else { Value *idx_dyn = emit_unbox(ctx, ctx.types().T_size, idx, (jl_value_t*)jl_long_type); - error_unless(ctx, ctx.builder.CreateICmpSGT(idx_dyn, Constant::getNullValue(ctx.types().T_size)), - "arraysize: dimension out of range"); + auto positive = ctx.builder.CreateICmpSGT(idx_dyn, Constant::getNullValue(ctx.types().T_size)); + setName(ctx.emission_context, positive, "ispositive"); + error_unless(ctx, positive, "arraysize: dimension out of range"); BasicBlock *outBB = BasicBlock::Create(ctx.builder.getContext(), "outofrange", ctx.f); BasicBlock *inBB = BasicBlock::Create(ctx.builder.getContext(), "inrange"); BasicBlock *ansBB = BasicBlock::Create(ctx.builder.getContext(), "arraysize"); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpSLE(idx_dyn, - ConstantInt::get(ctx.types().T_size, ndims)), - inBB, outBB); + auto oor = 
ctx.builder.CreateICmpSLE(idx_dyn, + ConstantInt::get(ctx.types().T_size, ndims)); + setName(ctx.emission_context, oor, "sizeddim"); + ctx.builder.CreateCondBr(oor, inBB, outBB); ctx.builder.SetInsertPoint(outBB); Value *v_one = ConstantInt::get(ctx.types().T_size, 1); ctx.builder.CreateBr(ansBB); @@ -3430,6 +3464,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, PHINode *result = ctx.builder.CreatePHI(ctx.types().T_size, 2); result->addIncoming(v_one, outBB); result->addIncoming(v_sz, inBB); + setName(ctx.emission_context, result, "arraysize"); *ret = mark_julia_type(ctx, result, false, jl_long_type); return true; } @@ -3477,17 +3512,25 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, data = emit_bitcast(ctx, data, AT->getPointerTo()); // isbits union selector bytes are stored after a->maxsize Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(getInt16Ty(ctx.builder.getContext()), nd)); + setName(ctx.emission_context, ndims, "ndims"); Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 1)); + setName(ctx.emission_context, is_vector, "isvec"); Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, ctx.types().T_size)); + setName(ctx.emission_context, selidx_v, "selidx_v"); Value *selidx_m = emit_arraylen(ctx, ary); Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m); + setName(ctx.emission_context, selidx, "selidx"); ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx); + setName(ctx.emission_context, ptindex, "ptindex"); data = ctx.builder.CreateInBoundsGEP(AT, data, idx); + setName(ctx.emission_context, data, "data"); } ptindex = emit_bitcast(ctx, ptindex, getInt8PtrTy(ctx.builder.getContext())); ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, offset); ptindex = 
ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx); *ret = emit_unionload(ctx, data, ptindex, ety, elsz, al, ctx.tbaa().tbaa_arraybuf, true, union_max, ctx.tbaa().tbaa_arrayselbyte); + if (ret->V) + setName(ctx.emission_context, ret->V, "arrayref"); } else { MDNode *aliasscope = (f == jl_builtin_const_arrayref) ? ctx.noalias().aliasscope.current : nullptr; @@ -3498,6 +3541,8 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, aliasscope, isboxed, AtomicOrdering::NotAtomic); + if (ret->V) + setName(ctx.emission_context, ret->V, "arrayref"); } return true; } @@ -3544,6 +3589,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, // the owner of the data is ary itself except if ary->how == 3 flags = ctx.builder.CreateAnd(flags, 3); Value *is_owned = ctx.builder.CreateICmpEQ(flags, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 3)); + setName(ctx.emission_context, is_owned, "has_owner"); BasicBlock *curBB = ctx.builder.GetInsertBlock(); BasicBlock *ownedBB = BasicBlock::Create(ctx.builder.getContext(), "array_owned", ctx.f); BasicBlock *mergeBB = BasicBlock::Create(ctx.builder.getContext(), "merge_own", ctx.f); @@ -3557,6 +3603,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, emit_bitcast(ctx, decay_derived(ctx, aryv), ctx.types().T_pprjlvalue), jl_array_data_owner_offset(nd) / sizeof(jl_value_t*)), Align(sizeof(void*))); + setName(ctx.emission_context, own_ptr, "external_owner"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); ai.decorateInst(maybe_mark_load_dereferenceable(own_ptr, false, (jl_value_t*)jl_array_any_type)); } @@ -3570,6 +3617,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, data_owner = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 2); data_owner->addIncoming(aryv, curBB); data_owner->addIncoming(own_ptr, ownedBB); + setName(ctx.emission_context, 
data_owner, "data_owner"); } if (!isboxed && jl_is_uniontype(ety)) { Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al); @@ -3586,15 +3634,21 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, else { Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(getInt16Ty(ctx.builder.getContext()), nd)); Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 1)); + setName(ctx.emission_context, is_vector, "is_vector"); Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, ctx.types().T_size)); + setName(ctx.emission_context, selidx_v, "selidx_v"); Value *selidx_m = emit_arraylen(ctx, ary); Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m); + setName(ctx.emission_context, selidx, "selidx"); ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx); + setName(ctx.emission_context, ptindex, "ptindex"); data = ctx.builder.CreateInBoundsGEP(AT, data, idx); + setName(ctx.emission_context, data, "data"); } ptindex = emit_bitcast(ctx, ptindex, getInt8PtrTy(ctx.builder.getContext())); ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, offset); ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx); + setName(ctx.emission_context, ptindex, "ptindex"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_arrayselbyte); ai.decorateInst(ctx.builder.CreateStore(tindex, ptindex)); if (elsz > 0 && (!jl_is_datatype(val.typ) || jl_datatype_size(val.typ) > 0)) { @@ -3693,6 +3747,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, idx = emit_bounds_check(ctx, va_ary, NULL, idx, valen, boundscheck); idx = ctx.builder.CreateAdd(idx, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs)); Instruction *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, 
ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, idx), Align(sizeof(void*))); + setName(ctx.emission_context, v, "getfield"); // if we know the result type of this load, we will mark that information here too jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_value); ai.decorateInst(maybe_mark_load_dereferenceable(v, false, rt)); @@ -3861,6 +3916,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, Value *fieldtyp_p = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, decay_derived(ctx, emit_bitcast(ctx, types_svec, ctx.types().T_pprjlvalue)), idx); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); Value *fieldtyp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, fieldtyp_p, Align(sizeof(void*)))); + setName(ctx.emission_context, fieldtyp, "fieldtype"); *ret = mark_julia_type(ctx, fieldtyp, true, (jl_value_t*)jl_type_type); return true; } @@ -3899,6 +3955,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, INTPTR_MAX)); cast(len)->setMetadata(LLVMContext::MD_range, rng); } + setName(ctx.emission_context, len, "sizeof"); *ret = mark_julia_type(ctx, len, false, jl_long_type); return true; } @@ -3913,6 +3970,8 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, elsize = ctx.builder.CreateZExt(emit_arrayelsize(ctx, obj), ctx.types().T_size); } *ret = mark_julia_type(ctx, ctx.builder.CreateMul(len, elsize), false, jl_long_type); + if (ret->V) + setName(ctx.emission_context, ret->V, "sizeof"); return true; } } @@ -4029,6 +4088,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } } Value *isdef = ctx.builder.CreateIsNotNull(fldv); + setName(ctx.emission_context, isdef, "isdefined"); *ret = mark_julia_type(ctx, isdef, false, jl_bool_type); } else { @@ 
-4107,7 +4167,8 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos { ++EmittedSpecfunCalls; // emit specialized call site - jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure); + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); + jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure, gcstack_arg); FunctionType *cft = returninfo.decl.getFunctionType(); *cc = returninfo.cc; *return_roots = returninfo.return_roots; @@ -4129,6 +4190,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos break; case jl_returninfo_t::Union: result = emit_static_alloca(ctx, ArrayType::get(getInt8Ty(ctx.builder.getContext()), returninfo.union_bytes)); + setName(ctx.emission_context, result, "sret_box"); if (returninfo.union_align > 1) result->setAlignment(Align(returninfo.union_align)); argvals[idx] = result; @@ -4141,7 +4203,10 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos argvals[idx] = return_roots; idx++; } - + if (gcstack_arg) { + argvals[idx] = ctx.pgcstack; + idx++; + } for (size_t i = 0; i < nargs; i++) { jl_value_t *jt = jl_nth_slot_type(specTypes, i); // n.b.: specTypes is required to be a datatype by construction for specsig @@ -4202,9 +4267,12 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos } jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); TheCallee = ai.decorateInst(ctx.builder.CreateAlignedLoad(TheCallee->getType(), GV, Align(sizeof(void*)))); + setName(ctx.emission_context, TheCallee, namep); } CallInst *call = ctx.builder.CreateCall(cft, TheCallee, argvals); call->setAttributes(returninfo.attrs); + if (gcstack_arg) + call->setCallingConv(CallingConv::Swift); jl_cgval_t retval; switch (returninfo.cc) { @@ -4269,6 +4337,7 @@ static jl_cgval_t 
emit_call_specfun_boxed(jl_codectx_t &ctx, jl_value_t *jlretty } jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); theFptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(pfunc, GV, Align(sizeof(void*)))); + setName(ctx.emission_context, theFptr, namep); } else { theFptr = jl_Module->getOrInsertFunction(specFunctionObject, ctx.types().T_jlfunc).getCallee(); @@ -4567,16 +4636,20 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t GlobalVariable *bindinggv = new GlobalVariable(*ctx.f->getParent(), ctx.types().T_pjlvalue, false, GlobalVariable::PrivateLinkage, initnul); LoadInst *cachedval = ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, bindinggv, Align(sizeof(void*))); + setName(ctx.emission_context, cachedval, jl_symbol_name(m->name) + StringRef(".") + jl_symbol_name(s) + ".cached"); cachedval->setOrdering(AtomicOrdering::Unordered); BasicBlock *have_val = BasicBlock::Create(ctx.builder.getContext(), "found"); BasicBlock *not_found = BasicBlock::Create(ctx.builder.getContext(), "notfound"); BasicBlock *currentbb = ctx.builder.GetInsertBlock(); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpNE(cachedval, initnul), have_val, not_found); + auto iscached = ctx.builder.CreateICmpNE(cachedval, initnul); + setName(ctx.emission_context, iscached, "iscached"); + ctx.builder.CreateCondBr(iscached, have_val, not_found); ctx.f->getBasicBlockList().push_back(not_found); ctx.builder.SetInsertPoint(not_found); Value *bval = ctx.builder.CreateCall(prepare_call(assign ? 
jlgetbindingwrorerror_func : jlgetbindingorerror_func), { literal_pointer_val(ctx, (jl_value_t*)m), literal_pointer_val(ctx, (jl_value_t*)s) }); + setName(ctx.emission_context, bval, jl_symbol_name(m->name) + StringRef(".") + jl_symbol_name(s) + ".found"); ctx.builder.CreateAlignedStore(bval, bindinggv, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release); ctx.builder.CreateBr(have_val); ctx.f->getBasicBlockList().push_back(have_val); @@ -4584,6 +4657,7 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t PHINode *p = ctx.builder.CreatePHI(ctx.types().T_pjlvalue, 2); p->addIncoming(cachedval, currentbb); p->addIncoming(bval, not_found); + setName(ctx.emission_context, p, jl_symbol_name(m->name) + StringRef(".") + jl_symbol_name(s)); return p; } if (assign) { @@ -4607,6 +4681,7 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, bool isvol, MDNode *tbaa) { LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))); + setName(ctx.emission_context, v, jl_symbol_name(name) + StringRef(".checked")); if (isvol) v->setVolatile(true); v->setOrdering(AtomicOrdering::Unordered); @@ -4633,6 +4708,7 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i) i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); Value *sp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)))); + setName(ctx.emission_context, sp, "sparam"); Value *isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp, false, true), emit_tagfrom(ctx, jl_tvar_type)); jl_unionall_t *sparam = (jl_unionall_t*)ctx.linfo->def.method->sig; for (size_t j = 0; j < i; j++) { @@ -4738,9 +4814,11 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va else { // copy value to a non-mutable (non-volatile 
SSA) location AllocaInst *varslot = cast(vi.value.V); + setName(ctx.emission_context, varslot, jl_symbol_name(varname)); Type *T = varslot->getAllocatedType(); assert(!varslot->isArrayAllocation() && "variables not expected to be VLA"); AllocaInst *ssaslot = cast(varslot->clone()); + setName(ctx.emission_context, ssaslot, jl_symbol_name(varname) + StringRef(".ssa")); ssaslot->insertAfter(varslot); if (vi.isVolatile) { Value *unbox = ctx.builder.CreateAlignedLoad(ssaslot->getAllocatedType(), varslot, @@ -4793,8 +4871,10 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va isnull = box_isnull; } } - if (isnull) + if (isnull) { + setName(ctx.emission_context, isnull, jl_symbol_name(varname) + StringRef("_is_null")); undef_var_error_ifnot(ctx, isnull, varname); + } return v; } @@ -5263,6 +5343,7 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met Function::ExternalLinkage, fname, jl_Module); jl_init_function(F, ctx.emission_context.TargetTriple); + jl_name_jlfunc_args(ctx.emission_context, F); F->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), F->getAttributes()})); } Function *specF = NULL; @@ -5273,7 +5354,7 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met specF = closure_m.getModuleUnlocked()->getFunction(closure_decls.specFunctionObject); if (specF) { jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, NULL, - closure_decls.specFunctionObject, sigtype, rettype, true); + closure_decls.specFunctionObject, sigtype, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg)); specF = cast(returninfo.decl.getCallee()); } } @@ -5735,6 +5816,7 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod GlobalVariable::InternalLinkage, name, M); jl_init_function(f, params.TargetTriple); + jl_name_jlfunc_args(params, f); //f->setAlwaysInline(); ctx.f = f; // for jl_Module BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", f); 
@@ -5786,13 +5868,15 @@ static void emit_cfunc_invalidate( DebugLoc noDbg; ctx.builder.SetCurrentDebugLocation(noDbg); allocate_gc_frame(ctx, b0); - Function::arg_iterator AI = gf_thunk->arg_begin(); SmallVector myargs(nargs); if (cc == jl_returninfo_t::SRet || cc == jl_returninfo_t::Union) ++AI; if (return_roots) ++AI; + if (JL_FEAT_TEST(ctx,gcstack_arg)){ + ++AI; // gcstack_arg + } for (size_t i = 0; i < nargs; i++) { jl_value_t *jt = jl_nth_slot_type(calltype, i); // n.b. specTypes is required to be a datatype by construction for specsig @@ -6221,6 +6305,7 @@ static Function* gen_cfun_wrapper( theFptr = Function::Create(ctx.types().T_jlfunc, GlobalVariable::ExternalLinkage, fname, jl_Module); jl_init_function(theFptr, ctx.emission_context.TargetTriple); + jl_name_jlfunc_args(ctx.emission_context, theFptr); addRetAttr(theFptr, Attribute::NonNull); } else { @@ -6258,8 +6343,9 @@ static Function* gen_cfun_wrapper( bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure; assert(calltype == 3); // emit a specsig call + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); StringRef protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, codeinst); - jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, protoname, lam->specTypes, astrt, is_opaque_closure); + jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, protoname, lam->specTypes, astrt, is_opaque_closure, gcstack_arg); FunctionType *cft = returninfo.decl.getFunctionType(); jlfunc_sret = (returninfo.cc == jl_returninfo_t::SRet); @@ -6274,9 +6360,11 @@ static Function* gen_cfun_wrapper( else { if (jlfunc_sret) { result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType()); + setName(ctx.emission_context, result, "sret"); assert(cast(result->getType())->hasSameElementTypeAs(cast(cft->getParamType(0)))); } else { result = emit_static_alloca(ctx, 
get_unionbytes_type(ctx.builder.getContext(), returninfo.union_bytes)); + setName(ctx.emission_context, result, "result_union"); assert(cast(result->getType())->hasSameElementTypeAs(cast(cft->getParamType(0)))); } } @@ -6284,8 +6372,11 @@ static Function* gen_cfun_wrapper( } if (returninfo.return_roots) { AllocaInst *return_roots = emit_static_alloca(ctx, get_returnroots_type(ctx, returninfo.return_roots)); + setName(ctx.emission_context, return_roots, "return_roots"); args.push_back(return_roots); } + if (gcstack_arg) + args.push_back(ctx.pgcstack); for (size_t i = 0; i < nargs + 1; i++) { // figure out how to repack the arguments jl_cgval_t &inputarg = inputargs[i]; @@ -6332,11 +6423,15 @@ static Function* gen_cfun_wrapper( emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context); theFptr = ctx.builder.CreateSelect(age_ok, theFptr, gf_thunk); } + assert(cast(theFptr->getType())->isOpaqueOrPointeeTypeMatches(returninfo.decl.getFunctionType())); CallInst *call = ctx.builder.CreateCall( returninfo.decl.getFunctionType(), theFptr, ArrayRef(args)); call->setAttributes(returninfo.attrs); + if (gcstack_arg) + call->setCallingConv(CallingConv::Swift); + switch (returninfo.cc) { case jl_returninfo_t::Boxed: retval = mark_julia_type(ctx, call, true, astrt); @@ -6417,6 +6512,8 @@ static Function* gen_cfun_wrapper( GlobalVariable::ExternalLinkage, funcName, M); jl_init_function(cw_make, ctx.emission_context.TargetTriple); + cw_make->getArg(0)->setName("wrapper"); + cw_make->getArg(1)->setName("newval"); BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", cw_make); IRBuilder<> cwbuilder(b0); Function::arg_iterator AI = cw_make->arg_begin(); @@ -6580,6 +6677,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con if (outboxed) { assert(jl_datatype_size(output_type) == sizeof(void*) * 4); Value *strct = emit_allocobj(ctx, 
(jl_datatype_t*)output_type); + setName(ctx.emission_context, strct, "cfun_result"); Value *derived_strct = emit_bitcast(ctx, decay_derived(ctx, strct), ctx.types().T_size->getPointerTo()); MDNode *tbaa = best_tbaa(ctx.tbaa(), output_type); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); @@ -6658,7 +6756,10 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret ++GeneratedInvokeWrappers; Function *w = Function::Create(get_func_sig(M->getContext()), GlobalVariable::ExternalLinkage, funcName, M); jl_init_function(w, params.TargetTriple); + jl_name_jlfunc_args(params, w); w->setAttributes(AttributeList::get(M->getContext(), {get_func_attrs(M->getContext()), w->getAttributes()})); + w->addFnAttr(Attribute::OptimizeNone); + w->addFnAttr(Attribute::NoInline); Function::arg_iterator AI = w->arg_begin(); Value *funcArg = &*AI++; Value *argArray = &*AI++; @@ -6692,6 +6793,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret case jl_returninfo_t::SRet: assert(cast(ftype->getParamType(0))->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType())); result = ctx.builder.CreateAlloca(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType()); + setName(ctx.emission_context, result, "sret"); args[idx] = result; idx++; break; @@ -6701,14 +6803,20 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret result->setAlignment(Align(f.union_align)); args[idx] = result; idx++; + setName(ctx.emission_context, result, "result_union"); break; } if (f.return_roots) { AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, f.return_roots)); + setName(ctx.emission_context, return_roots, "return_roots"); args[idx] = return_roots; idx++; } - + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); + if (gcstack_arg) { + args[idx] = ctx.pgcstack; + idx++; + } bool is_opaque_closure = jl_is_method(lam->def.value) 
&& lam->def.method->is_for_opaque_closure; for (size_t i = 0; i < jl_nparams(lam->specTypes) && idx < nfargs; ++i) { jl_value_t *ty = ((i == 0) && is_opaque_closure) ? (jl_value_t*)jl_any_type : @@ -6746,7 +6854,8 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret } CallInst *call = ctx.builder.CreateCall(f.decl, args); call->setAttributes(f.attrs); - + if (gcstack_arg) + call->setCallingConv(CallingConv::Swift); jl_cgval_t retval; if (retarg != -1) { Value *theArg; @@ -6788,7 +6897,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret return w; } -static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure) +static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg) { jl_returninfo_t props = {}; SmallVector fsig; @@ -6873,6 +6982,14 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value fsig.push_back(get_returnroots_type(ctx, props.return_roots)->getPointerTo(0)); } + if (gcstack_arg){ + AttrBuilder param(ctx.builder.getContext()); + param.addAttribute(Attribute::SwiftSelf); + param.addAttribute(Attribute::NonNull); + attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param)); + fsig.push_back(PointerType::get(JuliaType::get_ppjlvalue_ty(ctx.builder.getContext()), 0)); + } + for (size_t i = 0; i < jl_nparams(sig); i++) { jl_value_t *jt = jl_tparam(sig, i); bool isboxed = false; @@ -6934,7 +7051,8 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value else fval = emit_bitcast(ctx, fval, ftype->getPointerTo()); } - + if (gcstack_arg && isa(fval)) + cast(fval)->setCallingConv(CallingConv::Swift); props.decl = FunctionCallee(ftype, fval); props.attrs = attributes; return props; @@ -7076,11 +7194,9 @@ static 
jl_llvm_functions_t // jl_printf(JL_STDERR, "\n*** compiling %s at %s:%d\n\n", // jl_symbol_name(ctx.name), ctx.file.str().c_str(), toplineno); - ctx.debug_enabled = true; + bool debug_enabled = ctx.emission_context.debug_level != 0; if (dbgFuncName.empty()) // Should never happen anymore? - ctx.debug_enabled = 0; - if (jl_options.debug_level == 0) - ctx.debug_enabled = 0; + debug_enabled = false; // step 2. process var-info lists to see what vars need boxing int n_ssavalues = jl_is_long(src->ssavaluetypes) ? jl_unbox_long(src->ssavaluetypes) : jl_array_len(src->ssavaluetypes); @@ -7155,13 +7271,15 @@ static jl_llvm_functions_t // allocate Function declarations and wrapper objects //Safe because params holds ctx lock Module *M = TSM.getModuleUnlocked(); + M->addModuleFlag(Module::Warning, "julia.debug_level", ctx.emission_context.debug_level); jl_debugcache_t debuginfo; debuginfo.initialize(M); jl_returninfo_t returninfo = {}; Function *f = NULL; bool has_sret = false; if (specsig) { // assumes !va and !needsparams - returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes, jlrettype, ctx.is_opaque_closure); + returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes, + jlrettype, ctx.is_opaque_closure, JL_FEAT_TEST(ctx,gcstack_arg)); f = cast(returninfo.decl.getCallee()); has_sret = (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union); jl_init_function(f, ctx.emission_context.TargetTriple); @@ -7203,6 +7321,11 @@ static jl_llvm_functions_t GlobalVariable::ExternalLinkage, declarations.specFunctionObject, M); jl_init_function(f, ctx.emission_context.TargetTriple); + if (needsparams) { + jl_name_jlfuncparams_args(ctx.emission_context, f); + } else { + jl_name_jlfunc_args(ctx.emission_context, f); + } f->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), f->getAttributes()})); returninfo.decl = f; 
declarations.functionObject = needsparams ? "jl_fptr_sparam" : "jl_fptr_args"; @@ -7248,14 +7371,14 @@ static jl_llvm_functions_t tableKind = DICompileUnit::DebugNameTableKind::GNU; else tableKind = DICompileUnit::DebugNameTableKind::None; - DIBuilder dbuilder(*M, true, ctx.debug_enabled ? getOrCreateJuliaCU(*M, emissionKind, tableKind) : NULL); + DIBuilder dbuilder(*M, true, debug_enabled ? getOrCreateJuliaCU(*M, emissionKind, tableKind) : NULL); DIFile *topfile = NULL; DISubprogram *SP = NULL; DebugLoc noDbg, topdebugloc; - if (ctx.debug_enabled) { + if (debug_enabled) { topfile = dbuilder.createFile(ctx.file, "."); DISubroutineType *subrty; - if (jl_options.debug_level <= 1) + if (ctx.emission_context.debug_level <= 1) subrty = debuginfo.jl_di_func_null_sig; else if (!specsig) subrty = debuginfo.jl_di_func_sig; @@ -7276,7 +7399,7 @@ static jl_llvm_functions_t ); topdebugloc = DILocation::get(ctx.builder.getContext(), toplineno, 0, SP, NULL); f->setSubprogram(SP); - if (jl_options.debug_level >= 2) { + if (ctx.emission_context.debug_level >= 2) { const bool AlwaysPreserve = true; // Go over all arguments and local variables and initialize their debug information for (i = 0; i < nreq; i++) { @@ -7338,6 +7461,7 @@ static jl_llvm_functions_t fArg = &*AI++; argArray = &*AI++; pargArray = ctx.builder.CreateAlloca(argArray->getType()); + setName(ctx.emission_context, pargArray, "stackargs"); ctx.builder.CreateStore(argArray, pargArray, true/*volatile store to prevent removal of this alloca*/); argCount = &*AI++; ctx.argArray = argArray; @@ -7346,7 +7470,6 @@ static jl_llvm_functions_t ctx.spvals_ptr = &*AI++; } } - // step 6. 
set up GC frame allocate_gc_frame(ctx, b0); Value *last_age = NULL; @@ -7380,6 +7503,7 @@ static jl_llvm_functions_t lv->setName(jl_symbol_name(s)); varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack); varinfo.pTIndex = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext())); + setName(ctx.emission_context, varinfo.pTIndex, "tindex"); } else if (allunbox) { // all ghost values just need a selector allocated @@ -7406,7 +7530,7 @@ static jl_llvm_functions_t } varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack); alloc_def_flag(ctx, varinfo); - if (ctx.debug_enabled && varinfo.dinfo) { + if (debug_enabled && varinfo.dinfo) { assert((Metadata*)varinfo.dinfo->getType() != debuginfo.jl_pvalue_dillvmt); dbuilder.insertDeclare(lv, varinfo.dinfo, dbuilder.createExpression(), topdebugloc, @@ -7423,7 +7547,7 @@ static jl_llvm_functions_t StoreInst *SI = new StoreInst(Constant::getNullValue(ctx.types().T_prjlvalue), av, false, Align(sizeof(void*))); SI->insertAfter(ctx.topalloca); varinfo.boxroot = av; - if (ctx.debug_enabled && varinfo.dinfo) { + if (debug_enabled && varinfo.dinfo) { DIExpression *expr; if ((Metadata*)varinfo.dinfo->getType() == debuginfo.jl_pvalue_dillvmt) { expr = dbuilder.createExpression(); @@ -7552,6 +7676,12 @@ static jl_llvm_functions_t param.addAlignmentAttr(Align(sizeof(jl_value_t*))); attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); // function declaration attributes } + if (specsig && JL_FEAT_TEST(ctx, gcstack_arg)){ + Argument *Arg = &*AI; + ++AI; + AttrBuilder param(ctx.builder.getContext()); + attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); + } for (i = 0; i < nreq; i++) { jl_sym_t *s = slot_symbol(ctx, i); jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i); @@ -7618,7 +7748,7 @@ static jl_llvm_functions_t ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))), false, vi.value.typ)); theArg = mark_julia_type(ctx, load, 
true, vi.value.typ); - if (ctx.debug_enabled && vi.dinfo && !vi.boxroot && !vi.value.V) { + if (debug_enabled && vi.dinfo && !vi.boxroot && !vi.value.V) { SmallVector addr; addr.push_back(llvm::dwarf::DW_OP_deref); addr.push_back(llvm::dwarf::DW_OP_plus_uconst); @@ -7637,7 +7767,7 @@ static jl_llvm_functions_t assert(vi.value.V == NULL && "unexpected variable slot created for argument"); // keep track of original (possibly boxed) value to avoid re-boxing or moving vi.value = theArg; - if (specsig && theArg.V && ctx.debug_enabled && vi.dinfo) { + if (specsig && theArg.V && debug_enabled && vi.dinfo) { SmallVector addr; Value *parg; if (theArg.ispointer()) { @@ -7763,7 +7893,7 @@ static jl_llvm_functions_t else info.is_user_code = in_user_mod(module); info.is_tracked = in_tracked_path(info.file); - if (ctx.debug_enabled) { + if (debug_enabled) { StringRef fname; if (jl_is_method_instance(method)) method = ((jl_method_instance_t*)method)->def.value; @@ -8004,7 +8134,7 @@ static jl_llvm_functions_t while (cursor != -1) { int32_t debuginfoloc = ((int32_t*)jl_array_data(src->codelocs))[cursor]; if (debuginfoloc > 0) { - if (ctx.debug_enabled) + if (debug_enabled) ctx.builder.SetCurrentDebugLocation(linetable.at(debuginfoloc).loc); coverageVisitStmt(debuginfoloc); } @@ -8406,7 +8536,7 @@ static jl_llvm_functions_t for (auto &I : BB) { CallBase *call = dyn_cast(&I); if (call) { - if (ctx.debug_enabled && !I.getDebugLoc()) { + if (debug_enabled && !I.getDebugLoc()) { // LLVM Verifier: inlinable function call in a function with debug info must have a !dbg location // make sure that anything we attempt to call has some inlining info, just in case optimization messed up // (except if we know that it is an intrinsic used in our prologue, which should never have its own debug subprogram) @@ -8431,7 +8561,7 @@ static jl_llvm_functions_t in_prologue = false; } } - if (ctx.debug_enabled) + if (debug_enabled) dbuilder.finalize(); if (ctx.vaSlot > 0) { @@ -8517,7 +8647,7 @@ 
jl_llvm_functions_t jl_emit_code( jl_codegen_params_t ¶ms) { JL_TIMING(CODEGEN, CODEGEN_LLVM); - jl_timing_show_func_sig((jl_value_t *)li->specTypes, JL_TIMING_CURRENT_BLOCK); + jl_timing_show_func_sig((jl_value_t *)li->specTypes, JL_TIMING_DEFAULT_BLOCK); // caller must hold codegen_lock jl_llvm_functions_t decls = {}; assert((params.params == &jl_default_cgparams /* fast path */ || !params.cache || @@ -8562,7 +8692,7 @@ static jl_llvm_functions_t jl_emit_oc_wrapper(orc::ThreadSafeModule &m, jl_codeg jl_llvm_functions_t declarations; declarations.functionObject = "jl_f_opaque_closure_call"; if (uses_specsig(mi->specTypes, false, true, rettype, true)) { - jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, 1); + jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg)); Function *gf_thunk = cast(returninfo.decl.getCallee()); jl_init_function(gf_thunk, ctx.emission_context.TargetTriple); size_t nrealargs = jl_nparams(mi->specTypes); @@ -8579,7 +8709,7 @@ jl_llvm_functions_t jl_emit_codeinst( jl_codegen_params_t ¶ms) { JL_TIMING(CODEGEN, CODEGEN_Codeinst); - jl_timing_show_method_instance(codeinst->def, JL_TIMING_CURRENT_BLOCK); + jl_timing_show_method_instance(codeinst->def, JL_TIMING_DEFAULT_BLOCK); JL_GC_PUSH1(&src); if (!src) { src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred); @@ -8627,6 +8757,7 @@ jl_llvm_functions_t jl_emit_codeinst( if (// keep code when keeping everything !(JL_DELETE_NON_INLINEABLE) || // aggressively keep code when debugging level >= 2 + // note that this uses the global jl_options.debug_level, not the local emission_ctx.debug_level jl_options.debug_level > 1) { // update the stored code if (inferred != (jl_value_t*)src) { @@ -9074,7 +9205,6 @@ extern "C" void jl_init_llvm(void) } #endif -#ifndef JL_USE_JITLINK #ifdef JL_USE_INTEL_JITEVENTS if (jl_using_intel_jitevents) 
jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createIntelJITEventListener()); @@ -9090,7 +9220,6 @@ extern "C" void jl_init_llvm(void) jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createPerfJITEventListener()); #endif #endif -#endif #endif cl::PrintOptionValues(); diff --git a/src/disasm.cpp b/src/disasm.cpp index 96595d4381987..9414c0a2a065d 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -575,7 +575,7 @@ static uint64_t compute_obj_symsize(object::SectionRef Section, uint64_t offset) // print a native disassembly for the function starting at fptr extern "C" JL_DLLEXPORT_CODEGEN -jl_value_t *jl_dump_fptr_asm_impl(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary) +jl_value_t *jl_dump_fptr_asm_impl(uint64_t fptr, char emit_mc, const char* asm_variant, const char *debuginfo, char binary) { assert(fptr != 0); std::string code; @@ -600,7 +600,7 @@ jl_value_t *jl_dump_fptr_asm_impl(uint64_t fptr, char raw_mc, const char* asm_va return jl_pchar_to_string("", 0); } - if (raw_mc) { + if (emit_mc) { return (jl_value_t*)jl_pchar_to_array((char*)fptr, symsize); } @@ -1203,7 +1203,7 @@ class LineNumberPrinterHandler : public AsmPrinterHandler { // get a native assembly for llvm::Function extern "C" JL_DLLEXPORT_CODEGEN -jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char raw_mc, const char* asm_variant, const char *debuginfo, char binary) +jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char emit_mc, const char* asm_variant, const char *debuginfo, char binary, char raw) { // precise printing via IR assembler SmallVector ObjBufferSV; @@ -1217,12 +1217,15 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char raw_mc, const if (f != &f2 && !f->isDeclaration()) f2.deleteBody(); } + // add a nounwind attribute to get rid of cfi instructions + if (!raw) + f->addFnAttr(Attribute::NoUnwind); }); auto TMBase = jl_ExecutionEngine->cloneTargetMachine(); LLVMTargetMachine 
*TM = static_cast(TMBase.get()); legacy::PassManager PM; addTargetPasses(&PM, TM->getTargetTriple(), TM->getTargetIRAnalysis()); - if (raw_mc) { + if (emit_mc) { raw_svector_ostream obj_OS(ObjBufferSV); if (TM->addPassesToEmitFile(PM, obj_OS, nullptr, CGFT_ObjectFile, false, nullptr)) return jl_an_empty_string; diff --git a/src/dlload.c b/src/dlload.c index 3fb5a08ba2438..ffa9a053d5f1c 100644 --- a/src/dlload.c +++ b/src/dlload.c @@ -284,7 +284,7 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, JL_TIMING(DL_OPEN, DL_OPEN); if (!(flags & JL_RTLD_NOLOAD)) - jl_timing_puts(JL_TIMING_CURRENT_BLOCK, modname); + jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, modname); // Detect if our `modname` is something like `@rpath/libfoo.dylib` #ifdef _OS_DARWIN_ @@ -342,7 +342,7 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, #endif handle = jl_dlopen(path, flags); if (handle && !(flags & JL_RTLD_NOLOAD)) - jl_timing_puts(JL_TIMING_CURRENT_BLOCK, jl_pathname_for_handle(handle)); + jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, jl_pathname_for_handle(handle)); if (handle) return handle; #ifdef _OS_WINDOWS_ @@ -364,7 +364,7 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, snprintf(path, PATHBUF, "%s%s", modname, ext); handle = jl_dlopen(path, flags); if (handle && !(flags & JL_RTLD_NOLOAD)) - jl_timing_puts(JL_TIMING_CURRENT_BLOCK, jl_pathname_for_handle(handle)); + jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, jl_pathname_for_handle(handle)); if (handle) return handle; #ifdef _OS_WINDOWS_ @@ -436,12 +436,12 @@ JL_DLLEXPORT int jl_dlsym(void *handle, const char *symbol, void ** value, int t JL_DLLEXPORT const char *jl_dlfind(const char *f_name) { void * dummy; - if (jl_dlsym(jl_exe_handle, f_name, &dummy, 0)) - return JL_EXE_LIBNAME; if (jl_dlsym(jl_libjulia_internal_handle, f_name, &dummy, 0)) return JL_LIBJULIA_INTERNAL_DL_LIBNAME; if (jl_dlsym(jl_libjulia_handle, f_name, &dummy, 0)) return 
JL_LIBJULIA_DL_LIBNAME; + if (jl_dlsym(jl_exe_handle, f_name, &dummy, 0)) + return JL_EXE_LIBNAME; #ifdef _OS_WINDOWS_ if (jl_dlsym(jl_kernel32_handle, f_name, &dummy, 0)) return "kernel32"; diff --git a/src/flisp/Makefile b/src/flisp/Makefile index c2bf30300b041..17292d301115b 100644 --- a/src/flisp/Makefile +++ b/src/flisp/Makefile @@ -95,6 +95,13 @@ $(BUILDDIR)/$(LIBTARGET).a: $(OBJS) | $(BUILDDIR) CCLD := $(CC) +# Override `-shared-libasan` from root Make.inc +ifeq ($(SANITIZE),1) +ifeq ($(SANITIZE_ADDRESS),1) +JLDFLAGS += -static-libsan +endif +endif + $(BUILDDIR)/$(EXENAME)-debug$(EXE): $(DOBJS) $(LIBFILES_debug) $(BUILDDIR)/$(LIBTARGET)-debug.a $(BUILDDIR)/flmain.dbg.obj | $(BUILDDIR)/flisp.boot @$(call PRINT_LINK, $(CCLD) $(DEBUGFLAGS) $(JLDFLAGS) $(DOBJS) $(BUILDDIR)/flmain.dbg.obj -o $@ $(BUILDDIR)/$(LIBTARGET)-debug.a $(LIBFILES_debug) $(LIBS) $(OSLIBS)) diff --git a/src/gc-common.c b/src/gc-common.c index 3eacc2b2fd92d..38f737ada576f 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -11,6 +11,13 @@ int gc_first_tid; int64_t live_bytes = 0; +// These should be moved to gc.c + +// Number of GC threads that may run parallel marking +int jl_n_markthreads; +// Number of GC threads that may run concurrent sweeping (0 or 1) +int jl_n_sweepthreads; + JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0; // mutex for gc-heap-snapshot. diff --git a/src/gc-debug.c b/src/gc-debug.c index df2e3487506fa..91367eb585bc6 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -29,19 +29,16 @@ jl_gc_pagemeta_t *jl_gc_page_metadata(void *data) // the end of the page. 
JL_DLLEXPORT jl_taggedvalue_t *jl_gc_find_taggedvalue_pool(char *p, size_t *osize_p) { - if (!page_metadata(p)) + if (!gc_alloc_map_is_set(p)) // Not in the pool return NULL; - struct jl_gc_metadata_ext info = page_metadata_ext(p); + jl_gc_pagemeta_t *meta = page_metadata(p); char *page_begin = gc_page_data(p) + GC_PAGE_OFFSET; // In the page header if (p < page_begin) return NULL; size_t ofs = p - page_begin; - // Check if this is a free page - if (!(info.pagetable0->allocmap[info.pagetable0_i32] & (uint32_t)(1 << info.pagetable0_i))) - return NULL; - int osize = info.meta->osize; + int osize = meta->osize; // Shouldn't be needed, just in case if (osize == 0) return NULL; @@ -113,44 +110,14 @@ static void gc_clear_mark_page(jl_gc_pagemeta_t *pg, int bits) } } -static void gc_clear_mark_pagetable0(pagetable0_t *pagetable0, int bits) -{ - for (int pg_i = 0; pg_i < REGION0_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable0->allocmap[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_clear_mark_page(pagetable0->meta[pg_i * 32 + j], bits); - } - } - } - } -} - -static void gc_clear_mark_pagetable1(pagetable1_t *pagetable1, int bits) -{ - for (int pg_i = 0; pg_i < REGION1_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable1->allocmap0[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_clear_mark_pagetable0(pagetable1->meta0[pg_i * 32 + j], bits); - } - } - } - } -} - -static void gc_clear_mark_pagetable(int bits) +static void gc_clear_mark_outer(int bits) { - for (int pg_i = 0; pg_i < (REGION2_PG_COUNT + 31) / 32; pg_i++) { - uint32_t line = memory_map.allocmap1[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_clear_mark_pagetable1(memory_map.meta1[pg_i * 32 + j], bits); - } - } + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd; + while (pg != NULL) { + gc_clear_mark_page(pg, 
bits); + pg = pg->next; } } } @@ -186,7 +153,7 @@ static void clear_mark(int bits) v = v->next; } - gc_clear_mark_pagetable(bits); + gc_clear_mark_outer(bits); } static void restore(void) @@ -563,7 +530,6 @@ void gc_scrub_record_task(jl_task_t *t) JL_NO_ASAN static void gc_scrub_range(char *low, char *high) { - jl_ptls_t ptls = jl_current_task->ptls; jl_jmp_buf *old_buf = jl_get_safe_restore(); jl_jmp_buf buf; if (jl_setjmp(buf, 0)) { @@ -582,14 +548,6 @@ JL_NO_ASAN static void gc_scrub_range(char *low, char *high) // Make sure the sweep rebuild the freelist pg->has_marked = 1; pg->has_young = 1; - // Find the age bit - char *page_begin = gc_page_data(tag) + GC_PAGE_OFFSET; - int obj_id = (((char*)tag) - page_begin) / osize; - uint32_t *ages = pg->ages + obj_id / 32; - // Force this to be a young object to save some memory - // (especially on 32bit where it's more likely to have pointer-like - // bit patterns) - *ages &= ~(1 << (obj_id % 32)); memset(tag, 0xff, osize); // set mark to GC_MARKED (young and marked) tag->bits.gc = GC_MARKED; @@ -703,45 +661,37 @@ void gc_final_pause_end(int64_t t0, int64_t tend) static void gc_stats_pagetable0(pagetable0_t *pagetable0, unsigned *p0) { - for (int pg_i = 0; pg_i < REGION0_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable0->allocmap[pg_i] | pagetable0->freemap[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - (*p0)++; - } - } + for (int pg_i = 0; pg_i < REGION0_PG_COUNT; pg_i++) { + uint8_t meta = pagetable0->meta[pg_i]; + assert(meta == GC_PAGE_UNMAPPED || meta == GC_PAGE_ALLOCATED || + meta == GC_PAGE_LAZILY_FREED || meta == GC_PAGE_FREED); + if (meta != GC_PAGE_UNMAPPED) { + (*p0)++; } } } static void gc_stats_pagetable1(pagetable1_t *pagetable1, unsigned *p1, unsigned *p0) { - for (int pg_i = 0; pg_i < REGION1_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable1->allocmap0[pg_i] | pagetable1->freemap0[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - 
(*p1)++; - gc_stats_pagetable0(pagetable1->meta0[pg_i * 32 + j], p0); - } - } + for (int pg_i = 0; pg_i < REGION1_PG_COUNT; pg_i++) { + pagetable0_t *pagetable0 = pagetable1->meta0[pg_i]; + if (pagetable0 == NULL) { + continue; } + (*p1)++; + gc_stats_pagetable0(pagetable0, p0); } } static void gc_stats_pagetable(unsigned *p2, unsigned *p1, unsigned *p0) { - for (int pg_i = 0; pg_i < (REGION2_PG_COUNT + 31) / 32; pg_i++) { - uint32_t line = memory_map.allocmap1[pg_i] | memory_map.freemap1[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - (*p2)++; - gc_stats_pagetable1(memory_map.meta1[pg_i * 32 + j], p1, p0); - } - } + for (int pg_i = 0; pg_i < REGION2_PG_COUNT; pg_i++) { + pagetable1_t *pagetable1 = alloc_map.meta1[pg_i]; + if (pagetable1 == NULL) { + continue; } + (*p2)++; + gc_stats_pagetable1(pagetable1, p1, p0); } } @@ -750,7 +700,7 @@ void jl_print_gc_stats(JL_STREAM *s) #ifdef _OS_LINUX_ malloc_stats(); #endif - double ptime = jl_clock_now() - process_t0; + double ptime = jl_hrtime() - process_t0; jl_safe_printf("exec time\t%.5f sec\n", ptime); if (gc_num.pause > 0) { jl_safe_printf("gc time \t%.5f sec (%2.1f%%) in %d (%d full) collections\n", @@ -971,7 +921,7 @@ void jl_gc_debug_init(void) #endif #ifdef GC_FINAL_STATS - process_t0 = jl_clock_now(); + process_t0 = jl_hrtime(); #endif } @@ -1093,7 +1043,7 @@ void gc_stats_big_obj(void) static int64_t poolobj_sizes[4]; static int64_t empty_pages; -static void gc_count_pool_page(jl_gc_pagemeta_t *pg) +static void gc_count_pool_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT { int osize = pg->osize; char *data = pg->data; @@ -1112,44 +1062,16 @@ static void gc_count_pool_page(jl_gc_pagemeta_t *pg) } } -static void gc_count_pool_pagetable0(pagetable0_t *pagetable0) -{ - for (int pg_i = 0; pg_i < REGION0_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable0->allocmap[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_count_pool_page(pagetable0->meta[pg_i * 
32 + j]); - } - } - } - } -} - -static void gc_count_pool_pagetable1(pagetable1_t *pagetable1) -{ - for (int pg_i = 0; pg_i < REGION1_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable1->allocmap0[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_count_pool_pagetable0(pagetable1->meta0[pg_i * 32 + j]); - } - } - } - } -} - static void gc_count_pool_pagetable(void) { - for (int pg_i = 0; pg_i < (REGION2_PG_COUNT + 31) / 32; pg_i++) { - uint32_t line = memory_map.allocmap1[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_count_pool_pagetable1(memory_map.meta1[pg_i * 32 + j]); - } + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd; + while (pg != NULL) { + if (gc_alloc_map_is_set(pg->data)) { + gc_count_pool_page(pg); } + pg = pg->next; } } } diff --git a/src/gc-pages.c b/src/gc-pages.c index e367334450863..9242842851147 100644 --- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -21,7 +21,6 @@ extern "C" { #define MIN_BLOCK_PG_ALLOC (1) // 16 KB static int block_pg_cnt = DEFAULT_BLOCK_PG_ALLOC; -static size_t current_pg_count = 0; void jl_gc_init_page(void) { @@ -35,7 +34,7 @@ void jl_gc_init_page(void) // Try to allocate a memory block for multiple pages // Return `NULL` if allocation failed. Result is aligned to `GC_PAGE_SZ`. -static char *jl_gc_try_alloc_pages(int pg_cnt) JL_NOTSAFEPOINT +char *jl_gc_try_alloc_pages_(int pg_cnt) JL_NOTSAFEPOINT { size_t pages_sz = GC_PAGE_SZ * pg_cnt; #ifdef _OS_WINDOWS_ @@ -65,13 +64,12 @@ static char *jl_gc_try_alloc_pages(int pg_cnt) JL_NOTSAFEPOINT // smaller `MIN_BLOCK_PG_ALLOC` a `jl_memory_exception` is thrown. // Assumes `gc_perm_lock` is acquired, the lock is released before the // exception is thrown. 
-static jl_gc_pagemeta_t *jl_gc_alloc_new_page(void) JL_NOTSAFEPOINT +char *jl_gc_try_alloc_pages(void) JL_NOTSAFEPOINT { - // try to allocate a large block of memory (or a small one) - unsigned pg, pg_cnt = block_pg_cnt; + unsigned pg_cnt = block_pg_cnt; char *mem = NULL; while (1) { - if (__likely((mem = jl_gc_try_alloc_pages(pg_cnt)))) + if (__likely((mem = jl_gc_try_alloc_pages_(pg_cnt)))) break; size_t min_block_pg_alloc = MIN_BLOCK_PG_ALLOC; if (GC_PAGE_SZ * min_block_pg_alloc < jl_page_size) @@ -88,204 +86,78 @@ static jl_gc_pagemeta_t *jl_gc_alloc_new_page(void) JL_NOTSAFEPOINT jl_throw(jl_memory_exception); } } - - // now need to insert these pages into the pagetable metadata - // if any allocation fails, this just stops recording more pages from that point - // and will free (munmap) the remainder - jl_gc_pagemeta_t *page_meta = - (jl_gc_pagemeta_t*)jl_gc_perm_alloc_nolock(pg_cnt * sizeof(jl_gc_pagemeta_t), 1, - sizeof(void*), 0); - pg = 0; - if (page_meta) { - for (; pg < pg_cnt; pg++) { - struct jl_gc_metadata_ext info; - uint32_t msk; - unsigned i; - pagetable1_t **ppagetable1; - pagetable0_t **ppagetable0; - jl_gc_pagemeta_t **pmeta; - - char *ptr = mem + (GC_PAGE_SZ * pg); - page_meta[pg].data = ptr; - - // create & store the level 2 / outermost info - i = REGION_INDEX(ptr); - info.pagetable_i = i % 32; - info.pagetable_i32 = i / 32; - msk = (1u << info.pagetable_i); - if ((memory_map.freemap1[info.pagetable_i32] & msk) == 0) - memory_map.freemap1[info.pagetable_i32] |= msk; // has free - info.pagetable1 = *(ppagetable1 = &memory_map.meta1[i]); - if (!info.pagetable1) { - info.pagetable1 = (pagetable1_t*)jl_gc_perm_alloc_nolock(sizeof(pagetable1_t), 1, - sizeof(void*), 0); - *ppagetable1 = info.pagetable1; - if (!info.pagetable1) - break; - } - - // create & store the level 1 info - i = REGION1_INDEX(ptr); - info.pagetable1_i = i % 32; - info.pagetable1_i32 = i / 32; - msk = (1u << info.pagetable1_i); - if 
((info.pagetable1->freemap0[info.pagetable1_i32] & msk) == 0) - info.pagetable1->freemap0[info.pagetable1_i32] |= msk; // has free - info.pagetable0 = *(ppagetable0 = &info.pagetable1->meta0[i]); - if (!info.pagetable0) { - info.pagetable0 = (pagetable0_t*)jl_gc_perm_alloc_nolock(sizeof(pagetable0_t), 1, - sizeof(void*), 0); - *ppagetable0 = info.pagetable0; - if (!info.pagetable0) - break; - } - - // create & store the level 0 / page info - i = REGION0_INDEX(ptr); - info.pagetable0_i = i % 32; - info.pagetable0_i32 = i / 32; - msk = (1u << info.pagetable0_i); - info.pagetable0->freemap[info.pagetable0_i32] |= msk; // is free - pmeta = &info.pagetable0->meta[i]; - info.meta = (*pmeta = &page_meta[pg]); - } - } - - if (pg < pg_cnt) { -#ifndef _OS_WINDOWS_ - // Trim the allocation to only cover the region - // that we successfully created the metadata for. - // This is not supported by the Windows kernel, - // so we have to just skip it there and just lose these virtual addresses. - munmap(mem + LLT_ALIGN(GC_PAGE_SZ * pg, jl_page_size), - GC_PAGE_SZ * pg_cnt - LLT_ALIGN(GC_PAGE_SZ * pg, jl_page_size)); -#endif - if (pg == 0) { - uv_mutex_unlock(&gc_perm_lock); - jl_throw(jl_memory_exception); - } - } - return page_meta; + return mem; } // get a new page, either from the freemap // or from the kernel if none are available NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT { - struct jl_gc_metadata_ext info; - uv_mutex_lock(&gc_perm_lock); - int last_errno = errno; #ifdef _OS_WINDOWS_ DWORD last_error = GetLastError(); #endif - // scan over memory_map page-table for existing allocated but unused pages - for (info.pagetable_i32 = memory_map.lb; info.pagetable_i32 < (REGION2_PG_COUNT + 31) / 32; info.pagetable_i32++) { - uint32_t freemap1 = memory_map.freemap1[info.pagetable_i32]; - for (info.pagetable_i = 0; freemap1; info.pagetable_i++, freemap1 >>= 1) { - unsigned next = ffs_u32(freemap1); - info.pagetable_i += next; - freemap1 >>= next; - 
info.pagetable1 = memory_map.meta1[info.pagetable_i + info.pagetable_i32 * 32]; - // repeat over page-table level 1 - for (info.pagetable1_i32 = info.pagetable1->lb; info.pagetable1_i32 < REGION1_PG_COUNT / 32; info.pagetable1_i32++) { - uint32_t freemap0 = info.pagetable1->freemap0[info.pagetable1_i32]; - for (info.pagetable1_i = 0; freemap0; info.pagetable1_i++, freemap0 >>= 1) { - unsigned next = ffs_u32(freemap0); - info.pagetable1_i += next; - freemap0 >>= next; - info.pagetable0 = info.pagetable1->meta0[info.pagetable1_i + info.pagetable1_i32 * 32]; - // repeat over page-table level 0 - for (info.pagetable0_i32 = info.pagetable0->lb; info.pagetable0_i32 < REGION0_PG_COUNT / 32; info.pagetable0_i32++) { - uint32_t freemap = info.pagetable0->freemap[info.pagetable0_i32]; - if (freemap) { - info.pagetable0_i = ffs_u32(freemap); - info.meta = info.pagetable0->meta[info.pagetable0_i + info.pagetable0_i32 * 32]; - assert(info.meta->data); - // new pages available starting at min of lb and pagetable_i32 - if (memory_map.lb < info.pagetable_i32) - memory_map.lb = info.pagetable_i32; - if (info.pagetable1->lb < info.pagetable1_i32) - info.pagetable1->lb = info.pagetable1_i32; - if (info.pagetable0->lb < info.pagetable0_i32) - info.pagetable0->lb = info.pagetable0_i32; - goto have_free_page; // break out of all of these loops - } - } - info.pagetable1->freemap0[info.pagetable1_i32] &= ~(uint32_t)(1u << info.pagetable1_i); // record that this was full - } - } - memory_map.freemap1[info.pagetable_i32] &= ~(uint32_t)(1u << info.pagetable_i); // record that this was full - } + jl_gc_pagemeta_t *meta = NULL; + + // try to get page from `pool_lazily_freed` + meta = pop_lf_page_metadata_back(&global_page_pool_lazily_freed); + if (meta != NULL) { + gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); + // page is already mapped + return meta; } - // no existing pages found, allocate a new one - { - jl_gc_pagemeta_t *meta = jl_gc_alloc_new_page(); - info = 
page_metadata_ext(meta->data); - assert(meta == info.meta); - // new pages are now available starting at max of lb and pagetable_i32 - if (memory_map.lb > info.pagetable_i32) - memory_map.lb = info.pagetable_i32; - if (info.pagetable1->lb > info.pagetable1_i32) - info.pagetable1->lb = info.pagetable1_i32; - if (info.pagetable0->lb > info.pagetable0_i32) - info.pagetable0->lb = info.pagetable0_i32; + // try to get page from `pool_clean` + meta = pop_lf_page_metadata_back(&global_page_pool_clean); + if (meta != NULL) { + gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); + goto exit; } -have_free_page: - // in-use pages are now ending at min of ub and pagetable_i32 - if (memory_map.ub < info.pagetable_i32) - memory_map.ub = info.pagetable_i32; - if (info.pagetable1->ub < info.pagetable1_i32) - info.pagetable1->ub = info.pagetable1_i32; - if (info.pagetable0->ub < info.pagetable0_i32) - info.pagetable0->ub = info.pagetable0_i32; - - // mark this entry as in-use and not free - info.pagetable0->freemap[info.pagetable0_i32] &= ~(uint32_t)(1u << info.pagetable0_i); - info.pagetable0->allocmap[info.pagetable0_i32] |= (uint32_t)(1u << info.pagetable0_i); - info.pagetable1->allocmap0[info.pagetable1_i32] |= (uint32_t)(1u << info.pagetable1_i); - memory_map.allocmap1[info.pagetable_i32] |= (uint32_t)(1u << info.pagetable_i); + // try to get page from `pool_freed` + meta = pop_lf_page_metadata_back(&global_page_pool_freed); + if (meta != NULL) { + gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); + goto exit; + } + uv_mutex_lock(&gc_perm_lock); + // another thread may have allocated a large block while we were waiting... 
+ meta = pop_lf_page_metadata_back(&global_page_pool_clean); + if (meta != NULL) { + uv_mutex_unlock(&gc_perm_lock); + gc_alloc_map_set(meta->data, 1); + goto exit; + } + // must map a new set of pages + char *data = jl_gc_try_alloc_pages(); + meta = (jl_gc_pagemeta_t*)malloc_s(block_pg_cnt * sizeof(jl_gc_pagemeta_t)); + for (int i = 0; i < block_pg_cnt; i++) { + jl_gc_pagemeta_t *pg = &meta[i]; + pg->data = data + GC_PAGE_SZ * i; + gc_alloc_map_maybe_create(pg->data); + if (i == 0) { + gc_alloc_map_set(pg->data, 1); + } + else { + push_lf_page_metadata_back(&global_page_pool_clean, pg); + } + } + uv_mutex_unlock(&gc_perm_lock); +exit: #ifdef _OS_WINDOWS_ - VirtualAlloc(info.meta->data, GC_PAGE_SZ, MEM_COMMIT, PAGE_READWRITE); -#endif -#ifdef _OS_WINDOWS_ + VirtualAlloc(meta->data, GC_PAGE_SZ, MEM_COMMIT, PAGE_READWRITE); SetLastError(last_error); #endif errno = last_errno; - current_pg_count++; - gc_final_count_page(current_pg_count); - uv_mutex_unlock(&gc_perm_lock); - return info.meta; + return meta; } // return a page to the freemap allocator -void jl_gc_free_page(void *p) JL_NOTSAFEPOINT +void jl_gc_free_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT { - // update the allocmap and freemap to indicate this contains a free entry - struct jl_gc_metadata_ext info = page_metadata_ext(p); - uint32_t msk; - msk = (uint32_t)(1u << info.pagetable0_i); - assert(!(info.pagetable0->freemap[info.pagetable0_i32] & msk)); - assert(info.pagetable0->allocmap[info.pagetable0_i32] & msk); - info.pagetable0->allocmap[info.pagetable0_i32] &= ~msk; - info.pagetable0->freemap[info.pagetable0_i32] |= msk; - - msk = (uint32_t)(1u << info.pagetable1_i); - assert(info.pagetable1->allocmap0[info.pagetable1_i32] & msk); - if ((info.pagetable1->freemap0[info.pagetable1_i32] & msk) == 0) - info.pagetable1->freemap0[info.pagetable1_i32] |= msk; - - msk = (uint32_t)(1u << info.pagetable_i); - assert(memory_map.allocmap1[info.pagetable_i32] & msk); - if ((memory_map.freemap1[info.pagetable_i32] & 
msk) == 0) - memory_map.freemap1[info.pagetable_i32] |= msk; - - free(info.meta->ages); - info.meta->ages = NULL; - + void *p = pg->data; + gc_alloc_map_set((char*)p, GC_PAGE_FREED); // tell the OS we don't need these pages right now size_t decommit_size = GC_PAGE_SZ; if (GC_PAGE_SZ < jl_page_size) { @@ -295,10 +167,9 @@ void jl_gc_free_page(void *p) JL_NOTSAFEPOINT void *otherp = (void*)((uintptr_t)p & ~(jl_page_size - 1)); // round down to the nearest physical page p = otherp; while (n_pages--) { - struct jl_gc_metadata_ext info = page_metadata_ext(otherp); - msk = (uint32_t)(1u << info.pagetable0_i); - if (info.pagetable0->allocmap[info.pagetable0_i32] & msk) - goto no_decommit; + if (gc_alloc_map_is_set((char*)otherp)) { + return; + } otherp = (void*)((char*)otherp + GC_PAGE_SZ); } } @@ -318,20 +189,7 @@ void jl_gc_free_page(void *p) JL_NOTSAFEPOINT #else madvise(p, decommit_size, MADV_DONTNEED); #endif - /* TODO: Should we leave this poisoned and rather allow the GC to read poisoned pointers from - * the page when it sweeps pools? 
- */ msan_unpoison(p, decommit_size); - -no_decommit: - // new pages are now available starting at max of lb and pagetable_i32 - if (memory_map.lb > info.pagetable_i32) - memory_map.lb = info.pagetable_i32; - if (info.pagetable1->lb > info.pagetable1_i32) - info.pagetable1->lb = info.pagetable1_i32; - if (info.pagetable0->lb > info.pagetable0_i32) - info.pagetable0->lb = info.pagetable0_i32; - current_pg_count--; } #ifdef __cplusplus diff --git a/src/gc.c b/src/gc.c index 08741df919dfa..4846549af93e4 100644 --- a/src/gc.c +++ b/src/gc.c @@ -20,6 +20,8 @@ _Atomic(int) gc_master_tid; // Mutex/cond used to synchronize sleep/wakeup of GC threads uv_mutex_t gc_threads_lock; uv_cond_t gc_threads_cond; +// To indicate whether concurrent sweeping should run +uv_sem_t gc_sweep_assists_needed; // Linked list of callback functions @@ -346,7 +348,7 @@ void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads) { JL_TIMING(GC, GC_Stop); #ifdef USE_TRACY - TracyCZoneCtx ctx = *(JL_TIMING_CURRENT_BLOCK->tracy_ctx); + TracyCZoneCtx ctx = JL_TIMING_DEFAULT_BLOCK->tracy_ctx; TracyCZoneColor(ctx, 0x696969); #endif assert(gc_n_threads); @@ -469,7 +471,7 @@ static int mark_reset_age = 0; * * <-[(quick)sweep]- * | - * ----> GC_OLD <--[(quick)sweep && age>promotion]-- + * ----> GC_OLD <--[(quick)sweep]------------------- * | | | * | | GC_MARKED (in remset) | * | | ^ | | @@ -486,9 +488,9 @@ static int mark_reset_age = 0; * ========= above this line objects are old ========= | * | * ----[new]------> GC_CLEAN ------[mark]-----------> GC_MARKED - * | ^ | - * <-[(quick)sweep]--- | | - * --[(quick)sweep && age<=promotion]--- + * | + * <-[(quick)sweep]--- + * */ // A quick sweep is a sweep where `!sweep_full` @@ -502,19 +504,10 @@ static int mark_reset_age = 0; // When a write barrier triggers, the offending marked object is both queued, // so as not to trigger the barrier again, and put in the remset. 
- -#define PROMOTE_AGE 1 -// this cannot be increased as is without changing : -// - sweep_page which is specialized for 1bit age -// - the size of the age storage in jl_gc_pagemeta_t - - static int64_t scanned_bytes; // young bytes scanned while marking static int64_t perm_scanned_bytes; // old bytes scanned while marking int prev_sweep_full = 1; -#define inc_sat(v,s) v = (v) >= s ? s : (v)+1 - // Full collection heuristics extern int64_t live_bytes; static int64_t promoted_bytes = 0; @@ -583,7 +576,7 @@ STATIC_INLINE void gc_queue_big_marked(jl_ptls_t ptls, bigval_t *hdr, FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT { assert(gc_marked(mark_mode)); - uintptr_t tag = o->header; + uintptr_t tag = jl_atomic_load_relaxed((_Atomic(uintptr_t)*)&o->header); if (gc_marked(tag)) return 0; if (mark_reset_age) { @@ -597,9 +590,9 @@ FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_N tag = tag | mark_mode; assert((tag & 0x3) == mark_mode); } - tag = jl_atomic_exchange_relaxed((_Atomic(uintptr_t)*)&o->header, tag); - verify_val(jl_valueof(o)); - return !gc_marked(tag); + jl_atomic_store_relaxed((_Atomic(uintptr_t)*)&o->header, tag); //xchg here was slower than + verify_val(jl_valueof(o)); //potentially redoing work because of a stale tag. 
+ return 1; } // This function should be called exactly once during marking for each big @@ -607,7 +600,7 @@ FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_N STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT { - assert(!page_metadata(o)); + assert(!gc_alloc_map_is_set((char*)o)); bigval_t *hdr = bigval_header(o); if (mark_mode == GC_OLD_MARKED) { ptls->gc_cache.perm_scanned_bytes += hdr->sz & ~3; @@ -618,9 +611,8 @@ STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o, // We can't easily tell if the object is old or being promoted // from the gc bits but if the `age` is `0` then the object // must be already on a young list. - if (mark_reset_age && hdr->age) { + if (mark_reset_age) { // Reset the object as if it was just allocated - hdr->age = 0; gc_queue_big_marked(ptls, hdr, 1); } } @@ -631,13 +623,11 @@ STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o, // This function should be called exactly once during marking for each pool // object being marked to update the page metadata. 
STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, - uint8_t mark_mode, - jl_gc_pagemeta_t *page) JL_NOTSAFEPOINT + uint8_t mark_mode, jl_gc_pagemeta_t *page) JL_NOTSAFEPOINT { #ifdef MEMDEBUG gc_setmark_big(ptls, o, mark_mode); #else - jl_assume(page); if (mark_mode == GC_OLD_MARKED) { ptls->gc_cache.perm_scanned_bytes += page->osize; static_assert(sizeof(_Atomic(uint16_t)) == sizeof(page->nold), ""); @@ -647,10 +637,6 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, ptls->gc_cache.scanned_bytes += page->osize; if (mark_reset_age) { page->has_young = 1; - char *page_begin = gc_page_data(o) + GC_PAGE_OFFSET; - int obj_id = (((char*)o) - page_begin) / page->osize; - uint32_t *ages = page->ages + obj_id / 32; - jl_atomic_fetch_and_relaxed((_Atomic(uint32_t)*)ages, ~(1 << (obj_id % 32))); } } objprofile_count(jl_typeof(jl_valueof(o)), @@ -662,7 +648,7 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, STATIC_INLINE void gc_setmark_pool(jl_ptls_t ptls, jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT { - gc_setmark_pool_(ptls, o, mark_mode, page_metadata(o)); + gc_setmark_pool_(ptls, o, mark_mode, page_metadata((char*)o)); } STATIC_INLINE void gc_setmark(jl_ptls_t ptls, jl_taggedvalue_t *o, @@ -686,9 +672,9 @@ STATIC_INLINE void gc_setmark_buf_(jl_ptls_t ptls, void *o, uint8_t mark_mode, s // sure. 
if (__likely(gc_try_setmark_tag(buf, mark_mode)) && !gc_verifying) { if (minsz <= GC_MAX_SZCLASS) { - jl_gc_pagemeta_t *page = page_metadata(buf); - if (page) { - gc_setmark_pool_(ptls, buf, bits, page); + jl_gc_pagemeta_t *meta = page_metadata(buf); + if (meta != NULL) { + gc_setmark_pool_(ptls, buf, bits, meta); return; } } @@ -701,37 +687,6 @@ void gc_setmark_buf(jl_ptls_t ptls, void *o, uint8_t mark_mode, size_t minsz) JL gc_setmark_buf_(ptls, o, mark_mode, minsz); } -void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v) JL_NOTSAFEPOINT -{ - jl_taggedvalue_t *o = jl_astaggedvalue(v); - jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(v); - size_t dtsz = jl_datatype_size(dt); - if (o->bits.gc == GC_OLD_MARKED) - return; - o->bits.gc = GC_OLD_MARKED; - if (dt == jl_simplevector_type) { - size_t l = jl_svec_len(v); - dtsz = l * sizeof(void*) + sizeof(jl_svec_t); - } - else if (dt->name == jl_array_typename) { - jl_array_t *a = (jl_array_t*)v; - if (!a->flags.pooled) - dtsz = GC_MAX_SZCLASS + 1; - } - else if (dt == jl_module_type) { - dtsz = sizeof(jl_module_t); - } - else if (dt == jl_task_type) { - dtsz = sizeof(jl_task_t); - } - else if (dt == jl_symbol_type) { - return; - } - gc_setmark(ptls, o, GC_OLD_MARKED, dtsz); - if (dt->layout->npointers != 0) - jl_gc_queue_root(v); -} - inline void maybe_collect(jl_ptls_t ptls) { if (jl_atomic_load_relaxed(&ptls->gc_num.allocd) >= 0 || jl_gc_debug_check_other()) { @@ -827,7 +782,6 @@ inline jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) memset(v, 0xee, allocsz); #endif v->sz = allocsz; - v->age = 0; gc_big_object_link(v, &ptls->heap.big_objects); return jl_valueof(&v->header); } @@ -843,16 +797,8 @@ static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT int old_bits = bits; if (gc_marked(bits)) { pv = &v->next; - int age = v->age; - if (age >= PROMOTE_AGE || bits == GC_OLD_MARKED) { - if (sweep_full || bits == GC_MARKED) { - bits = GC_OLD; - } - } - else { - inc_sat(age, 
PROMOTE_AGE); - v->age = age; - bits = GC_CLEAN; + if (sweep_full || bits == GC_MARKED) { + bits = GC_OLD; } v->bits.gc = bits; } @@ -942,48 +888,41 @@ static void sweep_malloced_arrays(void) JL_NOTSAFEPOINT } // pool allocation -STATIC_INLINE jl_taggedvalue_t *reset_page(jl_ptls_t ptls2, const jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_taggedvalue_t *fl) JL_NOTSAFEPOINT +STATIC_INLINE jl_taggedvalue_t *gc_reset_page(jl_ptls_t ptls2, const jl_gc_pool_t *p, jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT { assert(GC_PAGE_OFFSET >= sizeof(void*)); pg->nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / p->osize; pg->pool_n = p - ptls2->heap.norm_pools; - memset(pg->ages, 0, GC_PAGE_SZ / 8 / p->osize + 1); jl_taggedvalue_t *beg = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET); - jl_taggedvalue_t *next = (jl_taggedvalue_t*)pg->data; - if (fl == NULL) { - next->next = NULL; - } - else { - // Insert free page after first page. - // This prevents unnecessary fragmentation from multiple pages - // being allocated from at the same time. Instead, objects will - // only ever be allocated from the first object in the list. - // This is specifically being relied on by the implementation - // of jl_gc_internal_obj_base_ptr() so that the function does - // not have to traverse the entire list. - jl_taggedvalue_t *flpage = (jl_taggedvalue_t *)gc_page_data(fl); - next->next = flpage->next; - flpage->next = beg; - beg = fl; - } pg->has_young = 0; pg->has_marked = 0; - pg->fl_begin_offset = -1; - pg->fl_end_offset = -1; + pg->prev_nold = 0; + pg->nold = 0; + pg->fl_begin_offset = UINT16_MAX; + pg->fl_end_offset = UINT16_MAX; return beg; } +jl_gc_global_page_pool_t global_page_pool_lazily_freed; +jl_gc_global_page_pool_t global_page_pool_clean; +jl_gc_global_page_pool_t global_page_pool_freed; +pagetable_t alloc_map; + // Add a new page to the pool. Discards any pages in `p->newpages` before. 
-static NOINLINE jl_taggedvalue_t *add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT +static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT { // Do not pass in `ptls` as argument. This slows down the fast path // in pool_alloc significantly jl_ptls_t ptls = jl_current_task->ptls; - jl_gc_pagemeta_t *pg = jl_gc_alloc_page(); + jl_gc_pagemeta_t *pg = pop_page_metadata_back(&ptls->page_metadata_lazily_freed); + if (pg == NULL) { + pg = jl_gc_alloc_page(); + } pg->osize = p->osize; - pg->ages = (uint32_t*)malloc_s(LLT_ALIGN(GC_PAGE_SZ / 8 / p->osize + 1, sizeof(uint32_t))); pg->thread_n = ptls->tid; - jl_taggedvalue_t *fl = reset_page(ptls, p, pg, NULL); + set_page_metadata(pg); + push_page_metadata_back(&ptls->page_metadata_allocd, pg); + jl_taggedvalue_t *fl = gc_reset_page(ptls, p, pg); p->newpages = fl; return fl; } @@ -1013,7 +952,7 @@ inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, if (__unlikely(gc_page_data(v) != gc_page_data(next))) { // we only update pg's fields when the freelist changes page // since pg's metadata is likely not in cache - jl_gc_pagemeta_t *pg = jl_assume(page_metadata(v)); + jl_gc_pagemeta_t *pg = jl_assume(page_metadata_unsafe(v)); assert(pg->osize == p->osize); pg->nfree = 0; pg->has_young = 1; @@ -1031,15 +970,12 @@ inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, if (v != NULL) { // like the freelist case, // but only update the page metadata when it is full - jl_gc_pagemeta_t *pg = jl_assume(page_metadata((char*)v - 1)); + jl_gc_pagemeta_t *pg = jl_assume(page_metadata_unsafe((char*)v - 1)); assert(pg->osize == p->osize); pg->nfree = 0; pg->has_young = 1; - v = *(jl_taggedvalue_t**)cur_page; } - // Not an else!! 
- if (v == NULL) - v = add_page(p); + v = gc_add_page(p); next = (jl_taggedvalue_t*)((char*)v + osize); } p->newpages = next; @@ -1052,33 +988,36 @@ inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int64_t lazy_freed_pages = 0; // Returns pointer to terminal pointer of list rooted at *pfl. -static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_taggedvalue_t **pfl, int sweep_full, int osize) JL_NOTSAFEPOINT +static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allocd, + jl_gc_pagemeta_t **lazily_freed, jl_gc_pagemeta_t *pg, jl_taggedvalue_t **pfl, int sweep_full, int osize) JL_NOTSAFEPOINT { char *data = pg->data; - uint32_t *ages = pg->ages; jl_taggedvalue_t *v = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET); - char *lim = (char*)v + GC_PAGE_SZ - GC_PAGE_OFFSET - osize; + char *lim = data + GC_PAGE_SZ - osize; + char *lim_newpages = data + GC_PAGE_SZ; + if (gc_page_data((char*)p->newpages - 1) == data) { + lim_newpages = (char*)p->newpages; + } size_t old_nfree = pg->nfree; size_t nfree; + int re_use_page = 1; + int freed_lazily = 0; int freedall = 1; int pg_skpd = 1; if (!pg->has_marked) { + re_use_page = 0; + #ifdef _P64 // TODO: re-enable on `_P32`? // lazy version: (empty) if the whole page was already unused, free it (return it to the pool) // eager version: (freedall) free page as soon as possible // the eager one uses less memory. 
// FIXME - need to do accounting on a per-thread basis // on quick sweeps, keep a few pages empty but allocated for performance if (!sweep_full && lazy_freed_pages <= default_collect_interval / GC_PAGE_SZ) { - jl_ptls_t ptls2 = gc_all_tls_states[pg->thread_n]; - jl_taggedvalue_t *begin = reset_page(ptls2, p, pg, p->newpages); - p->newpages = begin; - begin->next = (jl_taggedvalue_t*)0; lazy_freed_pages++; + freed_lazily = 1; } - else { - jl_gc_free_page(data); - } + #endif nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / osize; goto done; } @@ -1106,47 +1045,25 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t int16_t prev_nold = 0; int pg_nfree = 0; jl_taggedvalue_t **pfl_begin = NULL; - uint32_t msk = 1; // mask for the age bit in the current age byte - uint32_t age = *ages; while ((char*)v <= lim) { - if (!msk) { - msk = 1; - *ages = age; - ages++; - age = *ages; - } int bits = v->bits.gc; - if (!gc_marked(bits)) { + // if an object is past `lim_newpages` then we can guarantee it's garbage + if (!gc_marked(bits) || (char*)v >= lim_newpages) { *pfl = v; pfl = &v->next; - pfl_begin = pfl_begin ? pfl_begin : pfl; + pfl_begin = (pfl_begin != NULL) ? 
pfl_begin : pfl; pg_nfree++; - age &= ~msk; } else { // marked young or old - if (age & msk || bits == GC_OLD_MARKED) { // old enough - // `!age && bits == GC_OLD_MARKED` is possible for - // non-first-class objects like array buffers - // (they may get promoted by jl_gc_wb_buf for example, - // or explicitly by jl_gc_force_mark_old) - if (sweep_full || bits == GC_MARKED) { - bits = v->bits.gc = GC_OLD; // promote - } - prev_nold++; - } - else { - assert(bits == GC_MARKED); - bits = v->bits.gc = GC_CLEAN; // unmark - has_young = 1; + if (sweep_full || bits == GC_MARKED) { // old enough + bits = v->bits.gc = GC_OLD; // promote } + prev_nold++; has_marked |= gc_marked(bits); - age |= msk; freedall = 0; } v = (jl_taggedvalue_t*)((char*)v + osize); - msk <<= 1; } - *ages = age; assert(!freedall); pg->has_marked = has_marked; pg->has_young = has_young; @@ -1155,8 +1072,8 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t pg->fl_end_offset = (char*)pfl - data; } else { - pg->fl_begin_offset = -1; - pg->fl_end_offset = -1; + pg->fl_begin_offset = UINT16_MAX; + pg->fl_end_offset = UINT16_MAX; } pg->nfree = pg_nfree; @@ -1168,97 +1085,42 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t nfree = pg->nfree; done: + if (re_use_page) { + push_page_metadata_back(allocd, pg); + } + else if (freed_lazily) { + push_page_metadata_back(lazily_freed, pg); + } + else { + #ifdef _P64 // only enable concurrent sweeping on 64bit + if (jl_n_sweepthreads == 0) { + jl_gc_free_page(pg); + push_lf_page_metadata_back(&global_page_pool_freed, pg); + } + else { + gc_alloc_map_set(pg->data, GC_PAGE_LAZILY_FREED); + push_lf_page_metadata_back(&global_page_pool_lazily_freed, pg); + } + #else + jl_gc_free_page(pg); + push_lf_page_metadata_back(&global_page_pool_freed, pg); + #endif + } gc_time_count_page(freedall, pg_skpd); gc_num.freed += (nfree - old_nfree) * osize; return pfl; } // the actual sweeping over all allocated pages in a 
memory pool -STATIC_INLINE void sweep_pool_page(jl_taggedvalue_t ***pfl, jl_gc_pagemeta_t *pg, int sweep_full) JL_NOTSAFEPOINT +STATIC_INLINE void gc_sweep_pool_page(jl_taggedvalue_t ***pfl, jl_gc_pagemeta_t **allocd, + jl_gc_pagemeta_t **lazily_freed, jl_gc_pagemeta_t *pg, int sweep_full) JL_NOTSAFEPOINT { int p_n = pg->pool_n; int t_n = pg->thread_n; jl_ptls_t ptls2 = gc_all_tls_states[t_n]; jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n]; int osize = pg->osize; - pfl[t_n * JL_GC_N_POOLS + p_n] = sweep_page(p, pg, pfl[t_n * JL_GC_N_POOLS + p_n], sweep_full, osize); -} - -// sweep over a pagetable0 for all allocated pages -STATIC_INLINE int sweep_pool_pagetable0(jl_taggedvalue_t ***pfl, pagetable0_t *pagetable0, int sweep_full) JL_NOTSAFEPOINT -{ - unsigned ub = 0; - unsigned alloc = 0; - for (unsigned pg_i = 0; pg_i <= pagetable0->ub; pg_i++) { - uint32_t line = pagetable0->allocmap[pg_i]; - unsigned j; - if (!line) - continue; - ub = pg_i; - alloc = 1; - for (j = 0; line; j++, line >>= 1) { - unsigned next = ffs_u32(line); - j += next; - line >>= next; - jl_gc_pagemeta_t *pg = pagetable0->meta[pg_i * 32 + j]; - sweep_pool_page(pfl, pg, sweep_full); - } - } - pagetable0->ub = ub; - return alloc; -} - -// sweep over pagetable1 for all pagetable0 that may contain allocated pages -STATIC_INLINE int sweep_pool_pagetable1(jl_taggedvalue_t ***pfl, pagetable1_t *pagetable1, int sweep_full) JL_NOTSAFEPOINT -{ - unsigned ub = 0; - unsigned alloc = 0; - for (unsigned pg_i = 0; pg_i <= pagetable1->ub; pg_i++) { - uint32_t line = pagetable1->allocmap0[pg_i]; - unsigned j; - for (j = 0; line; j++, line >>= 1) { - unsigned next = ffs_u32(line); - j += next; - line >>= next; - pagetable0_t *pagetable0 = pagetable1->meta0[pg_i * 32 + j]; - if (pagetable0 && !sweep_pool_pagetable0(pfl, pagetable0, sweep_full)) - pagetable1->allocmap0[pg_i] &= ~(1 << j); // no allocations found, remember that for next time - } - if (pagetable1->allocmap0[pg_i]) { - ub = pg_i; - alloc = 1; - } - } - 
pagetable1->ub = ub; - return alloc; -} - -// sweep over all memory for all pagetable1 that may contain allocated pages -static void sweep_pool_pagetable(jl_taggedvalue_t ***pfl, int sweep_full) JL_NOTSAFEPOINT -{ - if (REGION2_PG_COUNT == 1) { // compile-time optimization - pagetable1_t *pagetable1 = memory_map.meta1[0]; - if (pagetable1 != NULL) - sweep_pool_pagetable1(pfl, pagetable1, sweep_full); - return; - } - unsigned ub = 0; - for (unsigned pg_i = 0; pg_i <= memory_map.ub; pg_i++) { - uint32_t line = memory_map.allocmap1[pg_i]; - unsigned j; - for (j = 0; line; j++, line >>= 1) { - unsigned next = ffs_u32(line); - j += next; - line >>= next; - pagetable1_t *pagetable1 = memory_map.meta1[pg_i * 32 + j]; - if (pagetable1 && !sweep_pool_pagetable1(pfl, pagetable1, sweep_full)) - memory_map.allocmap1[pg_i] &= ~(1 << j); // no allocations found, remember that for next time - } - if (memory_map.allocmap1[pg_i]) { - ub = pg_i; - } - } - memory_map.ub = ub; + pfl[t_n * JL_GC_N_POOLS + p_n] = gc_sweep_page(p, allocd, lazily_freed, pg, pfl[t_n * JL_GC_N_POOLS + p_n], sweep_full, osize); } // sweep over all memory that is being used and not in a pool @@ -1270,7 +1132,7 @@ static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_NOTSAFEPOINT { - assert(pg->fl_begin_offset != (uint16_t)-1); + assert(pg->fl_begin_offset != UINT16_MAX); char *cur_pg = gc_page_data(last); // Fast path for page that has no allocation jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset); @@ -1311,8 +1173,8 @@ static void gc_sweep_pool(int sweep_full) for (int i = 0; i < JL_GC_N_POOLS; i++) { jl_gc_pool_t *p = &ptls2->heap.norm_pools[i]; jl_taggedvalue_t *last = p->freelist; - if (last) { - jl_gc_pagemeta_t *pg = jl_assume(page_metadata(last)); + if (last != NULL) { + jl_gc_pagemeta_t *pg = jl_assume(page_metadata_unsafe(last)); gc_pool_sync_nfree(pg, last); pg->has_young 
= 1; } @@ -1320,19 +1182,37 @@ static void gc_sweep_pool(int sweep_full) pfl[t_i * JL_GC_N_POOLS + i] = &p->freelist; last = p->newpages; - if (last) { + if (last != NULL) { char *last_p = (char*)last; - jl_gc_pagemeta_t *pg = jl_assume(page_metadata(last_p - 1)); + jl_gc_pagemeta_t *pg = jl_assume(page_metadata_unsafe(last_p - 1)); assert(last_p - gc_page_data(last_p - 1) >= GC_PAGE_OFFSET); pg->nfree = (GC_PAGE_SZ - (last_p - gc_page_data(last_p - 1))) / p->osize; pg->has_young = 1; } p->newpages = NULL; } + jl_gc_pagemeta_t *pg = ptls2->page_metadata_lazily_freed; + while (pg != NULL) { + jl_gc_pagemeta_t *pg2 = pg->next; + lazy_freed_pages++; + pg = pg2; + } } // the actual sweeping - sweep_pool_pagetable(pfl, sweep_full); + for (int t_i = 0; t_i < n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 != NULL) { + jl_gc_pagemeta_t *allocd = NULL; + jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd; + while (pg != NULL) { + jl_gc_pagemeta_t *pg2 = pg->next; + gc_sweep_pool_page(pfl, &allocd, &ptls2->page_metadata_lazily_freed, pg, sweep_full); + pg = pg2; + } + ptls2->page_metadata_allocd = allocd; + } + } // null out terminal pointers of free lists for (int t_i = 0; t_i < n_threads; t_i++) { @@ -1344,6 +1224,13 @@ static void gc_sweep_pool(int sweep_full) } } +#ifdef _P64 // only enable concurrent sweeping on 64bit + // wake thread up to sweep concurrently + if (jl_n_sweepthreads > 0) { + uv_sem_post(&gc_sweep_assists_needed); + } +#endif + gc_time_pool_end(sweep_full); } @@ -1431,14 +1318,37 @@ STATIC_INLINE uintptr_t gc_read_stack(void *_addr, uintptr_t offset, return *(uintptr_t*)real_addr; } -JL_NORETURN NOINLINE void gc_assert_datatype_fail(jl_ptls_t ptls, jl_datatype_t *vt, - jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT +STATIC_INLINE void gc_assert_parent_validity(jl_value_t *parent, jl_value_t *child) JL_NOTSAFEPOINT { - jl_safe_printf("GC error (probable corruption) :\n"); - jl_gc_debug_print_status(); - jl_(vt); - 
jl_gc_debug_critical_error(); - abort(); +#ifdef GC_ASSERT_PARENT_VALIDITY + jl_taggedvalue_t *child_astagged = jl_astaggedvalue(child); + jl_taggedvalue_t *child_vtag = (jl_taggedvalue_t *)(child_astagged->header & ~(uintptr_t)0xf); + uintptr_t child_vt = (uintptr_t)child_vtag; + if (child_vt == (jl_datatype_tag << 4) || + child_vt == (jl_unionall_tag << 4) || + child_vt == (jl_uniontype_tag << 4) || + child_vt == (jl_tvar_tag << 4) || + child_vt == (jl_vararg_tag << 4)) { + // Skip, since these wouldn't hit the object assert anyway + return; + } + else if (child_vt < jl_max_tags << 4) { + // Skip, since these wouldn't hit the object assert anyway + return; + } + if (__unlikely(!jl_is_datatype((jl_datatype_t *)child_vt) || ((jl_datatype_t *)child_vt)->smalltag)) { + jl_safe_printf("GC error (probable corruption)\n"); + jl_gc_debug_print_status(); + jl_safe_printf("Parent %p\n", (void *)parent); + jl_safe_printf("of type:\n"); + jl_(jl_typeof(parent)); + jl_safe_printf("While marking child at %p\n", (void *)child); + jl_safe_printf("of type:\n"); + jl_(child_vtag); + jl_gc_debug_critical_error(); + abort(); + } +#endif } // Check if `nptr` is tagged for `old + refyoung`, @@ -1502,6 +1412,28 @@ STATIC_INLINE jl_gc_chunk_t gc_chunkqueue_pop(jl_gc_markqueue_t *mq) JL_NOTSAFEP return c; } +// Dump mark queue on critical error +JL_NORETURN NOINLINE void gc_dump_queue_and_abort(jl_ptls_t ptls, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + jl_safe_printf("GC error (probable corruption)\n"); + jl_gc_debug_print_status(); + jl_(vt); + jl_gc_debug_critical_error(); + if (jl_n_gcthreads == 0) { + jl_safe_printf("\n"); + jl_value_t *new_obj; + jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_safe_printf("thread %d ptr queue:\n", ptls->tid); + jl_safe_printf("~~~~~~~~~~ ptr queue top ~~~~~~~~~~\n"); + while ((new_obj = gc_ptr_queue_steal_from(mq)) != NULL) { + jl_(new_obj); + jl_safe_printf("==========\n"); + } + jl_safe_printf("~~~~~~~~~~ ptr queue bottom ~~~~~~~~~~\n"); + } + 
abort(); +} + // Steal chunk from `mq2` STATIC_INLINE jl_gc_chunk_t gc_chunkqueue_steal_from(jl_gc_markqueue_t *mq2) JL_NOTSAFEPOINT { @@ -1538,6 +1470,7 @@ STATIC_INLINE jl_value_t *gc_mark_obj8(jl_ptls_t ptls, char *obj8_parent, uint8_ if (new_obj != NULL) { verify_parent2("object", obj8_parent, slot, "field(%d)", gc_slot_to_fieldidx(obj8_parent, slot, (jl_datatype_t*)jl_typeof(obj8_parent))); + gc_assert_parent_validity((jl_value_t *)obj8_parent, new_obj); if (obj8_begin + 1 != obj8_end) { gc_try_claim_and_push(mq, new_obj, &nptr); } @@ -1569,6 +1502,7 @@ STATIC_INLINE jl_value_t *gc_mark_obj16(jl_ptls_t ptls, char *obj16_parent, uint if (new_obj != NULL) { verify_parent2("object", obj16_parent, slot, "field(%d)", gc_slot_to_fieldidx(obj16_parent, slot, (jl_datatype_t*)jl_typeof(obj16_parent))); + gc_assert_parent_validity((jl_value_t *)obj16_parent, new_obj); if (obj16_begin + 1 != obj16_end) { gc_try_claim_and_push(mq, new_obj, &nptr); } @@ -1600,6 +1534,7 @@ STATIC_INLINE jl_value_t *gc_mark_obj32(jl_ptls_t ptls, char *obj32_parent, uint if (new_obj != NULL) { verify_parent2("object", obj32_parent, slot, "field(%d)", gc_slot_to_fieldidx(obj32_parent, slot, (jl_datatype_t*)jl_typeof(obj32_parent))); + gc_assert_parent_validity((jl_value_t *)obj32_parent, new_obj); if (obj32_begin + 1 != obj32_end) { gc_try_claim_and_push(mq, new_obj, &nptr); } @@ -1665,6 +1600,7 @@ STATIC_INLINE void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_v if (new_obj != NULL) { verify_parent2("obj array", obj_parent, obj_begin, "elem(%d)", gc_slot_to_arrayidx(obj_parent, obj_begin)); + gc_assert_parent_validity(obj_parent, new_obj); gc_try_claim_and_push(mq, new_obj, &nptr); gc_heap_snapshot_record_array_edge(obj_parent, &new_obj); } @@ -1738,6 +1674,7 @@ STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_va if (new_obj != NULL) { verify_parent2("array", ary8_parent, &new_obj, "elem(%d)", gc_slot_to_arrayidx(ary8_parent, ary8_begin)); + 
gc_assert_parent_validity(ary8_parent, new_obj); gc_try_claim_and_push(mq, new_obj, &nptr); gc_heap_snapshot_record_array_edge(ary8_parent, &new_obj); } @@ -1812,6 +1749,7 @@ STATIC_INLINE void gc_mark_array16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl_ if (new_obj != NULL) { verify_parent2("array", ary16_parent, &new_obj, "elem(%d)", gc_slot_to_arrayidx(ary16_parent, ary16_begin)); + gc_assert_parent_validity(ary16_parent, new_obj); gc_try_claim_and_push(mq, new_obj, &nptr); gc_heap_snapshot_record_array_edge(ary16_parent, &new_obj); } @@ -1965,12 +1903,16 @@ STATIC_INLINE void gc_mark_module_binding(jl_ptls_t ptls, jl_module_t *mb_parent if (b == (jl_binding_t *)jl_nothing) continue; verify_parent1("module", mb_parent, mb_begin, "binding_buff"); + gc_assert_parent_validity((jl_value_t *)mb_parent, (jl_value_t *)b); gc_try_claim_and_push(mq, b, &nptr); } jl_value_t *bindings = (jl_value_t *)jl_atomic_load_relaxed(&mb_parent->bindings); + gc_assert_parent_validity((jl_value_t *)mb_parent, bindings); gc_try_claim_and_push(mq, bindings, &nptr); jl_value_t *bindingkeyset = (jl_value_t *)jl_atomic_load_relaxed(&mb_parent->bindingkeyset); + gc_assert_parent_validity((jl_value_t *)mb_parent, bindingkeyset); gc_try_claim_and_push(mq, bindingkeyset, &nptr); + gc_assert_parent_validity((jl_value_t *)mb_parent, (jl_value_t *)mb_parent->parent); gc_try_claim_and_push(mq, (jl_value_t *)mb_parent->parent, &nptr); size_t nusings = mb_parent->usings.len; if (nusings > 0) { @@ -2000,7 +1942,7 @@ void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t * } for (; fl_begin < fl_end; fl_begin++) { new_obj = *fl_begin; - if (__unlikely(!new_obj)) + if (__unlikely(new_obj == NULL)) continue; if (gc_ptr_tag(new_obj, 1)) { new_obj = (jl_value_t *)gc_ptr_clear_tag(new_obj, 1); @@ -2192,7 +2134,7 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ else { jl_datatype_t *vt = (jl_datatype_t *)vtag; if (__unlikely(!jl_is_datatype(vt) || 
vt->smalltag)) - gc_assert_datatype_fail(ptls, vt, mq); + gc_dump_queue_and_abort(ptls, vt); } jl_datatype_t *vt = (jl_datatype_t *)vtag; if (vt->name == jl_array_typename) { @@ -2421,8 +2363,8 @@ void gc_mark_and_steal(jl_ptls_t ptls) // of work for the mark loop steal : { // Try to steal chunk from random GC thread - for (int i = 0; i < 4 * jl_n_gcthreads; i++) { - uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_gcthreads; + for (int i = 0; i < 4 * jl_n_markthreads; i++) { + uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_markthreads; jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue; c = gc_chunkqueue_steal_from(mq2); if (c.cid != GC_empty_chunk) { @@ -2431,7 +2373,7 @@ void gc_mark_and_steal(jl_ptls_t ptls) } } // Sequentially walk GC threads to try to steal chunk - for (int i = gc_first_tid; i < gc_first_tid + jl_n_gcthreads; i++) { + for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) { jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue; c = gc_chunkqueue_steal_from(mq2); if (c.cid != GC_empty_chunk) { @@ -2448,15 +2390,15 @@ void gc_mark_and_steal(jl_ptls_t ptls) } } // Try to steal pointer from random GC thread - for (int i = 0; i < 4 * jl_n_gcthreads; i++) { - uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_gcthreads; + for (int i = 0; i < 4 * jl_n_markthreads; i++) { + uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_markthreads; jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue; new_obj = gc_ptr_queue_steal_from(mq2); if (new_obj != NULL) goto mark; } // Sequentially walk GC threads to try to steal pointer - for (int i = gc_first_tid; i < gc_first_tid + jl_n_gcthreads; i++) { + for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) { jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue; new_obj = gc_ptr_queue_steal_from(mq2); if (new_obj != NULL) @@ -2471,19 +2413,6 
@@ void gc_mark_and_steal(jl_ptls_t ptls) } } -#define GC_BACKOFF_MIN 4 -#define GC_BACKOFF_MAX 12 - -void gc_mark_backoff(int *i) -{ - if (*i < GC_BACKOFF_MAX) { - (*i)++; - } - for (int j = 0; j < (1 << *i); j++) { - jl_cpu_pause(); - } -} - void gc_mark_loop_parallel(jl_ptls_t ptls, int master) { int backoff = GC_BACKOFF_MIN; @@ -2505,13 +2434,13 @@ void gc_mark_loop_parallel(jl_ptls_t ptls, int master) } jl_atomic_fetch_add(&gc_n_threads_marking, -1); // Failed to steal - gc_mark_backoff(&backoff); + gc_backoff(&backoff); } } void gc_mark_loop(jl_ptls_t ptls) { - if (jl_n_gcthreads == 0 || gc_heap_snapshot_enabled) { + if (jl_n_markthreads == 0 || gc_heap_snapshot_enabled) { gc_mark_loop_serial(ptls); } else { @@ -2529,7 +2458,7 @@ void gc_mark_loop_barrier(void) void gc_mark_clean_reclaim_sets(void) { - // Clean up `reclaim-sets` and reset `top/bottom` of queues + // Clean up `reclaim-sets` for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; arraylist_t *reclaim_set2 = &ptls2->mark_queue.reclaim_set; @@ -2812,13 +2741,13 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) } assert(gc_n_threads); - int single_threaded = (jl_n_gcthreads == 0 || gc_heap_snapshot_enabled); + int single_threaded_mark = (jl_n_markthreads == 0 || gc_heap_snapshot_enabled); for (int t_i = 0; t_i < gc_n_threads; t_i++) { jl_ptls_t ptls2 = gc_all_tls_states[t_i]; jl_ptls_t ptls_dest = ptls; jl_gc_markqueue_t *mq_dest = mq; - if (!single_threaded) { - ptls_dest = gc_all_tls_states[gc_first_tid + t_i % jl_n_gcthreads]; + if (!single_threaded_mark) { + ptls_dest = gc_all_tls_states[gc_first_tid + t_i % jl_n_markthreads]; mq_dest = &ptls_dest->mark_queue; } if (ptls2 != NULL) { @@ -2970,12 +2899,13 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) uint64_t start_sweep_time = jl_hrtime(); JL_PROBE_GC_SWEEP_BEGIN(sweep_full); { - JL_TIMING(GC, GC_Sweep); + JL_TIMING_CREATE_BLOCK(incremental_timing_block, + GC, 
GC_IncrementalSweep); + JL_TIMING_CREATE_BLOCK(full_timing_block, + GC, GC_FullSweep); + jl_timing_block_start(sweep_full ? &full_timing_block : &incremental_timing_block); #ifdef USE_TRACY - if (sweep_full) { - TracyCZoneCtx ctx = *(JL_TIMING_CURRENT_BLOCK->tracy_ctx); - TracyCZoneColor(ctx, 0xFFA500); - } + TracyCZoneColor(full_timing_block.tracy_ctx, 0xFFA500); #endif sweep_weak_refs(); sweep_stack_pools(); @@ -2994,6 +2924,9 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) uint64_t sweep_time = gc_end_time - start_sweep_time; gc_num.total_sweep_time += sweep_time; gc_num.sweep_time = sweep_time; + if (sweep_full) { + gc_num.last_full_sweep = gc_end_time; + } // sweeping is over // 7. if it is a quick sweep, put back the remembered objects in queued state @@ -3099,7 +3032,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; - if (jl_atomic_load_relaxed(&jl_gc_disable_counter)) { + if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval; jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); @@ -3110,16 +3043,15 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state); jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING); - // `jl_safepoint_start_gc()` makes sure only one thread can - // run the GC. + // `jl_safepoint_start_gc()` makes sure only one thread can run the GC. uint64_t t0 = jl_hrtime(); if (!jl_safepoint_start_gc()) { - // Multithread only. See assertion in `safepoint.c` + // either another thread is running GC, or the GC got disabled just now. 
jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING); return; } - JL_TIMING_SUSPEND(GC, ct); + JL_TIMING_SUSPEND_TASK(GC, ct); JL_TIMING(GC, GC); int last_errno = errno; @@ -3151,7 +3083,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) gc_invoke_callbacks(jl_gc_cb_pre_gc_t, gc_cblist_pre_gc, (collection)); - if (!jl_atomic_load_relaxed(&jl_gc_disable_counter)) { + if (!jl_atomic_load_acquire(&jl_gc_disable_counter)) { JL_LOCK_NOGC(&finalizers_lock); // all the other threads are stopped, so this does not make sense, right? otherwise, failing that, this seems like plausibly a deadlock #ifndef __clang_gcanalyzer__ if (_jl_gc_collect(ptls, collection)) { @@ -3253,13 +3185,13 @@ void jl_deinit_thread_heap(jl_ptls_t ptls) // System-wide initializations void jl_gc_init(void) { - JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock"); JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock"); uv_mutex_init(&gc_cache_lock); uv_mutex_init(&gc_perm_lock); uv_mutex_init(&gc_threads_lock); uv_cond_init(&gc_threads_cond); + uv_sem_init(&gc_sweep_assists_needed, 0); jl_gc_init_page(); jl_gc_debug_init(); @@ -3386,7 +3318,6 @@ jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz) // old pointer. bigval_t *newbig = (bigval_t*)gc_managed_realloc_(ptls, hdr, allocsz, oldsz, 1, s, 0); newbig->sz = allocsz; - newbig->age = 0; gc_big_object_link(newbig, &ptls->heap.big_objects); jl_value_t *snew = jl_valueof(&newbig->header); *(size_t*)snew = sz; @@ -3421,7 +3352,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) { p = (char *) p - 1; jl_gc_pagemeta_t *meta = page_metadata(p); - if (meta && meta->ages) { + if (meta != NULL) { char *page = gc_page_data(p); // offset within page. 
size_t off = (char *)p - page; @@ -3430,6 +3361,8 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) // offset within object size_t off2 = (off - GC_PAGE_OFFSET); size_t osize = meta->osize; + if (osize == 0) + return NULL; off2 %= osize; if (off - off2 + osize > GC_PAGE_SZ) return NULL; @@ -3456,7 +3389,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) char *data = gc_page_data(newpages); if (data != meta->data) { // Pages on newpages form a linked list where only the - // first one is allocated from (see reset_page()). + // first one is allocated from (see gc_reset_page()). // All other pages are empty. return NULL; } @@ -3484,7 +3417,6 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) // entries and 1 for live objects. The above subcases arise // because allocating a cell will not update the age bit, so we // need extra logic for pages that have been allocated from. - unsigned obj_id = (off - off2) / osize; // We now distinguish between the second and third subcase. // Freelist entries are consumed in ascending order. Anything // before the freelist pointer was either live during the last @@ -3492,11 +3424,6 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) if (gc_page_data(cell) == gc_page_data(pool->freelist) && (char *)cell < (char *)pool->freelist) goto valid_object; - // We know now that the age bit reflects liveness status during - // the last sweep and that the cell has not been reused since. - if (!(meta->ages[obj_id / 32] & (1 << (obj_id % 32)))) { - return NULL; - } // Not a freelist entry, therefore a valid object. 
valid_object: // We have to treat objects with type `jl_buff_tag` differently, diff --git a/src/gc.h b/src/gc.h index 9fa780c24c30f..0630a039f2b94 100644 --- a/src/gc.h +++ b/src/gc.h @@ -113,6 +113,7 @@ typedef struct { uint64_t mark_time; uint64_t total_sweep_time; uint64_t total_mark_time; + uint64_t last_full_sweep; } jl_gc_num_t; extern jl_gc_num_t gc_num; @@ -130,10 +131,7 @@ extern void reset_thread_gc_counts(void); JL_EXTENSION typedef struct _bigval_t { struct _bigval_t *next; struct _bigval_t **prev; // pointer to the next field of the prev entry - union { - size_t sz; - uintptr_t age : 2; - }; + size_t sz; #ifdef _P64 // Add padding so that the value is 64-byte aligned // (8 pointers of 8 bytes each) - (4 other pointers in struct) void *_padding[8 - 4]; @@ -265,7 +263,8 @@ typedef struct _jl_gc_chunk_t { #define GC_CHUNK_QUEUE_INIT_SIZE (1 << 14) // initial size of chunk-queue // pool page metadata -typedef struct { +typedef struct _jl_gc_pagemeta_t { + struct _jl_gc_pagemeta_t *next; // index of pool that owns this page uint8_t pool_n; // Whether any cell in the page is marked @@ -292,36 +291,63 @@ typedef struct { // number of free objects in this page. // invalid if pool that owns this page is allocating objects from this page. 
uint16_t nfree; - uint16_t osize; // size of each object in this page + uint16_t osize; // size of each object in this page uint16_t fl_begin_offset; // offset of first free object in this page uint16_t fl_end_offset; // offset of last free object in this page uint16_t thread_n; // thread id of the heap that owns this page char *data; - uint32_t *ages; } jl_gc_pagemeta_t; -// Page layout: -// Newpage freelist: sizeof(void*) -// Padding: GC_PAGE_OFFSET - sizeof(void*) -// Blocks: osize * n -// Tag: sizeof(jl_taggedvalue_t) -// Data: <= osize - sizeof(jl_taggedvalue_t) +typedef struct { + _Atomic(jl_gc_pagemeta_t *) page_metadata_back; +} jl_gc_global_page_pool_t; + +extern jl_gc_global_page_pool_t global_page_pool_lazily_freed; +extern jl_gc_global_page_pool_t global_page_pool_clean; +extern jl_gc_global_page_pool_t global_page_pool_freed; + +#define GC_BACKOFF_MIN 4 +#define GC_BACKOFF_MAX 12 + +STATIC_INLINE void gc_backoff(int *i) JL_NOTSAFEPOINT +{ + if (*i < GC_BACKOFF_MAX) { + (*i)++; + } + for (int j = 0; j < (1 << *i); j++) { + jl_cpu_pause(); + } +} + +// Lock-free stack implementation taken +// from Herlihy's "The Art of Multiprocessor Programming" + +STATIC_INLINE void push_lf_page_metadata_back(jl_gc_global_page_pool_t *pool, jl_gc_pagemeta_t *elt) JL_NOTSAFEPOINT +{ + while (1) { + jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->page_metadata_back); + elt->next = old_back; + if (jl_atomic_cmpswap(&pool->page_metadata_back, &old_back, elt)) { + break; + } + jl_cpu_pause(); + } +} + +STATIC_INLINE jl_gc_pagemeta_t *pop_lf_page_metadata_back(jl_gc_global_page_pool_t *pool) JL_NOTSAFEPOINT +{ + while (1) { + jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->page_metadata_back); + if (old_back == NULL) { + return NULL; + } + if (jl_atomic_cmpswap(&pool->page_metadata_back, &old_back, old_back->next)) { + return old_back; + } + jl_cpu_pause(); + } +} -// Memory map: -// The complete address space is divided up into a multi-level page 
table. -// The three levels have similar but slightly different structures: -// - pagetable0_t: the bottom/leaf level (covers the contiguous addresses) -// - pagetable1_t: the middle level -// - pagetable2_t: the top/leaf level (covers the entire virtual address space) -// Corresponding to these similar structures is a large amount of repetitive -// code that is nearly the same but not identical. It could be made less -// repetitive with C macros, but only at the cost of debuggability. The specialized -// structure of this representation allows us to partially unroll and optimize -// various conditions at each level. - -// The following constants define the branching factors at each level. -// The constants and GC_PAGE_LG2 must therefore sum to sizeof(void*). -// They should all be multiples of 32 (sizeof(uint32_t)) except that REGION2_PG_COUNT may also be 1. #ifdef _P64 #define REGION0_PG_COUNT (1 << 16) #define REGION1_PG_COUNT (1 << 16) @@ -340,35 +366,117 @@ typedef struct { // define the representation of the levels of the page-table (0 to 2) typedef struct { - jl_gc_pagemeta_t *meta[REGION0_PG_COUNT]; - uint32_t allocmap[REGION0_PG_COUNT / 32]; - uint32_t freemap[REGION0_PG_COUNT / 32]; - // store a lower bound of the first free page in each region - int lb; - // an upper bound of the last non-free page - int ub; + uint8_t meta[REGION0_PG_COUNT]; } pagetable0_t; typedef struct { pagetable0_t *meta0[REGION1_PG_COUNT]; - uint32_t allocmap0[REGION1_PG_COUNT / 32]; - uint32_t freemap0[REGION1_PG_COUNT / 32]; - // store a lower bound of the first free page in each region - int lb; - // an upper bound of the last non-free page - int ub; } pagetable1_t; typedef struct { pagetable1_t *meta1[REGION2_PG_COUNT]; - uint32_t allocmap1[(REGION2_PG_COUNT + 31) / 32]; - uint32_t freemap1[(REGION2_PG_COUNT + 31) / 32]; - // store a lower bound of the first free page in each region - int lb; - // an upper bound of the last non-free page - int ub; } pagetable_t; +#define 
GC_PAGE_UNMAPPED 0 +#define GC_PAGE_ALLOCATED 1 +#define GC_PAGE_LAZILY_FREED 2 +#define GC_PAGE_FREED 3 + +extern pagetable_t alloc_map; + +STATIC_INLINE uint8_t gc_alloc_map_is_set(char *_data) JL_NOTSAFEPOINT +{ + uintptr_t data = ((uintptr_t)_data); + unsigned i; + i = REGION_INDEX(data); + pagetable1_t *r1 = alloc_map.meta1[i]; + if (r1 == NULL) + return 0; + i = REGION1_INDEX(data); + pagetable0_t *r0 = r1->meta0[i]; + if (r0 == NULL) + return 0; + i = REGION0_INDEX(data); + return (r0->meta[i] == GC_PAGE_ALLOCATED); +} + +STATIC_INLINE void gc_alloc_map_set(char *_data, uint8_t v) JL_NOTSAFEPOINT +{ + uintptr_t data = ((uintptr_t)_data); + unsigned i; + i = REGION_INDEX(data); + pagetable1_t *r1 = alloc_map.meta1[i]; + assert(r1 != NULL); + i = REGION1_INDEX(data); + pagetable0_t *r0 = r1->meta0[i]; + assert(r0 != NULL); + i = REGION0_INDEX(data); + r0->meta[i] = v; +} + +STATIC_INLINE void gc_alloc_map_maybe_create(char *_data) JL_NOTSAFEPOINT +{ + uintptr_t data = ((uintptr_t)_data); + unsigned i; + i = REGION_INDEX(data); + pagetable1_t *r1 = alloc_map.meta1[i]; + if (r1 == NULL) { + r1 = (pagetable1_t*)calloc_s(sizeof(pagetable1_t)); + alloc_map.meta1[i] = r1; + } + i = REGION1_INDEX(data); + pagetable0_t *r0 = r1->meta0[i]; + if (r0 == NULL) { + r0 = (pagetable0_t*)calloc_s(sizeof(pagetable0_t)); + r1->meta0[i] = r0; + } +} + +// Page layout: +// Metadata pointer: sizeof(jl_gc_pagemeta_t*) +// Padding: GC_PAGE_OFFSET - sizeof(jl_gc_pagemeta_t*) +// Blocks: osize * n +// Tag: sizeof(jl_taggedvalue_t) +// Data: <= osize - sizeof(jl_taggedvalue_t) + +STATIC_INLINE char *gc_page_data(void *x) JL_NOTSAFEPOINT +{ + return (char*)(((uintptr_t)x >> GC_PAGE_LG2) << GC_PAGE_LG2); +} + +STATIC_INLINE jl_gc_pagemeta_t *page_metadata_unsafe(void *_data) JL_NOTSAFEPOINT +{ + return *(jl_gc_pagemeta_t**)(gc_page_data(_data)); +} + +STATIC_INLINE jl_gc_pagemeta_t *page_metadata(void *_data) JL_NOTSAFEPOINT +{ + if (!gc_alloc_map_is_set((char*)_data)) { + return NULL; + 
} + return page_metadata_unsafe(_data); +} + +STATIC_INLINE void set_page_metadata(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT +{ + *(jl_gc_pagemeta_t**)(pg->data) = pg; +} + +STATIC_INLINE void push_page_metadata_back(jl_gc_pagemeta_t **ppg, jl_gc_pagemeta_t *elt) JL_NOTSAFEPOINT +{ + elt->next = *ppg; + *ppg = elt; +} + +STATIC_INLINE jl_gc_pagemeta_t *pop_page_metadata_back(jl_gc_pagemeta_t **ppg) JL_NOTSAFEPOINT +{ + jl_gc_pagemeta_t *v = *ppg; + if (*ppg != NULL) { + *ppg = (*ppg)->next; + } + return v; +} + #ifdef __clang_gcanalyzer__ /* clang may not have __builtin_ffs */ unsigned ffs_u32(uint32_t bitvec) JL_NOTSAFEPOINT; #else @@ -390,12 +498,6 @@ STATIC_INLINE bigval_t *bigval_header(jl_taggedvalue_t *o) JL_NOTSAFEPOINT return container_of(o, bigval_t, header); } -// round an address inside a gcpage's data to its beginning -STATIC_INLINE char *gc_page_data(void *x) JL_NOTSAFEPOINT -{ - return (char*)(((uintptr_t)x >> GC_PAGE_LG2) << GC_PAGE_LG2); -} - STATIC_INLINE jl_taggedvalue_t *page_pfl_beg(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT { return (jl_taggedvalue_t*)(p->data + p->fl_begin_offset); @@ -418,52 +520,6 @@ STATIC_INLINE uintptr_t gc_set_bits(uintptr_t tag, int bits) JL_NOTSAFEPOINT NOINLINE uintptr_t gc_get_stack_ptr(void); -STATIC_INLINE jl_gc_pagemeta_t *page_metadata(void *_data) JL_NOTSAFEPOINT -{ - uintptr_t data = ((uintptr_t)_data); - unsigned i; - i = REGION_INDEX(data); - pagetable1_t *r1 = memory_map.meta1[i]; - if (!r1) - return NULL; - i = REGION1_INDEX(data); - pagetable0_t *r0 = r1->meta0[i]; - if (!r0) - return NULL; - i = REGION0_INDEX(data); - return r0->meta[i]; -} - -struct jl_gc_metadata_ext { - pagetable1_t *pagetable1; - pagetable0_t *pagetable0; - jl_gc_pagemeta_t *meta; - unsigned pagetable_i32, pagetable_i; - unsigned pagetable1_i32, pagetable1_i; - unsigned pagetable0_i32, pagetable0_i; -}; - -STATIC_INLINE struct jl_gc_metadata_ext page_metadata_ext(void *_data) JL_NOTSAFEPOINT -{ - uintptr_t data = (uintptr_t)_data; - struct 
jl_gc_metadata_ext info; - unsigned i; - i = REGION_INDEX(data); - info.pagetable_i = i % 32; - info.pagetable_i32 = i / 32; - info.pagetable1 = memory_map.meta1[i]; - i = REGION1_INDEX(data); - info.pagetable1_i = i % 32; - info.pagetable1_i32 = i / 32; - info.pagetable0 = info.pagetable1->meta0[i]; - i = REGION0_INDEX(data); - info.pagetable0_i = i % 32; - info.pagetable0_i32 = i / 32; - info.meta = info.pagetable0->meta[i]; - assert(info.meta); - return info; -} - STATIC_INLINE void gc_big_object_unlink(const bigval_t *hdr) JL_NOTSAFEPOINT { *hdr->prev = hdr->next; @@ -483,12 +539,11 @@ STATIC_INLINE void gc_big_object_link(bigval_t *hdr, bigval_t **list) JL_NOTSAFE extern uv_mutex_t gc_threads_lock; extern uv_cond_t gc_threads_cond; +extern uv_sem_t gc_sweep_assists_needed; extern _Atomic(int) gc_n_threads_marking; void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq); -void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, - jl_value_t **fl_end) JL_NOTSAFEPOINT; -void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, - size_t start) JL_NOTSAFEPOINT; +void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT; +void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start) JL_NOTSAFEPOINT; void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq); void gc_mark_loop_serial(jl_ptls_t ptls); void gc_mark_loop_parallel(jl_ptls_t ptls, int master); @@ -497,9 +552,9 @@ void jl_gc_debug_init(void); // GC pages -void jl_gc_init_page(void); +void jl_gc_init_page(void) JL_NOTSAFEPOINT; NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT; -void jl_gc_free_page(void *p) JL_NOTSAFEPOINT; +void jl_gc_free_page(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT; // GC debug diff --git a/src/gf.c b/src/gf.c index 431443dbbf451..294e1fccb8783 100644 --- a/src/gf.c +++ b/src/gf.c @@ -367,7 +367,7 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force) 
fargs[1] = (jl_value_t*)mi; fargs[2] = jl_box_ulong(world); - jl_timing_show_method_instance(mi, JL_TIMING_CURRENT_BLOCK); + jl_timing_show_method_instance(mi, JL_TIMING_DEFAULT_BLOCK); #ifdef TRACE_INFERENCE if (mi->specTypes != (jl_value_t*)jl_emptytuple_type) { jl_printf(JL_STDERR,"inference on "); @@ -735,7 +735,7 @@ static jl_value_t *inst_varargp_in_env(jl_value_t *decl, jl_svec_t *sparams) vm = T_has_tv ? jl_type_unionall(v, T) : T; if (N_has_tv) N = NULL; - vm = (jl_value_t*)jl_wrap_vararg(vm, N); // this cannot throw for these inputs + vm = (jl_value_t*)jl_wrap_vararg(vm, N, 1); // this cannot throw for these inputs } sp++; decl = ((jl_unionall_t*)decl)->body; @@ -984,7 +984,7 @@ static void jl_compilation_sig( // avoid Vararg{Type{Type{...}}} if (jl_is_type_type(type_i) && jl_is_type_type(jl_tparam0(type_i))) type_i = (jl_value_t*)jl_type_type; - type_i = (jl_value_t*)jl_wrap_vararg(type_i, (jl_value_t*)NULL); // this cannot throw for these inputs + type_i = (jl_value_t*)jl_wrap_vararg(type_i, (jl_value_t*)NULL, 1); // this cannot throw for these inputs } else { type_i = inst_varargp_in_env(decl, sparams); @@ -1986,7 +1986,7 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method JL_TIMING(ADD_METHOD, ADD_METHOD); assert(jl_is_method(method)); assert(jl_is_mtable(mt)); - jl_timing_show_method(method, JL_TIMING_CURRENT_BLOCK); + jl_timing_show_method(method, JL_TIMING_DEFAULT_BLOCK); jl_value_t *type = method->sig; jl_value_t *oldvalue = NULL; jl_array_t *oldmi = NULL; @@ -2392,7 +2392,8 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t if (jl_atomic_cmpswap_acqrel(&codeinst->specptr.fptr, &prev_fptr, fptr)) { jl_atomic_store_relaxed(&codeinst->specsigflags, specsigflags & 0b1); jl_atomic_store_release(&codeinst->invoke, invoke); - jl_atomic_store_release(&codeinst->specsigflags, specsigflags); + // unspec is probably not specsig, but might be using specptr + 
jl_atomic_store_release(&codeinst->specsigflags, specsigflags & ~0b1); // clear specsig flag } else { // someone else already compiled it while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) { @@ -2565,7 +2566,8 @@ JL_DLLEXPORT int32_t jl_invoke_api(jl_code_instance_t *codeinst) return -1; } -JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_methtable_t *mt, jl_tupletype_t *ti, jl_svec_t *env, jl_method_t *m) +JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_methtable_t *mt, jl_tupletype_t *ti, jl_svec_t *env, jl_method_t *m, + int return_if_compileable) { jl_tupletype_t *tt = NULL; jl_svec_t *newparams = NULL; @@ -2589,7 +2591,7 @@ JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_methtable_t *mt, jl_t if (!is_compileable) is_compileable = jl_isa_compileable_sig(tt, env, m); JL_GC_POP(); - return is_compileable ? (jl_value_t*)tt : jl_nothing; + return (!return_if_compileable || is_compileable) ? (jl_value_t*)tt : jl_nothing; } jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_PROPAGATES_ROOT) @@ -2600,7 +2602,7 @@ jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_ jl_methtable_t *mt = jl_method_get_table(def); if ((jl_value_t*)mt == jl_nothing) return mi; - jl_value_t *compilationsig = jl_normalize_to_compilable_sig(mt, (jl_datatype_t*)mi->specTypes, mi->sparam_vals, def); + jl_value_t *compilationsig = jl_normalize_to_compilable_sig(mt, (jl_datatype_t*)mi->specTypes, mi->sparam_vals, def, 1); if (compilationsig == jl_nothing || jl_egal(compilationsig, mi->specTypes)) return mi; jl_svec_t *env = NULL; @@ -2633,7 +2635,7 @@ jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t wor JL_UNLOCK(&mt->writelock); } else { - jl_value_t *tt = jl_normalize_to_compilable_sig(mt, ti, env, m); + jl_value_t *tt = jl_normalize_to_compilable_sig(mt, ti, env, m, 1); if (tt != jl_nothing) { JL_GC_PUSH2(&tt, &env); if (!jl_egal(tt, (jl_value_t*)ti)) { @@ 
-3575,9 +3577,9 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int intersections, size_t world, int cache_result, size_t *min_valid, size_t *max_valid, int *ambig) { - JL_TIMING(METHOD_MATCH, METHOD_MATCH); if (world > jl_atomic_load_acquire(&jl_world_counter)) return jl_nothing; // the future is not enumerable + JL_TIMING(METHOD_MATCH, METHOD_MATCH); int has_ambiguity = 0; jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)type); assert(jl_is_datatype(unw)); diff --git a/src/init.c b/src/init.c index 9c18a60eb8b06..52f4740ccc306 100644 --- a/src/init.c +++ b/src/init.c @@ -875,7 +875,11 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_ if (jl_base_module == NULL) { // nthreads > 1 requires code in Base jl_atomic_store_relaxed(&jl_n_threads, 1); + jl_n_markthreads = 0; + jl_n_sweepthreads = 0; jl_n_gcthreads = 0; + jl_n_threads_per_pool[0] = 1; + jl_n_threads_per_pool[1] = 0; } jl_start_threads(); diff --git a/src/interpreter.c b/src/interpreter.c index c08496f72ce04..2ad56e76b2549 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -349,20 +349,34 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_ { size_t from = s->ip; size_t ip = to; - unsigned nphi = 0; + unsigned nphiblockstmts = 0; for (ip = to; ip < ns; ip++) { jl_value_t *e = jl_array_ptr_ref(stmts, ip); - if (!jl_is_phinode(e)) - break; - nphi += 1; + if (!jl_is_phinode(e)) { + if (jl_is_expr(e) || jl_is_returnnode(e) || jl_is_gotoifnot(e) || + jl_is_gotonode(e) || jl_is_phicnode(e) || jl_is_upsilonnode(e) || + jl_is_ssavalue(e)) { + break; + } + // Everything else is allowed in the phi-block for implementation + // convenience - fall through. 
+ } + nphiblockstmts += 1; } - if (nphi) { + if (nphiblockstmts) { jl_value_t **dest = &s->locals[jl_source_nslots(s->src) + to]; - jl_value_t **phis; // = (jl_value_t**)alloca(sizeof(jl_value_t*) * nphi); - JL_GC_PUSHARGS(phis, nphi); - for (unsigned i = 0; i < nphi; i++) { + jl_value_t **phis; // = (jl_value_t**)alloca(sizeof(jl_value_t*) * nphiblockstmts); + JL_GC_PUSHARGS(phis, nphiblockstmts); + for (unsigned i = 0; i < nphiblockstmts; i++) { jl_value_t *e = jl_array_ptr_ref(stmts, to + i); - assert(jl_is_phinode(e)); + if (!jl_is_phinode(e)) { + // IR verification guarantees that the only thing that gets + // evaluated here are constants, so it doesn't matter if we + // update the locals or the phis, but let's be consistent + // for simplicity. + phis[i] = eval_value(e, s); + continue; + } jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(e, 0); ssize_t edge = -1; size_t closest = to; // implicit edge has `to <= edge - 1 < to + i` @@ -405,7 +419,7 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_ i -= n_oldphi; dest += n_oldphi; to += n_oldphi; - nphi -= n_oldphi; + nphiblockstmts -= n_oldphi; } if (edge != -1) { // if edges list doesn't contain last branch, or the value is explicitly undefined @@ -418,7 +432,7 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_ phis[i] = val; } // now move all phi values to their position in edges - for (unsigned j = 0; j < nphi; j++) { + for (unsigned j = 0; j < nphiblockstmts; j++) { dest[j] = phis[j]; } JL_GC_POP(); diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 7bef27f477534..810982370de19 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -345,12 +345,14 @@ static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed) else if (!ty->isIntOrPtrTy() && !ty->isFloatingPointTy()) { assert(DL.getTypeSizeInBits(ty) == DL.getTypeSizeInBits(to)); AllocaInst *cast = ctx.builder.CreateAlloca(ty); + 
setName(ctx.emission_context, cast, "coercion"); ctx.builder.CreateStore(unboxed, cast); unboxed = ctx.builder.CreateLoad(to, ctx.builder.CreateBitCast(cast, to->getPointerTo())); } else if (frompointer) { Type *INTT_to = INTT(to, DL); unboxed = ctx.builder.CreatePtrToInt(unboxed, INTT_to); + setName(ctx.emission_context, unboxed, "coercion"); if (INTT_to != to) unboxed = ctx.builder.CreateBitCast(unboxed, to); } @@ -359,6 +361,7 @@ static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed) if (to != INTT_to) unboxed = ctx.builder.CreateBitCast(unboxed, INTT_to); unboxed = emit_inttoptr(ctx, unboxed, to); + setName(ctx.emission_context, unboxed, "coercion"); } else { unboxed = ctx.builder.CreateBitCast(unboxed, to); @@ -394,6 +397,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va if (jt == (jl_value_t*)jl_bool_type || to->isIntegerTy(1)) { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); Instruction *unbox_load = ai.decorateInst(ctx.builder.CreateLoad(getInt8Ty(ctx.builder.getContext()), maybe_bitcast(ctx, p, getInt8PtrTy(ctx.builder.getContext())))); + setName(ctx.emission_context, unbox_load, "unbox"); if (jt == (jl_value_t*)jl_bool_type) unbox_load->setMetadata(LLVMContext::MD_range, MDNode::get(ctx.builder.getContext(), { ConstantAsMetadata::get(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)), @@ -421,12 +425,14 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va (to->isFloatingPointTy() || to->isIntegerTy() || to->isPointerTy()) && DL.getTypeSizeInBits(AllocType) == DL.getTypeSizeInBits(to)) { Instruction *load = ctx.builder.CreateAlignedLoad(AllocType, p, Align(alignment)); + setName(ctx.emission_context, load, "unbox"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); return emit_unboxed_coercion(ctx, to, ai.decorateInst(load)); } } p = maybe_bitcast(ctx, p, ptype); Instruction *load = ctx.builder.CreateAlignedLoad(to, p, Align(alignment)); 
+ setName(ctx.emission_context, load, "unbox"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); return ai.decorateInst(load); } @@ -531,8 +537,10 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv) } else { Value *size = emit_datatype_size(ctx, typ); + auto sizecheck = ctx.builder.CreateICmpEQ(size, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nb)); + setName(ctx.emission_context, sizecheck, "sizecheck"); error_unless(ctx, - ctx.builder.CreateICmpEQ(size, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nb)), + sizecheck, "bitcast: argument size does not match size of target type"); } } @@ -555,20 +563,25 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv) storage_type, emit_bitcast(ctx, data_pointer(ctx, v), storage_type->getPointerTo()))); + setName(ctx.emission_context, vx, "bitcast"); } vxt = vx->getType(); if (vxt != llvmt) { - if (llvmt->isIntegerTy(1)) + if (llvmt->isIntegerTy(1)) { vx = ctx.builder.CreateTrunc(vx, llvmt); - else if (vxt->isIntegerTy(1) && llvmt->isIntegerTy(8)) + } else if (vxt->isIntegerTy(1) && llvmt->isIntegerTy(8)) { vx = ctx.builder.CreateZExt(vx, llvmt); - else if (vxt->isPointerTy() && !llvmt->isPointerTy()) + } else if (vxt->isPointerTy() && !llvmt->isPointerTy()) { vx = ctx.builder.CreatePtrToInt(vx, llvmt); - else if (!vxt->isPointerTy() && llvmt->isPointerTy()) + setName(ctx.emission_context, vx, "bitcast_coercion"); + } else if (!vxt->isPointerTy() && llvmt->isPointerTy()) { vx = emit_inttoptr(ctx, vx, llvmt); - else + setName(ctx.emission_context, vx, "bitcast_coercion"); + } else { vx = emit_bitcast(ctx, vx, llvmt); + setName(ctx.emission_context, vx, "bitcast_coercion"); + } } if (jl_is_concrete_type((jl_value_t*)bt)) { @@ -576,6 +589,7 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv) } else { Value *box = emit_allocobj(ctx, nb, bt_value_rt); + setName(ctx.emission_context, box, "bitcast_box"); 
init_bits_value(ctx, box, vx, ctx.tbaa().tbaa_immut); return mark_julia_type(ctx, box, true, bt->name->wrapper); } @@ -618,8 +632,10 @@ static jl_cgval_t generic_cast( // but if we start looking at more bits we need to actually do the // rounding first instead of carrying around incorrect low bits. Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType()); + setName(ctx.emission_context, jlfloattemp_var, "rounding_slot"); ctx.builder.CreateStore(from, jlfloattemp_var); from = ctx.builder.CreateLoad(from->getType(), jlfloattemp_var, /*force this to load from the stack*/true); + setName(ctx.emission_context, from, "rounded"); } } Value *ans = ctx.builder.CreateCast(Op, from, to); @@ -632,6 +648,7 @@ static jl_cgval_t generic_cast( Value *targ_rt = boxed(ctx, targ); emit_concretecheck(ctx, targ_rt, std::string(jl_intrinsic_name(f)) + ": target type not a leaf primitive type"); Value *box = emit_allocobj(ctx, nb, targ_rt); + setName(ctx.emission_context, box, "cast_box"); init_bits_value(ctx, box, ans, ctx.tbaa().tbaa_immut); return mark_julia_type(ctx, box, true, jlto->name->wrapper); } @@ -667,22 +684,28 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) Value *idx = emit_unbox(ctx, ctx.types().T_size, i, (jl_value_t*)jl_long_type); Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(ctx.types().T_size, 1)); + setName(ctx.emission_context, im1, "pointerref_idx"); if (ety == (jl_value_t*)jl_any_type) { Value *thePtr = emit_unbox(ctx, ctx.types().T_pprjlvalue, e, e.typ); + setName(ctx.emission_context, thePtr, "unbox_any_ptr"); LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, thePtr, im1), Align(align_nb)); + setName(ctx.emission_context, load, "any_unbox"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data); ai.decorateInst(load); return mark_julia_type(ctx, load, true, ety); } - else if (!jl_isbits(ety)) { + else if 
(!deserves_stack(ety)) { assert(jl_is_datatype(ety)); uint64_t size = jl_datatype_size(ety); Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety); + setName(ctx.emission_context, strct, "pointerref_box"); im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size, LLT_ALIGN(size, jl_datatype_align(ety)))); + setName(ctx.emission_context, im1, "pointerref_offset"); Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ); thePtr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, thePtr, getInt8PtrTy(ctx.builder.getContext())), im1); + setName(ctx.emission_context, thePtr, "pointerref_src"); MDNode *tbaa = best_tbaa(ctx.tbaa(), ety); emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, 1); return mark_julia_type(ctx, strct, true, ety); @@ -693,7 +716,9 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) assert(!isboxed); if (!type_is_ghost(ptrty)) { Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ); - return typed_load(ctx, thePtr, im1, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, AtomicOrdering::NotAtomic, true, align_nb); + auto load = typed_load(ctx, thePtr, im1, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, AtomicOrdering::NotAtomic, false, align_nb); + setName(ctx.emission_context, load.V, "pointerref"); + return load; } else { return ghostValue(ctx, ety); @@ -736,23 +761,29 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv) Value *idx = emit_unbox(ctx, ctx.types().T_size, i, (jl_value_t*)jl_long_type); Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(ctx.types().T_size, 1)); + setName(ctx.emission_context, im1, "pointerset_idx"); Value *thePtr; if (ety == (jl_value_t*)jl_any_type) { // unsafe_store to Ptr{Any} is allowed to implicitly drop GC roots. 
thePtr = emit_unbox(ctx, ctx.types().T_size->getPointerTo(), e, e.typ); - Instruction *store = ctx.builder.CreateAlignedStore( - ctx.builder.CreatePtrToInt(emit_pointer_from_objref(ctx, boxed(ctx, x)), ctx.types().T_size), - ctx.builder.CreateInBoundsGEP(ctx.types().T_size, thePtr, im1), Align(align_nb)); + auto gep = ctx.builder.CreateInBoundsGEP(ctx.types().T_size, thePtr, im1); + setName(ctx.emission_context, gep, "pointerset_ptr"); + auto val = ctx.builder.CreatePtrToInt(emit_pointer_from_objref(ctx, boxed(ctx, x)), ctx.types().T_size); + setName(ctx.emission_context, val, "pointerset_val"); + Instruction *store = ctx.builder.CreateAlignedStore(val, gep, Align(align_nb)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data); ai.decorateInst(store); } - else if (!jl_isbits(ety)) { + else if (x.ispointer()) { thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ); uint64_t size = jl_datatype_size(ety); im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size, LLT_ALIGN(size, jl_datatype_align(ety)))); - emit_memcpy(ctx, ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1), jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, align_nb); + setName(ctx.emission_context, im1, "pointerset_offset"); + auto gep = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1); + setName(ctx.emission_context, gep, "pointerset_ptr"); + emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, align_nb); } else { bool isboxed; @@ -803,6 +834,7 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) if (ety == (jl_value_t*)jl_any_type) { Value *thePtr = emit_unbox(ctx, ctx.types().T_pprjlvalue, e, e.typ); LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, thePtr, Align(sizeof(jl_value_t*))); + setName(ctx.emission_context, load, "atomic_pointerref"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data); 
ai.decorateInst(load); load->setOrdering(llvm_order); @@ -820,14 +852,16 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) return jl_cgval_t(); } - if (!jl_isbits(ety)) { + if (!deserves_stack(ety)) { assert(jl_is_datatype(ety)); Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety); + setName(ctx.emission_context, strct, "atomic_pointerref_box"); Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ); Type *loadT = Type::getIntNTy(ctx.builder.getContext(), nb * 8); thePtr = emit_bitcast(ctx, thePtr, loadT->getPointerTo()); MDNode *tbaa = best_tbaa(ctx.tbaa(), ety); LoadInst *load = ctx.builder.CreateAlignedLoad(loadT, thePtr, Align(nb)); + setName(ctx.emission_context, load, "atomic_pointerref"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.decorateInst(load); load->setOrdering(llvm_order); @@ -842,7 +876,9 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) assert(!isboxed); if (!type_is_ghost(ptrty)) { Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ); - return typed_load(ctx, thePtr, nullptr, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, llvm_order, true, nb); + auto load = typed_load(ctx, thePtr, nullptr, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, llvm_order, false, nb); + setName(ctx.emission_context, load.V, "atomic_pointerref"); + return load; } else { if (order > jl_memory_order_monotonic) @@ -918,6 +954,7 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl } if (!jl_isbits(ety)) { + //if (!deserves_stack(ety)) //Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ); //uint64_t size = jl_datatype_size(ety); return emit_runtime_call(ctx, f, argv, nargs); // TODO: optimizations @@ -926,7 +963,11 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl bool isboxed; Type *ptrty = julia_type_to_llvm(ctx, ety, &isboxed); assert(!isboxed); - Value *thePtr = 
emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ); + Value *thePtr; + if (!type_is_ghost(ptrty)) + thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ); + else + thePtr = nullptr; // could use any value here, since typed_store will not use it jl_cgval_t ret = typed_store(ctx, thePtr, nullptr, x, y, ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed, llvm_order, llvm_failorder, nb, false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, modifyop, "atomic_pointermodify"); if (issetfield) @@ -938,15 +979,18 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl static Value *emit_checked_srem_int(jl_codectx_t &ctx, Value *x, Value *den) { Type *t = den->getType(); + auto ndivby0 = ctx.builder.CreateICmpNE(den, ConstantInt::get(t, 0)); + setName(ctx.emission_context, ndivby0, "ndivby0"); raise_exception_unless(ctx, - ctx.builder.CreateICmpNE(den, ConstantInt::get(t, 0)), + ndivby0, literal_pointer_val(ctx, jl_diverror_exception)); BasicBlock *m1BB = BasicBlock::Create(ctx.builder.getContext(), "minus1", ctx.f); BasicBlock *okBB = BasicBlock::Create(ctx.builder.getContext(), "oksrem", ctx.f); BasicBlock *cont = BasicBlock::Create(ctx.builder.getContext(), "after_srem", ctx.f); PHINode *ret = PHINode::Create(t, 2); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(den ,ConstantInt::get(t, -1, true)), - m1BB, okBB); + auto divbym1 = ctx.builder.CreateICmpEQ(den, ConstantInt::get(t, -1, true)); + setName(ctx.emission_context, divbym1, "divbym1"); + ctx.builder.CreateCondBr(divbym1, m1BB, okBB); ctx.builder.SetInsertPoint(m1BB); ctx.builder.CreateBr(cont); ctx.builder.SetInsertPoint(okBB); @@ -956,6 +1000,7 @@ static Value *emit_checked_srem_int(jl_codectx_t &ctx, Value *x, Value *den) ret->addIncoming(// rem(typemin, -1) is undefined ConstantInt::get(t, 0), m1BB); ret->addIncoming(sremval, okBB); + setName(ctx.emission_context, ret, "checked_srem"); ctx.builder.Insert(ret); return ret; } @@ -991,6 +1036,7 @@ static Value 
*emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_cgval_t y, jl_value_t *rt_hint) { Value *isfalse = emit_condition(ctx, c, "ifelse"); + setName(ctx.emission_context, isfalse, "ifelse_cond"); jl_value_t *t1 = x.typ; jl_value_t *t2 = y.typ; // handle cases where the condition is irrelevant based on type info @@ -1063,6 +1109,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_ if (x_ptr->getType() != y_ptr->getType()) y_ptr = ctx.builder.CreateBitCast(y_ptr, x_ptr->getType()); ifelse_result = ctx.builder.CreateSelect(isfalse, y_ptr, x_ptr); + setName(ctx.emission_context, ifelse_result, "ifelse_result"); ifelse_tbaa = MDNode::getMostGenericTBAA(x.tbaa, y.tbaa); if (ifelse_tbaa == NULL) { // LLVM won't return a TBAA result for the root, but mark_julia_struct requires it: make it now @@ -1107,6 +1154,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_ ctx.builder.SetInsertPoint(post); ctx.builder.Insert(ret); tindex = ret; + setName(ctx.emission_context, tindex, "ifelse_tindex"); } jl_cgval_t ret = mark_julia_slot(ifelse_result, rt_hint, tindex, ifelse_tbaa); if (x_vboxed || y_vboxed) { @@ -1115,6 +1163,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_ if (!y_vboxed) y_vboxed = ConstantPointerNull::get(cast(x_vboxed->getType())); ret.Vboxed = ctx.builder.CreateSelect(isfalse, y_vboxed, x_vboxed); + setName(ctx.emission_context, ret.Vboxed, "ifelse_vboxed"); assert(ret.Vboxed->getType() == ctx.types().T_prjlvalue); } return ret; @@ -1122,6 +1171,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_ ifelse_result = ctx.builder.CreateSelect(isfalse, boxed(ctx, y), boxed(ctx, x)); + setName(ctx.emission_context, ifelse_result, "ifelse_result"); } jl_value_t *jt = (t1 == t2 ? 
t1 : rt_hint); return mark_julia_type(ctx, ifelse_result, isboxed, jt); @@ -1407,7 +1457,9 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg FunctionCallee intr = Intrinsic::getDeclaration(jl_Module, intr_id, makeArrayRef(t)); Value *res = ctx.builder.CreateCall(intr, {x, y}); Value *val = ctx.builder.CreateExtractValue(res, ArrayRef(0)); + setName(ctx.emission_context, val, "checked"); Value *obit = ctx.builder.CreateExtractValue(res, ArrayRef(1)); + setName(ctx.emission_context, obit, "overflow"); Value *obyte = ctx.builder.CreateZExt(obit, getInt8Ty(ctx.builder.getContext())); jl_value_t *params[2]; @@ -1425,30 +1477,31 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg case checked_sdiv_int: { Value *typemin = ctx.builder.CreateShl(ConstantInt::get(t, 1), t->getPrimitiveSizeInBits() - 1); - raise_exception_unless(ctx, - ctx.builder.CreateAnd( - ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)), - ctx.builder.CreateOr( - ctx.builder.CreateICmpNE(y, ConstantInt::get(t, -1, true)), - ctx.builder.CreateICmpNE(x, typemin))), - literal_pointer_val(ctx, jl_diverror_exception)); + auto cond = ctx.builder.CreateAnd( + ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)), + ctx.builder.CreateOr( + ctx.builder.CreateICmpNE(y, ConstantInt::get(t, -1, true)), + ctx.builder.CreateICmpNE(x, typemin))); + setName(ctx.emission_context, cond, "divisor_valid"); + raise_exception_unless(ctx, cond, literal_pointer_val(ctx, jl_diverror_exception)); return ctx.builder.CreateSDiv(x, y); } - case checked_udiv_int: - raise_exception_unless(ctx, - ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)), - literal_pointer_val(ctx, jl_diverror_exception)); + case checked_udiv_int: { + auto cond = ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)); + setName(ctx.emission_context, cond, "ndivby0"); + raise_exception_unless(ctx, cond, literal_pointer_val(ctx, jl_diverror_exception)); return ctx.builder.CreateUDiv(x, y); - + } 
case checked_srem_int: return emit_checked_srem_int(ctx, x, y); - case checked_urem_int: - raise_exception_unless(ctx, - ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)), - literal_pointer_val(ctx, jl_diverror_exception)); + case checked_urem_int: { + auto cond = ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)); + setName(ctx.emission_context, cond, "ndivby0"); + raise_exception_unless(ctx, cond, literal_pointer_val(ctx, jl_diverror_exception)); return ctx.builder.CreateURem(x, y); + } case eq_int: *newtyp = jl_bool_type; return ctx.builder.CreateICmpEQ(x, y); case ne_int: *newtyp = jl_bool_type; return ctx.builder.CreateICmpNE(x, y); diff --git a/src/ircode.c b/src/ircode.c index 4121d6691aa5b..bc5cc61e7f892 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -434,13 +434,14 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) } } -static jl_code_info_flags_t code_info_flags(uint8_t inferred, uint8_t propagate_inbounds, - uint8_t has_fcall, uint8_t inlining, uint8_t constprop) +static jl_code_info_flags_t code_info_flags(uint8_t inferred, uint8_t propagate_inbounds, uint8_t has_fcall, + uint8_t nospecializeinfer, uint8_t inlining, uint8_t constprop) { jl_code_info_flags_t flags; flags.bits.inferred = inferred; flags.bits.propagate_inbounds = propagate_inbounds; flags.bits.has_fcall = has_fcall; + flags.bits.nospecializeinfer = nospecializeinfer; flags.bits.inlining = inlining; flags.bits.constprop = constprop; return flags; @@ -785,8 +786,8 @@ JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) 1 }; - jl_code_info_flags_t flags = code_info_flags(code->inferred, code->propagate_inbounds, - code->has_fcall, code->inlining, code->constprop); + jl_code_info_flags_t flags = code_info_flags(code->inferred, code->propagate_inbounds, code->has_fcall, + code->nospecializeinfer, code->inlining, code->constprop); write_uint8(s.s, flags.packed); write_uint8(s.s, code->purity.bits); write_uint16(s.s, 
code->inlining_cost); @@ -885,6 +886,7 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t code->inferred = flags.bits.inferred; code->propagate_inbounds = flags.bits.propagate_inbounds; code->has_fcall = flags.bits.has_fcall; + code->nospecializeinfer = flags.bits.nospecializeinfer; code->purity.bits = read_uint8(s.s); code->inlining_cost = read_uint16(s.s); diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 895fed9c056e2..ae4a2ed02fb7e 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -39,16 +39,13 @@ using namespace llvm; #include "julia_assert.h" #include "processor.h" -#ifdef JL_USE_JITLINK # include # include # include # if JL_LLVM_VERSION >= 150000 # include # endif -#else # include -#endif #define DEBUG_TYPE "julia_jitlayers" @@ -190,8 +187,7 @@ static jl_callptr_t _jl_compile_codeinst( JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE); #ifdef USE_TRACY if (is_recompile) { - TracyCZoneCtx ctx = *(JL_TIMING_CURRENT_BLOCK->tracy_ctx); - TracyCZoneColor(ctx, 0xFFA500); + TracyCZoneColor(JL_TIMING_DEFAULT_BLOCK->tracy_ctx, 0xFFA500); } #endif jl_callptr_t fptr = NULL; @@ -199,6 +195,8 @@ static jl_callptr_t _jl_compile_codeinst( jl_codegen_params_t params(std::move(context), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context params.cache = true; params.world = world; + params.imaging = imaging_default(); + params.debug_level = jl_options.debug_level; jl_workqueue_t emitted; { orc::ThreadSafeModule result_m = @@ -213,35 +211,46 @@ static jl_callptr_t _jl_compile_codeinst( if (params._shared_module) jl_ExecutionEngine->addModule(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); - StringMap NewExports; - StringMap NewGlobals; - for (auto &global : params.globals) { - NewGlobals[global.second->getName()] = global.first; - } - for (auto &def : emitted) { - orc::ThreadSafeModule &TSM = std::get<0>(def.second); - //The underlying context object is still 
locked because params is not destroyed yet - auto M = TSM.getModuleUnlocked(); - for (auto &F : M->global_objects()) { - if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { - NewExports[F.getName()] = &TSM; + if (!params.imaging) { + StringMap NewExports; + StringMap NewGlobals; + for (auto &global : params.globals) { + NewGlobals[global.second->getName()] = global.first; + } + for (auto &def : emitted) { + orc::ThreadSafeModule &TSM = std::get<0>(def.second); + //The underlying context object is still locked because params is not destroyed yet + auto M = TSM.getModuleUnlocked(); + for (auto &F : M->global_objects()) { + if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { + NewExports[F.getName()] = &TSM; + } + } + // Let's link all globals here also (for now) + for (auto &GV : M->globals()) { + auto InitValue = NewGlobals.find(GV.getName()); + if (InitValue != NewGlobals.end()) { + jl_link_global(&GV, InitValue->second); + } } } - // Let's link all globals here also (for now) - for (auto &GV : M->globals()) { - auto InitValue = NewGlobals.find(GV.getName()); - if (InitValue != NewGlobals.end()) { - jl_link_global(&GV, InitValue->second); + DenseMap Queued; + std::vector Stack; + for (auto &def : emitted) { + // Add the results to the execution engine now + orc::ThreadSafeModule &M = std::get<0>(def.second); + jl_add_to_ee(M, NewExports, Queued, Stack); + assert(Queued.empty() && Stack.empty() && !M); + } + } else { + jl_jit_globals(params.globals); + auto main = std::move(emitted[codeinst].first); + for (auto &def : emitted) { + if (def.first != codeinst) { + jl_merge_module(main, std::move(def.second.first)); } } - } - DenseMap Queued; - std::vector Stack; - for (auto &def : emitted) { - // Add the results to the execution engine now - orc::ThreadSafeModule &M = std::get<0>(def.second); - jl_add_to_ee(M, NewExports, Queued, Stack); - assert(Queued.empty() && Stack.empty() && !M); + 
jl_ExecutionEngine->addModule(std::move(main)); } ++CompiledCodeinsts; MaxWorkqueueSize.updateMax(emitted.size()); @@ -252,7 +261,7 @@ static jl_callptr_t _jl_compile_codeinst( for (auto &def : emitted) { jl_code_instance_t *this_code = def.first; if (i < jl_timing_print_limit) - jl_timing_show_func_sig(this_code->def->specTypes, JL_TIMING_CURRENT_BLOCK); + jl_timing_show_func_sig(this_code->def->specTypes, JL_TIMING_DEFAULT_BLOCK); jl_llvm_functions_t decls = std::get<1>(def.second); jl_callptr_t addr; @@ -301,7 +310,7 @@ static jl_callptr_t _jl_compile_codeinst( i++; } if (i > jl_timing_print_limit) - jl_timing_printf(JL_TIMING_CURRENT_BLOCK, "... <%d methods truncated>", i - 10); + jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, "... <%d methods truncated>", i - 10); uint64_t end_time = 0; if (timed) @@ -351,6 +360,8 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void * return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple())); }); jl_codegen_params_t params(into->getContext(), std::move(target_info.first), std::move(target_info.second)); + params.imaging = imaging_default(); + params.debug_level = jl_options.debug_level; if (pparams == NULL) pparams = ¶ms; assert(pparams->tsctx.getContext() == into->getContext().getContext()); @@ -536,21 +547,18 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec) jl_method_t *def = unspec->def->def.method; if (jl_is_method(def)) { src = (jl_code_info_t*)def->source; - if (src == NULL) { - // TODO: this is wrong - assert(def->generator); - // TODO: jl_code_for_staged can throw - src = jl_code_for_staged(unspec->def, ~(size_t)0); - } if (src && (jl_value_t*)src != jl_nothing) src = jl_uncompress_ir(def, NULL, (jl_value_t*)src); } else { src = (jl_code_info_t*)jl_atomic_load_relaxed(&unspec->def->uninferred); + assert(src); + } + if (src) { + assert(jl_is_code_info(src)); + ++UnspecFPtrCount; + _jl_compile_codeinst(unspec, src, unspec->min_world, 
*jl_ExecutionEngine->getContext(), 0); } - assert(src && jl_is_code_info(src)); - ++UnspecFPtrCount; - _jl_compile_codeinst(unspec, src, unspec->min_world, *jl_ExecutionEngine->getContext(), 0); jl_callptr_t null = nullptr; // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr); @@ -570,14 +578,14 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec) // get a native disassembly for a compiled method extern "C" JL_DLLEXPORT_CODEGEN jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world, - char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) + char emit_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) { // printing via disassembly jl_code_instance_t *codeinst = jl_generate_fptr(mi, world); if (codeinst) { uintptr_t fptr = (uintptr_t)jl_atomic_load_acquire(&codeinst->invoke); if (getwrapper) - return jl_dump_fptr_asm(fptr, raw_mc, asm_variant, debuginfo, binary); + return jl_dump_fptr_asm(fptr, emit_mc, asm_variant, debuginfo, binary); uintptr_t specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr); if (fptr == (uintptr_t)jl_fptr_const_return_addr && specfptr == 0) { // normally we prevent native code from being generated for these functions, @@ -625,7 +633,7 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world, } } if (specfptr != 0) - return jl_dump_fptr_asm(specfptr, raw_mc, asm_variant, debuginfo, binary); + return jl_dump_fptr_asm(specfptr, emit_mc, asm_variant, debuginfo, binary); } // whatever, that didn't work - use the assembler output instead @@ -633,7 +641,7 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world, jl_get_llvmf_defn(&llvmf_dump, mi, world, getwrapper, true, jl_default_cgparams); if (!llvmf_dump.F) return jl_an_empty_string; - 
return jl_dump_function_asm(&llvmf_dump, raw_mc, asm_variant, debuginfo, binary); + return jl_dump_function_asm(&llvmf_dump, emit_mc, asm_variant, debuginfo, binary, false); } CodeGenOpt::Level CodeGenOptLevelFor(int optlevel) @@ -684,8 +692,6 @@ void jl_register_jit_object(const object::ObjectFile &debugObj, std::function getLoadAddress, std::function lookupWriteAddress) JL_NOTSAFEPOINT; -#ifdef JL_USE_JITLINK - namespace { using namespace llvm::orc; @@ -797,7 +803,7 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin { PassConfig.PostAllocationPasses.push_back([&Info, this](jitlink::LinkGraph &G) -> Error { std::lock_guard lock(PluginMutex); for (const jitlink::Section &Sec : G.sections()) { -#ifdef _OS_DARWIN_ +#if defined(_OS_DARWIN_) // Canonical JITLink section names have the segment name included, e.g. // "__TEXT,__text" or "__DWARF,__debug_str". There are some special internal // sections without a comma separator, which we can just ignore. @@ -861,6 +867,8 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { } graph_size += secsize; } + (void) code_size; + (void) data_size; this->total_size.fetch_add(graph_size, std::memory_order_relaxed); jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, graph_size); jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, code_size); @@ -870,6 +878,17 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { } }; +// replace with [[maybe_unused]] when we get to C++17 +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +#ifdef _COMPILER_CLANG_ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-function" +#endif + // TODO: Port our memory management optimisations to JITLink instead of using the // default InProcessMemoryManager. 
std::unique_ptr createJITLinkMemoryManager() { @@ -879,33 +898,28 @@ std::unique_ptr createJITLinkMemoryManager() { return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper()); #endif } -} - -# ifdef LLVM_SHLIB -# define EHFRAME_RANGE(name) orc::ExecutorAddrRange name -# define UNPACK_EHFRAME_RANGE(name) \ - name.Start.toPtr(), \ - static_cast(name.size()) +#ifdef _COMPILER_CLANG_ +#pragma clang diagnostic pop +#endif +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic pop +#endif +} class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar { public: - Error registerEHFrames(EHFRAME_RANGE(EHFrameSection)) override { - register_eh_frames( - UNPACK_EHFRAME_RANGE(EHFrameSection)); + Error registerEHFrames(orc::ExecutorAddrRange EHFrameSection) override { + register_eh_frames(EHFrameSection.Start.toPtr(), static_cast(EHFrameSection.size())); return Error::success(); } - Error deregisterEHFrames(EHFRAME_RANGE(EHFrameSection)) override { - deregister_eh_frames( - UNPACK_EHFRAME_RANGE(EHFrameSection)); + Error deregisterEHFrames(orc::ExecutorAddrRange EHFrameSection) override { + deregister_eh_frames(EHFrameSection.Start.toPtr(), static_cast(EHFrameSection.size())); return Error::success(); } }; -# endif - -#else // !JL_USE_JITLINK RTDyldMemoryManager* createRTDyldMemoryManager(void); @@ -1005,7 +1019,6 @@ void registerRTDyldJITObject(const object::ObjectFile &Object, #endif ); } -#endif namespace { static std::unique_ptr createTargetMachine() JL_NOTSAFEPOINT { TargetOptions options = TargetOptions(); @@ -1289,6 +1302,7 @@ JuliaOJIT::JuliaOJIT() ES(cantFail(orc::SelfExecutorProcessControl::Create())), GlobalJD(ES.createBareJITDylib("JuliaGlobals")), JD(ES.createBareJITDylib("JuliaOJIT")), + ExternalJD(ES.createBareJITDylib("JuliaExternal")), ContextPool([](){ auto ctx = std::make_unique(); return orc::ThreadSafeContext(std::move(ctx)); @@ -1313,7 +1327,9 @@ JuliaOJIT::JuliaOJIT() std::make_unique(LockLayer, *TM, 2, PrintLLVMTimers), std::make_unique(LockLayer, 
*TM, 3, PrintLLVMTimers), }, - OptSelLayer(Pipelines) + OptSelLayer(Pipelines), + ExternalCompileLayer(ES, LockLayer, + std::make_unique(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM, 2)) { #ifdef JL_USE_JITLINK # if defined(LLVM_SHLIB) @@ -1385,6 +1401,9 @@ JuliaOJIT::JuliaOJIT() } JD.addToLinkOrder(GlobalJD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly); + JD.addToLinkOrder(ExternalJD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly); + ExternalJD.addToLinkOrder(GlobalJD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly); + ExternalJD.addToLinkOrder(JD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly); #if JULIA_FLOAT16_ABI == 1 orc::SymbolAliasMap jl_crt = { @@ -1484,10 +1503,34 @@ void JuliaOJIT::addModule(orc::ThreadSafeModule TSM) } } +Error JuliaOJIT::addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, bool ShouldOptimize) +{ + if (auto Err = TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT -> Error + { + if (M.getDataLayout().isDefault()) + M.setDataLayout(DL); + if (M.getDataLayout() != DL) + return make_error( + "Added modules have incompatible data layouts: " + + M.getDataLayout().getStringRepresentation() + " (module) vs " + + DL.getStringRepresentation() + " (jit)", + inconvertibleErrorCode()); + + return Error::success(); + })) + return Err; + return ExternalCompileLayer.add(JD.getDefaultResourceTracker(), std::move(TSM)); +} + +Error JuliaOJIT::addObjectFile(orc::JITDylib &JD, std::unique_ptr Obj) { + assert(Obj && "Can not add null object"); + return LockLayer.add(JD.getDefaultResourceTracker(), std::move(Obj)); +} + JL_JITSymbol JuliaOJIT::findSymbol(StringRef Name, bool ExportedSymbolsOnly) { - orc::JITDylib* SearchOrders[2] = {&JD, &GlobalJD}; - ArrayRef SearchOrder = makeArrayRef(&SearchOrders[0], ExportedSymbolsOnly ? 2 : 1); + orc::JITDylib* SearchOrders[3] = {&JD, &GlobalJD, &ExternalJD}; + ArrayRef SearchOrder = makeArrayRef(&SearchOrders[0], ExportedSymbolsOnly ? 
3 : 1); auto Sym = ES.lookup(SearchOrder, Name); if (Sym) return *Sym; @@ -1499,6 +1542,14 @@ JL_JITSymbol JuliaOJIT::findUnmangledSymbol(StringRef Name) return findSymbol(getMangledName(Name), true); } +Expected JuliaOJIT::findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) +{ + orc::JITDylib* SearchOrders[3] = {&ExternalJD, &GlobalJD, &JD}; + ArrayRef SearchOrder = makeArrayRef(&SearchOrders[0], ExternalJDOnly ? 1 : 3); + auto Sym = ES.lookup(SearchOrder, getMangledName(Name)); + return Sym; +} + uint64_t JuliaOJIT::getGlobalValueAddress(StringRef Name) { auto addr = findSymbol(getMangledName(Name), false); diff --git a/src/jitlayers.h b/src/jitlayers.h index c056a6b3418a3..3aa3998d3ac23 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -41,7 +41,7 @@ // However, JITLink is a relatively young library and lags behind in platform // and feature support (e.g. Windows, JITEventListeners for various profilers, // etc.). Thus, we currently only use JITLink where absolutely required, that is, -// for Mac/aarch64. +// for Mac/aarch64 and Linux/aarch64. 
// #define JL_FORCE_JITLINK #if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_) @@ -49,16 +49,21 @@ #endif // The sanitizers don't play well with our memory manager -#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) || defined(JL_FORCE_JITLINK) || JL_LLVM_VERSION >= 150000 && defined(HAS_SANITIZER) +#if defined(JL_FORCE_JITLINK) || JL_LLVM_VERSION >= 150000 && defined(HAS_SANITIZER) # define JL_USE_JITLINK +#else +# if defined(_CPU_AARCH64_) +# if defined(_OS_LINUX_) && JL_LLVM_VERSION < 150000 +# pragma message("On aarch64-gnu-linux, LLVM version >= 15 is required for JITLink; fallback suffers from occasional segfaults") +# else +# define JL_USE_JITLINK +# endif +# endif #endif -#ifdef JL_USE_JITLINK # include -#else # include # include -#endif using namespace llvm; @@ -232,6 +237,7 @@ typedef struct _jl_codegen_params_t { bool cache = false; bool external_linkage = false; bool imaging; + int debug_level; _jl_codegen_params_t(orc::ThreadSafeContext ctx, DataLayout DL, Triple triple) : tsctx(std::move(ctx)), tsctx_lock(tsctx.getLock()), DL(std::move(DL)), TargetTriple(std::move(triple)), imaging(imaging_default()) {} @@ -475,6 +481,16 @@ class JuliaOJIT { void addGlobalMapping(StringRef Name, uint64_t Addr) JL_NOTSAFEPOINT; void addModule(orc::ThreadSafeModule M) JL_NOTSAFEPOINT; + //Methods for the C API + Error addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, + bool ShouldOptimize = false) JL_NOTSAFEPOINT; + Error addObjectFile(orc::JITDylib &JD, + std::unique_ptr Obj) JL_NOTSAFEPOINT; + Expected findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) JL_NOTSAFEPOINT; + orc::IRCompileLayer &getIRCompileLayer() JL_NOTSAFEPOINT { return ExternalCompileLayer; }; + orc::ExecutionSession &getExecutionSession() JL_NOTSAFEPOINT { return ES; } + orc::JITDylib &getExternalJITDylib() JL_NOTSAFEPOINT { return ExternalJD; } + JL_JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) 
JL_NOTSAFEPOINT; JL_JITSymbol findUnmangledSymbol(StringRef Name) JL_NOTSAFEPOINT; uint64_t getGlobalValueAddress(StringRef Name) JL_NOTSAFEPOINT; @@ -523,7 +539,7 @@ class JuliaOJIT { orc::ExecutionSession ES; orc::JITDylib &GlobalJD; orc::JITDylib &JD; - + orc::JITDylib &ExternalJD; //Map and inc are guarded by RLST_mutex std::mutex RLST_mutex{}; int RLST_inc = 0; @@ -548,6 +564,8 @@ class JuliaOJIT { LockLayerT LockLayer; const std::array, 4> Pipelines; OptSelLayerT OptSelLayer; + CompileLayerT ExternalCompileLayer; + }; extern JuliaOJIT *jl_ExecutionEngine; std::unique_ptr jl_create_llvm_module(StringRef name, LLVMContext &ctx, bool imaging_mode, const DataLayout &DL = jl_ExecutionEngine->getDataLayout(), const Triple &triple = jl_ExecutionEngine->getTargetTriple()) JL_NOTSAFEPOINT; diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 41692e9be680e..e4edf8069a581 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -562,6 +562,8 @@ YY(jl_getUnwindInfo) \ YY(jl_get_libllvm) \ YY(jl_add_optimization_passes) \ + YY(jl_build_newpm_pipeline) \ + YY(jl_register_passbuilder_callbacks) \ YY(LLVMExtraAddLowerSimdLoopPass) \ YY(LLVMExtraAddFinalLowerGCPass) \ YY(LLVMExtraAddPropagateJuliaAddrspaces) \ @@ -577,5 +579,33 @@ YY(LLVMExtraAddGCInvariantVerifierPass) \ YY(LLVMExtraAddDemoteFloat16Pass) \ YY(LLVMExtraAddCPUFeaturesPass) \ + YY(LLVMExtraMPMAddCPUFeaturesPass) \ + YY(LLVMExtraMPMAddRemoveNIPass) \ + YY(LLVMExtraMPMAddLowerSIMDLoopPass) \ + YY(LLVMExtraMPMAddFinalLowerGCPass) \ + YY(LLVMExtraMPMAddMultiVersioningPass) \ + YY(LLVMExtraMPMAddRemoveJuliaAddrspacesPass) \ + YY(LLVMExtraMPMAddRemoveAddrspacesPass) \ + YY(LLVMExtraMPMAddLowerPTLSPass) \ + YY(LLVMExtraFPMAddDemoteFloat16Pass) \ + YY(LLVMExtraFPMAddCombineMulAddPass) \ + YY(LLVMExtraFPMAddLateLowerGCPass) \ + YY(LLVMExtraFPMAddAllocOptPass) \ + YY(LLVMExtraFPMAddPropagateJuliaAddrspacesPass) \ + YY(LLVMExtraFPMAddLowerExcHandlersPass) \ + 
YY(LLVMExtraFPMAddGCInvariantVerifierPass) \ + YY(LLVMExtraLPMAddJuliaLICMPass) \ + YY(JLJITGetLLVMOrcExecutionSession) \ + YY(JLJITGetJuliaOJIT) \ + YY(JLJITGetExternalJITDylib) \ + YY(JLJITAddObjectFile) \ + YY(JLJITAddLLVMIRModule) \ + YY(JLJITLookup) \ + YY(JLJITMangleAndIntern) \ + YY(JLJITGetTripleString) \ + YY(JLJITGetGlobalPrefix) \ + YY(JLJITGetDataLayoutString) \ + YY(JLJITGetIRCompileLayer) \ + // end of file diff --git a/src/jlapi.c b/src/jlapi.c index 001253fed71a8..0dffaac627288 100644 --- a/src/jlapi.c +++ b/src/jlapi.c @@ -690,9 +690,6 @@ static void rr_detach_teleport(void) { JL_DLLEXPORT int jl_repl_entrypoint(int argc, char *argv[]) { #ifdef USE_TRACY - // Apply e.g. JULIA_TIMING_SUBSYSTEMS="+GC,-INFERENCE" and - // JULIA_TIMING_METADATA_PRINT_LIMIT=20 - jl_timing_apply_env(); if (getenv("JULIA_WAIT_FOR_TRACY")) while (!TracyCIsConnected) jl_cpu_pause(); // Wait for connection #endif diff --git a/src/jlfrontend.scm b/src/jlfrontend.scm index f72c79f281480..d376bc27085ab 100644 --- a/src/jlfrontend.scm +++ b/src/jlfrontend.scm @@ -93,18 +93,38 @@ ;; lowering entry points +; find the first line number in this expression, before we might eliminate them +(define (first-lineno blk) + (cond ((not (pair? blk)) #f) + ((eq? (car blk) 'line) blk) + ((and (eq? (car blk) 'hygienic-scope) (pair? (cdddr blk)) (pair? (cadddr blk)) (eq? (car (cadddr blk)) 'line)) + (cadddr blk)) + ((memq (car blk) '(escape hygienic-scope)) + (first-lineno (cadr blk))) + ((memq (car blk) '(toplevel block)) + (let loop ((xs (cdr blk))) + (and (pair? xs) + (let ((elt (first-lineno (car xs)))) + (or elt (loop (cdr xs))))))) + (else #f))) + ;; return a lambda expression representing a thunk for a top-level expression ;; note: expansion of stuff inside module is delayed, so the contents obey ;; toplevel expansion order (don't expand until stuff before is evaluated). 
(define (expand-toplevel-expr-- e file line) - (let ((ex0 (julia-expand-macroscope e))) + (let ((lno (first-lineno e)) + (ex0 (julia-expand-macroscope e))) + (if (and lno (or (not (length= lno 3)) (not (atom? (caddr lno))))) (set! lno #f)) (if (toplevel-only-expr? ex0) - ex0 - (let* ((ex (julia-expand0 ex0 file line)) + (if (and (pair? e) (memq (car ex0) '(error incomplete))) + ex0 + (if lno `(toplevel ,lno ,ex0) ex0)) + (let* ((linenode (if (and lno (or (= line 0) (eq? file 'none))) lno `(line ,line ,file))) + (ex (julia-expand0 ex0 linenode)) (th (julia-expand1 `(lambda () () (scope-block - ,(blockify ex))) + ,(blockify ex lno))) file line))) (if (and (null? (cdadr (caddr th))) (and (length= (lam:body th) 2) @@ -179,14 +199,9 @@ ;; construct default definitions of `eval` for non-bare modules ;; called by jl_eval_module_expr -(define (module-default-defs e) +(define (module-default-defs name file line) (jl-expand-to-thunk - (let* ((name (caddr e)) - (body (cadddr e)) - (loc (if (null? (cdr body)) () (cadr body))) - (loc (if (and (pair? loc) (eq? (car loc) 'line)) - (list loc) - '())) + (let* ((loc (if (and (eq? file 'none) (eq? line 0)) '() `((line ,line ,file)))) (x (if (eq? name 'x) 'y 'x)) (mex (if (eq? name 'mapexpr) 'map_expr 'mapexpr))) `(block @@ -202,7 +217,7 @@ (block ,@loc (call (core _call_latest) (top include) ,mex ,name ,x))))) - 'none 0)) + file line)) ; run whole frontend on a string. useful for testing. (define (fe str) diff --git a/src/jloptions.c b/src/jloptions.c index 4c0b59f811643..129ba9df2510e 100644 --- a/src/jloptions.c +++ b/src/jloptions.c @@ -40,7 +40,8 @@ JL_DLLEXPORT void jl_init_options(void) NULL, // cpu_target ("native", "core2", etc...) 
0, // nthreadpools 0, // nthreads - 0, // ngcthreads + 0, // nmarkthreads + 0, // nsweepthreads NULL, // nthreads_per_pool 0, // nprocs NULL, // machine_file @@ -87,6 +88,7 @@ JL_DLLEXPORT void jl_init_options(void) 0, // rr-detach 0, // strip-metadata 0, // strip-ir + 0, // permalloc_pkgimg 0, // heap-size-hint }; jl_options_initialized = 1; @@ -129,7 +131,8 @@ static const char opts[] = " interface if supported (Linux and Windows) or to the number of CPU\n" " threads if not supported (MacOS) or if process affinity is not\n" " configured, and sets M to 1.\n" - " --gcthreads=N Use N threads for GC, set to half of the number of compute threads if unspecified.\n" + " --gcthreads=M[,N] Use M threads for the mark phase of GC and N (0 or 1) threads for the concurrent sweeping phase of GC.\n" + " M is set to half of the number of compute threads and N is set to 0 if unspecified.\n" " -p, --procs {N|auto} Integer value N launches N additional local worker processes\n" " \"auto\" launches as many workers as the number of local CPU threads (logical cores)\n" " --machine-file Run processes on hosts listed in \n\n" @@ -209,6 +212,7 @@ static const char opts_hidden[] = " --trace-compile={stderr,name}\n" " Print precompile statements for methods compiled during execution or save to a path\n" " --image-codegen Force generate code in imaging mode\n" + " --permalloc-pkgimg={yes|no*} Copy the data section of package images into memory\n" ; JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) @@ -254,6 +258,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) opt_strip_ir, opt_heap_size_hint, opt_gc_threads, + opt_permalloc_pkgimg }; static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:"; static const struct option longopts[] = { @@ -313,6 +318,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) { "rr-detach", no_argument, 0, opt_rr_detach }, { "strip-metadata", no_argument, 0, opt_strip_metadata }, { "strip-ir", no_argument, 0, opt_strip_ir }, 
+ { "permalloc-pkgimg",required_argument, 0, opt_permalloc_pkgimg }, { "heap-size-hint", required_argument, 0, opt_heap_size_hint }, { 0, 0, 0, 0 } }; @@ -822,10 +828,27 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) break; case opt_gc_threads: errno = 0; - long ngcthreads = strtol(optarg, &endptr, 10); - if (errno != 0 || optarg == endptr || *endptr != 0 || ngcthreads < 1 || ngcthreads >= INT16_MAX) - jl_errorf("julia: --gcthreads=; n must be an integer >= 1"); - jl_options.ngcthreads = (int16_t)ngcthreads; + long nmarkthreads = strtol(optarg, &endptr, 10); + if (errno != 0 || optarg == endptr || nmarkthreads < 1 || nmarkthreads >= INT16_MAX) { + jl_errorf("julia: --gcthreads=[,]; n must be an integer >= 1"); + } + jl_options.nmarkthreads = (int16_t)nmarkthreads; + if (*endptr == ',') { + errno = 0; + char *endptri; + long nsweepthreads = strtol(&endptr[1], &endptri, 10); + if (errno != 0 || endptri == &endptr[1] || *endptri != 0 || nsweepthreads < 0 || nsweepthreads > 1) + jl_errorf("julia: --gcthreads=,; n must be 0 or 1"); + jl_options.nsweepthreads = (int8_t)nsweepthreads; + } + break; + case opt_permalloc_pkgimg: + if (!strcmp(optarg,"yes")) + jl_options.permalloc_pkgimg = 1; + else if (!strcmp(optarg,"no")) + jl_options.permalloc_pkgimg = 0; + else + jl_errorf("julia: invalid argument to --permalloc-pkgimg={yes|no} (%s)", optarg); break; default: jl_errorf("julia: unhandled option -- %c\n" diff --git a/src/jloptions.h b/src/jloptions.h index c44a8cfe05770..8649c405112d7 100644 --- a/src/jloptions.h +++ b/src/jloptions.h @@ -15,7 +15,8 @@ typedef struct { const char *cpu_target; int8_t nthreadpools; int16_t nthreads; - int16_t ngcthreads; + int16_t nmarkthreads; + int8_t nsweepthreads; const int16_t *nthreads_per_pool; int32_t nprocs; const char *machine_file; @@ -58,6 +59,7 @@ typedef struct { int8_t rr_detach; int8_t strip_metadata; int8_t strip_ir; + int8_t permalloc_pkgimg; uint64_t heap_size_hint; } jl_options_t; diff --git a/src/jltypes.c 
b/src/jltypes.c index 1a30df637a706..444923f600569 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -847,14 +847,14 @@ JL_DLLEXPORT jl_value_t *jl_type_unionall(jl_tvar_t *v, jl_value_t *body) if (T_has_tv) { jl_value_t *wrapped = jl_type_unionall(v, vm->T); JL_GC_PUSH1(&wrapped); - wrapped = (jl_value_t*)jl_wrap_vararg(wrapped, vm->N); + wrapped = (jl_value_t*)jl_wrap_vararg(wrapped, vm->N, 1); JL_GC_POP(); return wrapped; } else { assert(N_has_tv); assert(vm->N == (jl_value_t*)v); - return (jl_value_t*)jl_wrap_vararg(vm->T, NULL); + return (jl_value_t*)jl_wrap_vararg(vm->T, NULL, 1); } } if (!jl_is_type(body) && !jl_is_typevar(body)) @@ -1411,7 +1411,7 @@ jl_datatype_t *jl_apply_modify_type(jl_value_t *dt) return rettyp; } -jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt) +jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *ty) { jl_value_t *params[2]; jl_value_t *names = jl_atomic_load_relaxed(&cmpswap_names); @@ -1422,12 +1422,12 @@ jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt) if (jl_atomic_cmpswap(&cmpswap_names, &names, lnames)) names = jl_atomic_load_relaxed(&cmpswap_names); // == lnames } - params[0] = dt; + params[0] = ty; params[1] = (jl_value_t*)jl_bool_type; - jl_datatype_t *tuptyp = (jl_datatype_t*)jl_apply_tuple_type_v(params, 2); - JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALWAYS_LEAFTYPE) - jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2((jl_value_t*)jl_namedtuple_type, names, (jl_value_t*)tuptyp); - JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE) + jl_value_t *tuptyp = jl_apply_tuple_type_v(params, 2); + JL_GC_PUSH1(&tuptyp); + jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2((jl_value_t*)jl_namedtuple_type, names, tuptyp); + JL_GC_POP(); return rettyp; } @@ -1672,11 +1672,14 @@ void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable) } assert(dt->isconcretetype || dt->isdispatchtuple ? dt->maybe_subtype_of_cache : 1); if (dt->name == jl_type_typename) { - cacheable = 0; // n.b. 
the cache for Type ignores parameter normalization, so it can't be used to make a stable hash value jl_value_t *p = jl_tparam(dt, 0); if (!jl_is_type(p) && !jl_is_typevar(p)) // Type{v} has no subtypes, if v is not a Type dt->has_concrete_subtype = 0; dt->maybe_subtype_of_cache = 1; + jl_value_t *uw = jl_unwrap_unionall(p); + // n.b. the cache for Type ignores parameter normalization except for Typeofwrapper, so it can't be used to make a stable hash value + if (!jl_is_datatype(uw) || ((jl_datatype_t*)uw)->name->wrapper != p) + cacheable = 0; } dt->hash = typekey_hash(dt->name, jl_svec_data(dt->parameters), l, cacheable); } @@ -1841,13 +1844,8 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value jl_typename_t *tn = dt->name; int istuple = (tn == jl_tuple_typename); int isnamedtuple = (tn == jl_namedtuple_typename); - if (check && tn != jl_type_typename) { - size_t i; - for (i = 0; i < ntp; i++) - iparams[i] = normalize_unionalls(iparams[i]); - } - // check type cache, if applicable + // check if type cache will be applicable int cacheable = 1; if (istuple) { size_t i; @@ -1883,7 +1881,15 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value if (jl_has_free_typevars(iparams[i])) cacheable = 0; } + // if applicable, check the cache first for a match if (cacheable) { + jl_value_t *lkup = (jl_value_t*)lookup_type(tn, iparams, ntp); + if (lkup != NULL) + return lkup; + } + // if some normalization might be needed, do that now + // it is probably okay to mutate iparams, and we only store globally rooted objects here + if (check) { size_t i; for (i = 0; i < ntp; i++) { jl_value_t *pi = iparams[i]; @@ -1891,18 +1897,15 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value continue; if (jl_is_datatype(pi)) continue; - if (jl_is_vararg(pi)) { - pi = jl_unwrap_vararg(pi); - if (jl_has_free_typevars(pi)) - continue; - } - // normalize types equal to wrappers (prepare for wrapper_id) + if 
(jl_is_vararg(pi)) + // This is already handled in jl_wrap_vararg instead + continue; + if (!cacheable && jl_has_free_typevars(pi)) + continue; + // normalize types equal to wrappers (prepare for Typeofwrapper) jl_value_t *tw = extract_wrapper(pi); if (tw && tw != pi && (tn != jl_type_typename || jl_typeof(pi) == jl_typeof(tw)) && jl_types_equal(pi, tw)) { - // This would require some special handling, but is never used at - // the moment. - assert(!jl_is_vararg(iparams[i])); iparams[i] = tw; if (p) jl_gc_wb(p, tw); } @@ -1912,6 +1915,9 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value // normalize Type{Type{Union{}}} to Type{TypeofBottom} iparams[0] = (jl_value_t*)jl_typeofbottom_type; } + } + // then check the cache again, if applicable + if (cacheable) { jl_value_t *lkup = (jl_value_t*)lookup_type(tn, iparams, ntp); if (lkup != NULL) return lkup; @@ -1920,12 +1926,15 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value if (stack_lkup) return stack_lkup; + // check parameters against bounds in type definition + // for whether this is even valid if (check && !istuple) { - // check parameters against bounds in type definition + assert(ntp > 0); check_datatype_parameters(tn, iparams, ntp); } else if (ntp == 0 && jl_emptytuple_type != NULL) { // empty tuple type case + assert(istuple); return (jl_value_t*)jl_emptytuple_type; } @@ -1971,6 +1980,42 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value jl_svecset(p, i, iparams[i]); } + // try to simplify some type parameters + if (check && tn != jl_type_typename) { + size_t i; + int changed = 0; + if (istuple) // normalization might change Tuple's, but not other types's, cacheable status + cacheable = 1; + for (i = 0; i < ntp; i++) { + jl_value_t *newp = normalize_unionalls(iparams[i]); + if (newp != iparams[i]) { + iparams[i] = newp; + jl_svecset(p, i, newp); + changed = 1; + } + if (istuple && cacheable && 
!jl_is_concrete_type(newp)) + cacheable = 0; + } + if (changed) { + // If this changed something, we need to check the cache again, in + // case we missed the match earlier before the normalizations + // + // e.g. return inst_datatype_inner(dt, p, iparams, ntp, stack, env, 0); + if (cacheable) { + jl_value_t *lkup = (jl_value_t*)lookup_type(tn, iparams, ntp); + if (lkup != NULL) { + JL_GC_POP(); + return lkup; + } + } + jl_value_t *stack_lkup = lookup_type_stack(stack, dt, ntp, iparams); + if (stack_lkup) { + JL_GC_POP(); + return stack_lkup; + } + } + } + // acquire the write lock now that we know we need a new object // since we're going to immediately leak it globally via the instantiation stack if (cacheable) { @@ -2281,7 +2326,7 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t N = inst_type_w_(v->N, env, stack, check); } if (T != v->T || N != v->N) { - t = (jl_value_t*)jl_wrap_vararg(T, N); + t = (jl_value_t*)jl_wrap_vararg(T, N, check); } JL_GC_POP(); return t; @@ -2354,36 +2399,44 @@ jl_datatype_t *jl_wrap_Type(jl_value_t *t) return (jl_datatype_t*)jl_instantiate_unionall(jl_type_type, t); } -jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n) +jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check) { - if (n) { - if (jl_is_typevar(n) || jl_is_uniontype(jl_unwrap_unionall(n))) { - // TODO: this is disabled due to #39698; it is also inconsistent - // with other similar checks, where we usually only check substituted - // values and not the bounds of variables. 
- /* - jl_tvar_t *N = (jl_tvar_t*)n; - if (!(N->lb == jl_bottom_type && N->ub == (jl_value_t*)jl_any_type)) - jl_error("TypeVar in Vararg length must have bounds Union{} and Any"); - */ - } - else if (!jl_is_long(n)) { - jl_type_error_rt("Vararg", "count", (jl_value_t*)jl_long_type, n); - } - else if (jl_unbox_long(n) < 0) { - jl_errorf("Vararg length is negative: %zd", jl_unbox_long(n)); + jl_task_t *ct = jl_current_task; + JL_GC_PUSH1(&t); + if (check) { + if (n) { + if (jl_is_typevar(n) || jl_is_uniontype(jl_unwrap_unionall(n))) { + // TODO: this is disabled due to #39698; it is also inconsistent + // with other similar checks, where we usually only check substituted + // values and not the bounds of variables. + /* + jl_tvar_t *N = (jl_tvar_t*)n; + if (!(N->lb == jl_bottom_type && N->ub == (jl_value_t*)jl_any_type)) + jl_error("TypeVar in Vararg length must have bounds Union{} and Any"); + */ + } + else if (!jl_is_long(n)) { + jl_type_error_rt("Vararg", "count", (jl_value_t*)jl_long_type, n); + } + else if (jl_unbox_long(n) < 0) { + jl_errorf("Vararg length is negative: %zd", jl_unbox_long(n)); + } } - } - if (t) { - if (!jl_valid_type_param(t)) { - jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t); + if (t) { + if (!jl_valid_type_param(t)) { + jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t); + } + t = normalize_unionalls(t); + jl_value_t *tw = extract_wrapper(t); + if (tw && t != tw && jl_types_equal(t, tw)) + t = tw; } } - jl_task_t *ct = jl_current_task; jl_vararg_t *vm = (jl_vararg_t *)jl_gc_alloc(ct->ptls, sizeof(jl_vararg_t), jl_vararg_type); jl_set_typetagof(vm, jl_vararg_tag, 0); vm->T = t; vm->N = n; + JL_GC_POP(); return vm; } @@ -2666,7 +2719,7 @@ void jl_init_types(void) JL_GC_DISABLED // It seems like we probably usually end up needing the box for kinds (often used in an Any context), so force it to exist jl_vararg_type->name->mayinlinealloc = 0; - jl_svec_t *anytuple_params = jl_svec(1, 
jl_wrap_vararg((jl_value_t*)jl_any_type, (jl_value_t*)NULL)); + jl_svec_t *anytuple_params = jl_svec(1, jl_wrap_vararg((jl_value_t*)jl_any_type, (jl_value_t*)NULL, 0)); jl_anytuple_type = jl_new_datatype(jl_symbol("Tuple"), core, jl_any_type, anytuple_params, jl_emptysvec, anytuple_params, jl_emptysvec, 0, 0, 0); jl_tuple_typename = jl_anytuple_type->name; @@ -2903,7 +2956,7 @@ void jl_init_types(void) JL_GC_DISABLED jl_code_info_type = jl_new_datatype(jl_symbol("CodeInfo"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(21, + jl_perm_symsvec(22, "code", "codelocs", "ssavaluetypes", @@ -2921,11 +2974,12 @@ void jl_init_types(void) JL_GC_DISABLED "inferred", "propagate_inbounds", "has_fcall", + "nospecializeinfer", "inlining", "constprop", "purity", "inlining_cost"), - jl_svec(21, + jl_svec(22, jl_array_any_type, jl_array_int32_type, jl_any_type, @@ -2943,17 +2997,18 @@ void jl_init_types(void) JL_GC_DISABLED jl_bool_type, jl_bool_type, jl_bool_type, + jl_bool_type, jl_uint8_type, jl_uint8_type, jl_uint8_type, jl_uint16_type), jl_emptysvec, - 0, 1, 20); + 0, 1, 22); jl_method_type = jl_new_datatype(jl_symbol("Method"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(29, + jl_perm_symsvec(30, "name", "module", "file", @@ -2980,10 +3035,11 @@ void jl_init_types(void) JL_GC_DISABLED "nkw", "isva", "is_for_opaque_closure", + "nospecializeinfer", "constprop", "max_varargs", "purity"), - jl_svec(29, + jl_svec(30, jl_symbol_type, jl_module_type, jl_symbol_type, @@ -3010,6 +3066,7 @@ void jl_init_types(void) JL_GC_DISABLED jl_int32_type, jl_bool_type, jl_bool_type, + jl_bool_type, jl_uint8_type, jl_uint8_type, jl_uint8_type), diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index cac8c7b5228b9..cd11f46b1eb38 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -175,7 +175,7 @@ ,(if (equal? rett '(core Any)) body (let ((meta (take-while (lambda (x) (and (pair? 
x) - (memq (car x) '(line meta)))) + (memq (car x) '(lineinfo line meta)))) (cdr body))) (R (make-ssavalue))) `(,(car body) ,@meta @@ -187,7 +187,7 @@ ;; a bound is #f if not specified (define (analyze-typevar e) (define (check-sym s) - (if (symbol? s) + (if (symbol? (unescape s)) ; unescape for macroexpand.scm use s (error (string "invalid type parameter name \"" (deparse s) "\"")))) (cond ((atom? e) (list (check-sym e) #f #f)) @@ -1097,7 +1097,7 @@ ;; insert calls to convert() in ccall, and pull out expressions that might ;; need to be rooted before conversion. -(define (lower-ccall name RT atypes args cconv) +(define (lower-ccall name RT atypes args cconv nreq) (let loop ((F atypes) ;; formals (A args) ;; actuals (stmts '()) ;; initializers @@ -1114,13 +1114,15 @@ (if (null? A) `(block ,.(reverse! stmts) - (foreigncall ,name ,RT (call (core svec) ,@(reverse! T)) - ,(if isseq (- (length atypes) 1) 0) ; 0 or number of arguments before ... in definition + (foreigncall ,(expand-forms name) ,(expand-forms RT) (call (core svec) ,@(reverse! T)) + ;; 0 or number of arguments before ... in definition + ,(or nreq + (if isseq (- (length atypes) 1) 0)) ',cconv ,.(reverse! C) ,@GC)) ; GC root ordering is arbitrary - (let* ((a (car A)) - (ty (if isseq (cadar F) (car F)))) + (let* ((a (expand-forms (car A))) + (ty (expand-forms (if isseq (cadar F) (car F))))) (if (and isseq (not (null? (cdr F)))) (error "only the trailing ccall argument type should have \"...\"")) (if (eq? ty 'Any) (loop (if isseq F (cdr F)) (cdr A) stmts (list* '(core Any) T) (list* a C) GC) @@ -2616,7 +2618,9 @@ ((eq? f 'ccall) (if (not (length> e 4)) (error "too few arguments to ccall")) (let* ((cconv (cadddr e)) - (have-cconv (memq cconv '(cdecl stdcall fastcall thiscall llvmcall))) + (have-cconv-expr (and (pair? cconv) (eq? 
(car cconv) 'cconv))) + (have-cconv (or have-cconv-expr + (memq cconv '(cdecl stdcall fastcall thiscall llvmcall)))) (after-cconv (if have-cconv (cddddr e) (cdddr e))) (name (caddr e)) (RT (car after-cconv)) @@ -2629,9 +2633,13 @@ (eq? (car RT) 'tuple)) (error "ccall argument types must be a tuple; try \"(T,)\" and check if you specified a correct return type") (error "ccall argument types must be a tuple; try \"(T,)\""))) - (expand-forms - (lower-ccall name RT (cdr argtypes) args - (if have-cconv cconv 'ccall)))))) + (lower-ccall name RT (cdr argtypes) args + (if have-cconv + (if have-cconv-expr + (cadr cconv) + cconv) + 'ccall) + (and have-cconv-expr (caddr cconv)))))) ((any kwarg? (cddr e)) ;; f(..., a=b, ...) (expand-forms (lower-kw-call f (cddr e)))) ((has-parameters? (cddr e)) ;; f(...; ...) @@ -3644,12 +3652,22 @@ f(x) = yt(x) (loop (cdr xs)) elt))))) +; try to ignore some metadata expressions for implicit return sometimes +(define (only-meta? blk) + (let loop ((xs blk)) + (if (null? xs) + #t + (let ((elt (car xs))) + (if (and (pair? elt) (memq (car elt) '(lineinfo line loopinfo))) + (loop (cdr xs)) + #f))))) + ;; return `body` with `stmts` inserted after any meta nodes (define (insert-after-meta body stmts) (if (null? stmts) body (let ((meta (take-while (lambda (x) (and (pair? x) - (memq (car x) '(line meta)))) + (memq (car x) '(lineinfo line meta)))) (cdr body)))) `(,(car body) ,@meta @@ -3681,7 +3699,7 @@ f(x) = yt(x) (lambda (x) (and (pair? x) (not (eq? (car x) 'lambda))))))) (define lambda-opt-ignored-exprs - (Set '(quote top core line inert local-def unnecessary copyast + (Set '(quote top core lineinfo line inert local-def unnecessary copyast meta inbounds boundscheck loopinfo decl aliasscope popaliasscope thunk with-static-parameters toplevel-only global globalref outerref const-if-global thismodule @@ -3911,7 +3929,7 @@ f(x) = yt(x) ((atom? 
e) e) (else (case (car e) - ((quote top core globalref outerref thismodule line break inert module toplevel null true false meta) e) + ((quote top core globalref outerref thismodule lineinfo line break inert module toplevel null true false meta) e) ((toplevel-only) ;; hack to avoid generating a (method x) expr for struct types (if (eq? (cadr e) 'struct) @@ -4545,12 +4563,14 @@ f(x) = yt(x) (file-diff (not (eq? fname last-fname))) ;; don't need a filename node for start of function (need-meta (and file-diff last-fname - (not (eq? e (lam:body lam)))))) + (not (eq? e (lam:body lam))))) + (emit-final-meta (lambda ()))) (if file-diff (set! filename fname)) (if need-meta (emit `(meta push_loc ,fname))) (let ((v (let loop ((xs (cdr e))) - (if (null? (cdr xs)) - (compile (car xs) break-labels value tail) + (if (only-meta? (cdr xs)) + (begin (set! emit-final-meta (lambda () (map (lambda (v) (compile v break-labels #f #f)) (cdr xs)))) + (compile (car xs) break-labels value tail)) (begin (compile (car xs) break-labels #f #f) (loop (cdr xs))))))) (if need-meta @@ -4566,6 +4586,7 @@ f(x) = yt(x) (let ((tmp (make-ssavalue))) (emit `(= ,tmp ,retv)) (set! retv tmp))) + (emit-final-meta) (emit '(meta pop_loc)) (emit `(return ,retv))) (emit '(meta pop_loc)))) @@ -4573,9 +4594,12 @@ f(x) = yt(x) (let ((tmp (make-ssavalue))) (emit `(= ,tmp ,v)) (set! v tmp) + (emit-final-meta) (emit `(meta pop_loc)))) (else - (emit `(meta pop_loc))))) + (emit-final-meta) + (emit `(meta pop_loc)))) + (emit-final-meta)) (if file-diff (set! filename last-fname)) v))) ((return) @@ -4821,7 +4845,7 @@ f(x) = yt(x) (cons (car e) args))) ;; metadata expressions - ((line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope inline noinline) + ((lineinfo line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope inline noinline) (let ((have-ret? (and (pair? code) (pair? (car code)) (eq? (caar code) 'return)))) (cond ((eq? (car e) 'line) (set! 
current-loc e) @@ -5090,8 +5114,8 @@ f(x) = yt(x) (define *current-desugar-loc* #f) -(define (julia-expand0 ex file line) - (with-bindings ((*current-desugar-loc* `(line ,line ,file))) +(define (julia-expand0 ex lno) + (with-bindings ((*current-desugar-loc* lno)) (trycatch (expand-forms ex) (lambda (e) (if (and (pair? e) (eq? (car e) 'error)) @@ -5106,4 +5130,4 @@ f(x) = yt(x) (define (julia-expand ex (file 'none) (line 0)) (julia-expand1 (julia-expand0 - (julia-expand-macroscope ex) file line) file line)) + (julia-expand-macroscope ex) `(line ,line ,file)) file line)) diff --git a/src/julia.expmap b/src/julia.expmap.in similarity index 89% rename from src/julia.expmap rename to src/julia.expmap.in index 94b955e95981f..484c83a4b16b2 100644 --- a/src/julia.expmap +++ b/src/julia.expmap.in @@ -1,4 +1,4 @@ -{ +@JULIA_SHLIB_SYMBOL_VERSION@ { global: pthread*; __stack_chk_guard; @@ -30,12 +30,11 @@ _Z24jl_coverage_data_pointerN4llvm9StringRefEi; _Z22jl_coverage_alloc_lineN4llvm9StringRefEi; _Z22jl_malloc_data_pointerN4llvm9StringRefEi; + _jl_timing_*; LLVMExtra*; + JLJIT*; llvmGetPassPluginInfo; - /* Make visible so that linker will merge duplicate definitions across DSO boundaries */ - _ZN4llvm3Any6TypeId*; - /* freebsd */ environ; __progname; @@ -43,3 +42,9 @@ local: *; }; + +@LLVM_SHLIB_SYMBOL_VERSION@ { + global: + /* Make visible so that linker will merge duplicate definitions across DSO boundaries */ + _ZN4llvm3Any6TypeId*; +}; diff --git a/src/julia.h b/src/julia.h index 77a95bf625b80..99af065a3c1a7 100644 --- a/src/julia.h +++ b/src/julia.h @@ -302,6 +302,7 @@ typedef struct _jl_code_info_t { uint8_t inferred; uint8_t propagate_inbounds; uint8_t has_fcall; + uint8_t nospecializeinfer; // uint8 settings uint8_t inlining; // 0 = default; 1 = @inline; 2 = @noinline uint8_t constprop; // 0 = use heuristic; 1 = aggressive; 2 = none @@ -359,6 +360,7 @@ typedef struct _jl_method_t { // various boolean properties uint8_t isva; uint8_t is_for_opaque_closure; + uint8_t 
nospecializeinfer; // uint8 settings uint8_t constprop; // 0x00 = use heuristic; 0x01 = aggressive; 0x02 = none uint8_t max_varargs; // 0xFF = use heuristic; otherwise, max # of args to expand @@ -1792,6 +1794,8 @@ JL_DLLIMPORT jl_value_t *jl_get_libllvm(void) JL_NOTSAFEPOINT; extern JL_DLLIMPORT int jl_n_threadpools; extern JL_DLLIMPORT _Atomic(int) jl_n_threads; extern JL_DLLIMPORT int jl_n_gcthreads; +extern int jl_n_markthreads; +extern int jl_n_sweepthreads; extern JL_DLLIMPORT int *jl_n_threads_per_pool; // environment entries @@ -2008,6 +2012,7 @@ JL_DLLEXPORT void jl_sigatomic_end(void); // tasks and exceptions ------------------------------------------------------- typedef struct _jl_timing_block_t jl_timing_block_t; +typedef struct _jl_timing_event_t jl_timing_event_t; typedef struct _jl_excstack_t jl_excstack_t; // info describing an exception handler @@ -2369,6 +2374,7 @@ typedef struct { // limited, standalone int safepoint_on_entry; // Emit a safepoint on entry to each function + int gcstack_arg; // Pass the ptls value as an argument with swiftself // Cache access. Default: jl_rettype_inferred. 
jl_codeinstance_lookup_t lookup; diff --git a/src/julia_fasttls.h b/src/julia_fasttls.h index 08f465badf8d3..1c0929717b293 100644 --- a/src/julia_fasttls.h +++ b/src/julia_fasttls.h @@ -3,6 +3,13 @@ #ifndef JL_FASTTLS_H #define JL_FASTTLS_H +#ifdef __cplusplus +#include +#define _Atomic(T) std::atomic +#else +#include +#endif + // Thread-local storage access #ifdef __cplusplus @@ -25,6 +32,7 @@ typedef jl_gcframe_t **(jl_get_pgcstack_func)(void); #if !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_) #define JULIA_DEFINE_FAST_TLS \ static __attribute__((tls_model("local-exec"))) __thread jl_gcframe_t **jl_pgcstack_localexec; \ +JL_DLLEXPORT _Atomic(char) jl_pgcstack_static_semaphore; \ JL_DLLEXPORT jl_gcframe_t **jl_get_pgcstack_static(void) \ { \ return jl_pgcstack_localexec; \ diff --git a/src/julia_internal.h b/src/julia_internal.h index cbd0bf7750251..737553ec98845 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -350,7 +350,6 @@ void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT; void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT; -void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v); void gc_sweep_sysimg(void); void jl_gc_notify_image_load(const char* img_data, size_t len); @@ -661,6 +660,7 @@ typedef struct { uint8_t inferred:1; uint8_t propagate_inbounds:1; uint8_t has_fcall:1; + uint8_t nospecializeinfer:1; uint8_t inlining:2; // 0 = use heuristic; 1 = aggressive; 2 = none uint8_t constprop:2; // 0 = use heuristic; 1 = aggressive; 2 = none } jl_code_info_flags_bitfield_t; @@ -766,7 +766,7 @@ jl_datatype_t *jl_new_abstracttype(jl_value_t *name, jl_module_t *module, jl_datatype_t *jl_new_uninitialized_datatype(void); void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable); JL_DLLEXPORT jl_datatype_t *jl_wrap_Type(jl_value_t *t); // x -> Type{x} -jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n); +jl_vararg_t *jl_wrap_vararg(jl_value_t 
*t, jl_value_t *n, int check); void jl_reinstantiate_inner_types(jl_datatype_t *t); jl_datatype_t *jl_lookup_cache_type_(jl_datatype_t *type); void jl_cache_type_(jl_datatype_t *type); @@ -796,7 +796,8 @@ jl_value_t *jl_interpret_toplevel_expr_in(jl_module_t *m, jl_value_t *e, jl_code_info_t *src, jl_svec_t *sparam_vals); JL_DLLEXPORT int jl_is_toplevel_only_expr(jl_value_t *e) JL_NOTSAFEPOINT; -jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module_t *inmodule); +jl_value_t *jl_call_scm_on_ast_and_loc(const char *funcname, jl_value_t *expr, + jl_module_t *inmodule, const char *file, int line); jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t world); @@ -937,6 +938,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 3; } extern _Atomic(uint32_t) jl_gc_running; +extern _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. 
@@ -1262,7 +1264,7 @@ STATIC_INLINE size_t jl_excstack_next(jl_excstack_t *stack, size_t itr) JL_NOTSA return itr-2 - jl_excstack_bt_size(stack, itr); } // Exception stack manipulation -void jl_push_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, +void jl_push_excstack(jl_task_t* task, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, jl_value_t *exception JL_ROOTED_ARGUMENT, jl_bt_element_t *bt_data, size_t bt_size); @@ -1607,6 +1609,7 @@ extern JL_DLLEXPORT jl_sym_t *jl_aggressive_constprop_sym; extern JL_DLLEXPORT jl_sym_t *jl_no_constprop_sym; extern JL_DLLEXPORT jl_sym_t *jl_purity_sym; extern JL_DLLEXPORT jl_sym_t *jl_nospecialize_sym; +extern JL_DLLEXPORT jl_sym_t *jl_nospecializeinfer_sym; extern JL_DLLEXPORT jl_sym_t *jl_macrocall_sym; extern JL_DLLEXPORT jl_sym_t *jl_colon_sym; extern JL_DLLEXPORT jl_sym_t *jl_hygienicscope_sym; @@ -1710,16 +1713,16 @@ typedef struct { } jl_llvmf_dump_t; JL_DLLIMPORT jl_value_t *jl_dump_method_asm(jl_method_instance_t *linfo, size_t world, - char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary); + char emit_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary); JL_DLLIMPORT void jl_get_llvmf_defn(jl_llvmf_dump_t* dump, jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params); -JL_DLLIMPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary); +JL_DLLIMPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char emit_mc, const char* asm_variant, const char *debuginfo, char binary); JL_DLLIMPORT jl_value_t *jl_dump_function_ir(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo); -JL_DLLIMPORT jl_value_t *jl_dump_function_asm(jl_llvmf_dump_t *dump, char raw_mc, const char* asm_variant, const char *debuginfo, char binary); +JL_DLLIMPORT jl_value_t 
*jl_dump_function_asm(jl_llvmf_dump_t *dump, char emit_mc, const char* asm_variant, const char *debuginfo, char binary, char raw); JL_DLLIMPORT void *jl_create_native(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int policy, int imaging_mode, int cache, size_t world); JL_DLLIMPORT void jl_dump_native(void *native_code, const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, - const char *sysimg_data, size_t sysimg_len, ios_t *s); + ios_t *z, ios_t *s); JL_DLLIMPORT void jl_get_llvm_gvs(void *native_code, arraylist_t *gvs); JL_DLLIMPORT void jl_get_llvm_external_fns(void *native_code, arraylist_t *gvs); JL_DLLIMPORT void jl_get_function_id(void *native_code, jl_code_instance_t *ncode, diff --git a/src/julia_threads.h b/src/julia_threads.h index 3e9db5b676577..cbbffe68b71a9 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -202,6 +202,7 @@ typedef struct { } jl_gc_mark_cache_t; struct _jl_bt_element_t; +struct _jl_gc_pagemeta_t; // This includes all the thread local states we care about for a thread. // Changes to TLS field types must be reflected in codegen. @@ -264,6 +265,8 @@ typedef struct _jl_tls_states_t { #endif jl_thread_t system_id; arraylist_t finalizers; + struct _jl_gc_pagemeta_t *page_metadata_allocd; + struct _jl_gc_pagemeta_t *page_metadata_lazily_freed; jl_gc_markqueue_t mark_queue; jl_gc_mark_cache_t gc_cache; arraylist_t sweep_objs; diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index acb2d673d6760..b87a5a6799b0b 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -433,36 +433,45 @@ void Optimizer::insertLifetime(Value *ptr, Constant *sz, Instruction *orig) abort(); } #endif - // Record extra BBs that contain invisible uses. + + // Record extra BBs that contain invisible uses with gc_preserve_{begin,end}. 
+ // We traverse the dominator tree starting at each `gc_preserve_begin` and marking blocks + // as users until a corresponding `gc_preserve_end` is found. Blocks containing + // the `gc_preserve_end` have already been marked in the previous step. SmallSet extra_use; SmallVector*, 8> dominated; for (auto preserve: use_info.preserves) { - for (auto RN = DT.getNode(preserve->getParent()); RN; - RN = dominated.empty() ? nullptr : dominated.pop_back_val()) { - for (auto N: *RN) { - auto bb = N->getBlock(); - if (extra_use.count(bb)) - continue; - bool ended = false; - for (auto end: preserve->users()) { - auto end_bb = cast(end)->getParent(); - auto end_node = DT.getNode(end_bb); - if (end_bb == bb || (end_node && DT.dominates(end_node, N))) { - ended = true; - break; - } + assert(dominated.empty()); + dominated.push_back(DT.getNode(preserve->getParent())); + while (!dominated.empty()) { + auto N = dominated.pop_back_val(); + if (!N) { + dominated.clear(); + break; + } + auto bb = N->getBlock(); + if (extra_use.count(bb)) + continue; + bool ended = false; + for (auto end: preserve->users()) { + auto end_bb = cast(end)->getParent(); + auto end_node = DT.getNode(end_bb); + if (end_bb == bb || (end_node && DT.dominates(end_node, N))) { + ended = true; + break; } - if (ended) - continue; - bbs.insert(bb); - extra_use.insert(bb); - dominated.push_back(N); } + if (ended) + continue; + bbs.insert(bb); + extra_use.insert(bb); + dominated.append(N->begin(), N->end()); } - assert(dominated.empty()); } + // For each BB, find the first instruction(s) where the allocation is possibly dead. // If all successors are live, then there isn't one. + // If the BB has "invisible" uses, then there isn't one. // If all successors are dead, then it's the first instruction after the last use // within the BB. 
// If some successors are live and others are dead, it's the first instruction in @@ -723,6 +732,8 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) auto replace_i = new_i; Type *new_t = new_i->getType(); if (cast_t != new_t) { + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(cast_t->getContext().supportsTypedPointers()); replace_i = new BitCastInst(replace_i, cast_t, "", user); replace_i->setDebugLoc(user->getDebugLoc()); replace_i->takeName(user); diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp index 45637a4c567f6..77f1baf6237c4 100644 --- a/src/llvm-cpufeatures.cpp +++ b/src/llvm-cpufeatures.cpp @@ -118,7 +118,7 @@ bool lowerCPUFeatures(Module &M) JL_NOTSAFEPOINT } } -PreservedAnalyses CPUFeatures::run(Module &M, ModuleAnalysisManager &AM) +PreservedAnalyses CPUFeaturesPass::run(Module &M, ModuleAnalysisManager &AM) { if (lowerCPUFeatures(M)) { return PreservedAnalyses::allInSet(); diff --git a/src/llvm-demote-float16.cpp b/src/llvm-demote-float16.cpp index 6ff7feaa550c8..b2428860c2882 100644 --- a/src/llvm-demote-float16.cpp +++ b/src/llvm-demote-float16.cpp @@ -51,8 +51,12 @@ namespace { static bool have_fp16(Function &caller, const Triple &TT) { Attribute FSAttr = caller.getFnAttribute("target-features"); - StringRef FS = - FSAttr.isValid() ? 
FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString(); + StringRef FS = ""; + if (FSAttr.isValid()) + FS = FSAttr.getValueAsString(); + else if (jl_ExecutionEngine) + FS = jl_ExecutionEngine->getTargetFeatureString(); + // else probably called from opt, just do nothing if (TT.isAArch64()) { if (FS.find("+fp16fml") != llvm::StringRef::npos || FS.find("+fullfp16") != llvm::StringRef::npos){ return true; @@ -62,6 +66,9 @@ static bool have_fp16(Function &caller, const Triple &TT) { return true; } } + if (caller.hasFnAttribute("julia.hasfp16")) { + return true; + } return false; } @@ -187,7 +194,7 @@ static bool demoteFloat16(Function &F) } // end anonymous namespace -PreservedAnalyses DemoteFloat16::run(Function &F, FunctionAnalysisManager &AM) +PreservedAnalyses DemoteFloat16Pass::run(Function &F, FunctionAnalysisManager &AM) { if (demoteFloat16(F)) { return PreservedAnalyses::allInSet(); diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 3f644a365a86c..4cf6aa7f8ee8a 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -94,37 +94,16 @@ Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F) unsigned nRoots = cast(target->getArgOperand(0))->getLimitedValue(INT_MAX); // Create the GC frame. 
- unsigned allocaAddressSpace = F.getParent()->getDataLayout().getAllocaAddrSpace(); - AllocaInst *gcframe_alloca = new AllocaInst( - T_prjlvalue, - allocaAddressSpace, - ConstantInt::get(Type::getInt32Ty(F.getContext()), nRoots + 2), - Align(16)); - gcframe_alloca->insertAfter(target); - Instruction *gcframe; - if (allocaAddressSpace) { - // addrspacecast as needed for non-0 alloca addrspace - gcframe = new AddrSpaceCastInst(gcframe_alloca, T_prjlvalue->getPointerTo(0)); - gcframe->insertAfter(gcframe_alloca); - } else { - gcframe = gcframe_alloca; - } + IRBuilder<> builder(target->getNextNode()); + auto gcframe_alloca = builder.CreateAlloca(T_prjlvalue, ConstantInt::get(Type::getInt32Ty(F.getContext()), nRoots + 2)); + gcframe_alloca->setAlignment(Align(16)); + // addrspacecast as needed for non-0 alloca addrspace + auto gcframe = cast(builder.CreateAddrSpaceCast(gcframe_alloca, T_prjlvalue->getPointerTo(0))); gcframe->takeName(target); // Zero out the GC frame. - BitCastInst *tempSlot_i8 = new BitCastInst(gcframe, Type::getInt8PtrTy(F.getContext()), ""); - tempSlot_i8->insertAfter(gcframe); - Type *argsT[2] = {tempSlot_i8->getType(), Type::getInt32Ty(F.getContext())}; - Function *memset = Intrinsic::getDeclaration(F.getParent(), Intrinsic::memset, makeArrayRef(argsT)); - Value *args[4] = { - tempSlot_i8, // dest - ConstantInt::get(Type::getInt8Ty(F.getContext()), 0), // val - ConstantInt::get(Type::getInt32Ty(F.getContext()), sizeof(jl_value_t*) * (nRoots + 2)), // len - ConstantInt::get(Type::getInt1Ty(F.getContext()), 0)}; // volatile - CallInst *zeroing = CallInst::Create(memset, makeArrayRef(args)); - cast(zeroing)->setDestAlignment(Align(16)); - zeroing->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); - zeroing->insertAfter(tempSlot_i8); + auto ptrsize = F.getParent()->getDataLayout().getPointerSize(); + builder.CreateMemSet(gcframe, Constant::getNullValue(Type::getInt8Ty(F.getContext())), ptrsize * (nRoots + 2), Align(16), tbaa_gcframe); return gcframe; } 
diff --git a/src/llvm-julia-licm.cpp b/src/llvm-julia-licm.cpp index fc867252318c5..8e03fe434a79c 100644 --- a/src/llvm-julia-licm.cpp +++ b/src/llvm-julia-licm.cpp @@ -176,7 +176,7 @@ struct JuliaLICM : public JuliaPassContext { // Lazy initialization of exit blocks insertion points. bool exit_pts_init = false; SmallVector _exit_pts; - auto get_exit_pts = [&] () -> ArrayRef { + auto get_exit_pts = [&] () -> MutableArrayRef { if (!exit_pts_init) { exit_pts_init = true; SmallVector exit_bbs; @@ -242,6 +242,7 @@ struct JuliaLICM : public JuliaPassContext { } ++SunkPreserveEnd; moveInstructionBefore(*call, *exit_pts[0], MSSAU, SE, MemorySSA::Beginning); + exit_pts[0] = call; LLVM_DEBUG(dbgs() << "Sunk gc_preserve_end: " << *call << "\n"); REMARK([&](){ return OptimizationRemark(DEBUG_TYPE, "Sunk", call) @@ -250,6 +251,7 @@ struct JuliaLICM : public JuliaPassContext { for (unsigned i = 1; i < exit_pts.size(); i++) { // Clone exit auto CI = CallInst::Create(call, {}, exit_pts[i]); + exit_pts[i] = CI; createNewInstruction(CI, call, MSSAU); LLVM_DEBUG(dbgs() << "Cloned and sunk gc_preserve_end: " << *CI << "\n"); REMARK([&](){ @@ -345,11 +347,8 @@ struct JuliaLICM : public JuliaPassContext { auto align = Align(DL.getPointerSize(0)); auto clear_obj = builder.CreateMemSet(obj_i8, ConstantInt::get(Type::getInt8Ty(call->getContext()), 0), call->getArgOperand(1), align); if (MSSAU.getMemorySSA()) { - auto alloc_mdef = MSSAU.getMemorySSA()->getMemoryAccess(call); - assert(isa(alloc_mdef) && "Expected alloc to be associated with a memory def!"); - auto clear_mdef = MSSAU.createMemoryAccessAfter(clear_obj, nullptr, alloc_mdef); - assert(isa(clear_mdef) && "Expected memset to be associated with a memory def!"); - (void) clear_mdef; + auto clear_mdef = MSSAU.createMemoryAccessInBB(clear_obj, nullptr, clear_obj->getParent(), MemorySSA::BeforeTerminator); + MSSAU.insertDef(cast(clear_mdef), true); } changed = true; } diff --git a/src/llvm-julia-passes.inc b/src/llvm-julia-passes.inc 
index 18e0f645d5445..39030d60a44fc 100644 --- a/src/llvm-julia-passes.inc +++ b/src/llvm-julia-passes.inc @@ -1,27 +1,27 @@ //Module passes #ifdef MODULE_PASS -MODULE_PASS("CPUFeatures", CPUFeatures()) -MODULE_PASS("RemoveNI", RemoveNI()) -MODULE_PASS("LowerSIMDLoop", LowerSIMDLoop()) -MODULE_PASS("FinalLowerGC", FinalLowerGCPass()) -MODULE_PASS("JuliaMultiVersioning", MultiVersioning()) -MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass()) -MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass()) -MODULE_PASS("LowerPTLSPass", LowerPTLSPass()) +MODULE_PASS("CPUFeatures", CPUFeaturesPass, CPUFeaturesPass()) +MODULE_PASS("RemoveNI", RemoveNIPass, RemoveNIPass()) +MODULE_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass()) +MODULE_PASS("FinalLowerGC", FinalLowerGCPass, FinalLowerGCPass()) +MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass, MultiVersioningPass()) +MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass, RemoveJuliaAddrspacesPass()) +MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass, RemoveAddrspacesPass()) +MODULE_PASS("LowerPTLSPass", LowerPTLSPass, LowerPTLSPass()) #endif //Function passes #ifdef FUNCTION_PASS -FUNCTION_PASS("DemoteFloat16", DemoteFloat16()) -FUNCTION_PASS("CombineMulAdd", CombineMulAdd()) -FUNCTION_PASS("LateLowerGCFrame", LateLowerGC()) -FUNCTION_PASS("AllocOpt", AllocOptPass()) -FUNCTION_PASS("PropagateJuliaAddrspaces", PropagateJuliaAddrspacesPass()) -FUNCTION_PASS("LowerExcHandlers", LowerExcHandlers()) -FUNCTION_PASS("GCInvariantVerifier", GCInvariantVerifierPass()) +FUNCTION_PASS("DemoteFloat16", DemoteFloat16Pass, DemoteFloat16Pass()) +FUNCTION_PASS("CombineMulAdd", CombineMulAddPass, CombineMulAddPass()) +FUNCTION_PASS("LateLowerGCFrame", LateLowerGCPass, LateLowerGCPass()) +FUNCTION_PASS("AllocOpt", AllocOptPass, AllocOptPass()) +FUNCTION_PASS("PropagateJuliaAddrspaces", PropagateJuliaAddrspacesPass, PropagateJuliaAddrspacesPass()) +FUNCTION_PASS("LowerExcHandlers", LowerExcHandlersPass, 
LowerExcHandlersPass()) +FUNCTION_PASS("GCInvariantVerifier", GCInvariantVerifierPass, GCInvariantVerifierPass()) #endif //Loop passes #ifdef LOOP_PASS -LOOP_PASS("JuliaLICM", JuliaLICMPass()) +LOOP_PASS("JuliaLICM", JuliaLICMPass, JuliaLICMPass()) #endif diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 2bf340be13b62..48b005edfb6ef 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -702,8 +702,11 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) { ConstantInt::get(Type::getInt32Ty(Cond->getContext()), i), "", SI); } - if (FalseElem->getType() != TrueElem->getType()) + if (FalseElem->getType() != TrueElem->getType()) { + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(FalseElem->getContext().supportsTypedPointers()); FalseElem = new BitCastInst(FalseElem, TrueElem->getType(), "", SI); + } SelectInst *SelectBase = SelectInst::Create(Cond, TrueElem, FalseElem, "gclift", SI); int Number = ++S.MaxPtrNumber; S.AllPtrNumbering[SelectBase] = Number; @@ -776,6 +779,8 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) { else BaseElem = IncomingBases[i]; if (BaseElem->getType() != T_prjlvalue) { + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(BaseElem->getContext().supportsTypedPointers()); auto &remap = CastedRoots[i][BaseElem]; if (!remap) { if (auto constant = dyn_cast(BaseElem)) { @@ -2645,8 +2650,11 @@ void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColor // Pointee types don't have semantics, so the optimizer is // free to rewrite them if convenient. We need to change // it back here for the store. 
- if (Val->getType() != T_prjlvalue) + if (Val->getType() != T_prjlvalue) { + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(Val->getContext().supportsTypedPointers()); Val = new BitCastInst(Val, T_prjlvalue, "", InsertBefore); + } new StoreInst(Val, slotAddress, InsertBefore); } @@ -2727,6 +2735,8 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State } if (slotAddress->getType() != AI->getType()) { // If we're replacing an ArrayAlloca, the pointer element type may need to be fixed up + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(slotAddress->getContext().supportsTypedPointers()); auto BCI = new BitCastInst(slotAddress, AI->getType()); BCI->insertAfter(slotAddress); slotAddress = BCI; @@ -2755,8 +2765,11 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State slotAddress->insertAfter(gcframe); auto ValExpr = std::make_pair(Base, isa(Base->getType()) ? -1 : i); auto Elem = MaybeExtractScalar(S, ValExpr, SI); - if (Elem->getType() != T_prjlvalue) + if (Elem->getType() != T_prjlvalue) { + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(Elem->getContext().supportsTypedPointers()); Elem = new BitCastInst(Elem, T_prjlvalue, "", SI); + } //auto Idxs = makeArrayRef(Tracked[i]); //Value *Elem = ExtractScalar(Base, true, Idxs, SI); Value *shadowStore = new StoreInst(Elem, slotAddress, SI); @@ -2814,7 +2827,7 @@ bool LateLowerGCFrameLegacy::runOnFunction(Function &F) { return modified; } -PreservedAnalyses LateLowerGC::run(Function &F, FunctionAnalysisManager &AM) +PreservedAnalyses LateLowerGCPass::run(Function &F, FunctionAnalysisManager &AM) { auto GetDT = [&AM, &F]() -> DominatorTree & { return AM.getResult(F); diff --git a/src/llvm-lower-handlers.cpp b/src/llvm-lower-handlers.cpp index 57fb6ab1c7ed6..146c0fe701e9b 100644 --- a/src/llvm-lower-handlers.cpp +++ b/src/llvm-lower-handlers.cpp @@ -236,7 
+236,7 @@ static bool lowerExcHandlers(Function &F) { } // anonymous namespace -PreservedAnalyses LowerExcHandlers::run(Function &F, FunctionAnalysisManager &AM) +PreservedAnalyses LowerExcHandlersPass::run(Function &F, FunctionAnalysisManager &AM) { bool modified = lowerExcHandlers(F); #ifdef JL_VERIFY_PASSES diff --git a/src/llvm-muladd.cpp b/src/llvm-muladd.cpp index efe0acb36f1fc..29c0f7e2b10d6 100644 --- a/src/llvm-muladd.cpp +++ b/src/llvm-muladd.cpp @@ -40,10 +40,10 @@ STATISTIC(TotalContracted, "Total number of multiplies marked for FMA"); * Combine * ``` * %v0 = fmul ... %a, %b - * %v = fadd fast ... %v0, %c + * %v = fadd contract ... %v0, %c * ``` * to - * `%v = call fast @llvm.fmuladd.<...>(... %a, ... %b, ... %c)` + * `%v = call contract @llvm.fmuladd.<...>(... %a, ... %b, ... %c)` * when `%v0` has no other use */ @@ -87,13 +87,13 @@ static bool combineMulAdd(Function &F) JL_NOTSAFEPOINT it++; switch (I.getOpcode()) { case Instruction::FAdd: { - if (!I.isFast()) + if (!I.hasAllowContract()) continue; modified |= checkCombine(I.getOperand(0), ORE) || checkCombine(I.getOperand(1), ORE); break; } case Instruction::FSub: { - if (!I.isFast()) + if (!I.hasAllowContract()) continue; modified |= checkCombine(I.getOperand(0), ORE) || checkCombine(I.getOperand(1), ORE); break; @@ -109,7 +109,7 @@ static bool combineMulAdd(Function &F) JL_NOTSAFEPOINT return modified; } -PreservedAnalyses CombineMulAdd::run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT +PreservedAnalyses CombineMulAddPass::run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT { if (combineMulAdd(F)) { return PreservedAnalyses::allInSet(); diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp index 814b13554358c..e4ebbe9d3838a 100644 --- a/src/llvm-multiversioning.cpp +++ b/src/llvm-multiversioning.cpp @@ -1140,7 +1140,7 @@ void multiversioning_preannotate(Module &M) M.addModuleFlag(Module::ModFlagBehavior::Error, "julia.mv.enable", 1); } -PreservedAnalyses 
MultiVersioning::run(Module &M, ModuleAnalysisManager &AM) +PreservedAnalyses MultiVersioningPass::run(Module &M, ModuleAnalysisManager &AM) { if (runMultiVersioning(M, external_use)) { auto preserved = PreservedAnalyses::allInSet(); diff --git a/src/llvm-propagate-addrspaces.cpp b/src/llvm-propagate-addrspaces.cpp index 2158109cea120..9f6cfa1beb38e 100644 --- a/src/llvm-propagate-addrspaces.cpp +++ b/src/llvm-propagate-addrspaces.cpp @@ -187,6 +187,8 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc if (LiftingMap.count(CurrentV)) CurrentV = LiftingMap[CurrentV]; if (CurrentV->getType() != TargetType) { + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(CurrentV->getContext().supportsTypedPointers()); auto *BCI = new BitCastInst(CurrentV, TargetType); ToInsert.push_back(std::make_pair(BCI, InsertPt)); CurrentV = BCI; diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp index 84f8d7121ff03..840efaebee032 100644 --- a/src/llvm-ptls.cpp +++ b/src/llvm-ptls.cpp @@ -70,6 +70,7 @@ void LowerPTLS::set_pgcstack_attrs(CallInst *pgcstack) const Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefore) const { + IRBuilder<> builder(insertBefore); Value *tls; if (TargetTriple.isX86() && insertBefore->getFunction()->callsFunctionThatReturnsTwice()) { // Workaround LLVM bug by hiding the offset computation @@ -87,15 +88,15 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor if (offset) { std::vector args(0); args.push_back(offset->getType()); - auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), args, false), + auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(builder.getContext()), args, false), dyn_asm_str, "=&r,r,~{dirflag},~{fpsr},~{flags}", false); - tls = CallInst::Create(tp, offset, "pgcstack_i8", insertBefore); + tls = builder.CreateCall(tp, {offset}, "pgcstack"); } else { auto tp = 
InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), false), const_asm_str.c_str(), "=r,~{dirflag},~{fpsr},~{flags}", false); - tls = CallInst::Create(tp, "pgcstack_i8", insertBefore); + tls = builder.CreateCall(tp, {}, "tls_pgcstack"); } } else { // AArch64/ARM doesn't seem to have this issue. @@ -118,12 +119,12 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor } if (!offset) offset = ConstantInt::getSigned(T_size, jl_tls_offset); - auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), false), asm_str, "=r", false); - tls = CallInst::Create(tp, "thread_ptr", insertBefore); - tls = GetElementPtrInst::Create(Type::getInt8Ty(insertBefore->getContext()), tls, {offset}, "ppgcstack_i8", insertBefore); + auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(builder.getContext()), false), asm_str, "=r", false); + tls = builder.CreateCall(tp, {}, "thread_ptr"); + tls = builder.CreateGEP(Type::getInt8Ty(builder.getContext()), tls, {offset}, "tls_ppgcstack"); } - tls = new BitCastInst(tls, T_pppjlvalue->getPointerTo(), "ppgcstack", insertBefore); - return new LoadInst(T_pppjlvalue, tls, "pgcstack", false, insertBefore); + tls = builder.CreateBitCast(tls, T_pppjlvalue->getPointerTo()); + return builder.CreateLoad(T_pppjlvalue, tls, "tls_pgcstack"); } GlobalVariable *LowerPTLS::create_hidden_global(Type *T, StringRef name) const @@ -153,15 +154,16 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, // if (!retboxed) // foreach(retinst) // emit_gc_unsafe_leave(ctx, last_gc_state); - auto phi = PHINode::Create(pgcstack->getType(), 2, ""); - phi->insertAfter(pgcstack); + IRBuilder<> builder(pgcstack->getNextNode()); + auto phi = builder.CreatePHI(pgcstack->getType(), 2, "pgcstack"); pgcstack->replaceAllUsesWith(phi); MDBuilder MDB(pgcstack->getContext()); SmallVector Weights{9, 1}; TerminatorInst *fastTerm; TerminatorInst *slowTerm; 
assert(pgcstack->getType()); // Static analyzer - auto cmp = new ICmpInst(phi, CmpInst::ICMP_NE, pgcstack, Constant::getNullValue(pgcstack->getType())); + builder.SetInsertPoint(phi); + auto cmp = builder.CreateICmpNE(pgcstack, Constant::getNullValue(pgcstack->getType())); SplitBlockAndInsertIfThenElse(cmp, phi, &fastTerm, &slowTerm, MDB.createBranchWeights(Weights)); if (CFGModified) @@ -180,7 +182,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, adopt->insertBefore(slowTerm); phi->addIncoming(adopt, slowTerm->getParent()); // emit fast branch code - IRBuilder<> builder(fastTerm->getParent()); + builder.SetInsertPoint(fastTerm->getParent()); fastTerm->removeFromParent(); MDNode *tbaa = tbaa_gcframe; Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, pgcstack), tbaa), true); @@ -194,7 +196,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, last_gc_state->addIncoming(prior, fastTerm->getParent()); for (auto &BB : *pgcstack->getParent()->getParent()) { if (isa(BB.getTerminator())) { - IRBuilder<> builder(BB.getTerminator()); + builder.SetInsertPoint(BB.getTerminator()); emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, phi), tbaa), last_gc_state, true); } } @@ -202,16 +204,16 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, } if (imaging_mode) { + IRBuilder<> builder(pgcstack); if (jl_tls_elf_support) { // if (offset != 0) // pgcstack = tp + offset; // fast // else // pgcstack = getter(); // slow - auto offset = new LoadInst(T_size, pgcstack_offset, "", false, pgcstack); + auto offset = builder.CreateLoad(T_size, pgcstack_offset); offset->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); offset->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None)); - 
auto cmp = new ICmpInst(pgcstack, CmpInst::ICMP_NE, offset, - Constant::getNullValue(offset->getType())); + auto cmp = builder.CreateICmpNE(offset, Constant::getNullValue(offset->getType())); MDBuilder MDB(pgcstack->getContext()); SmallVector Weights{9, 1}; TerminatorInst *fastTerm; @@ -222,10 +224,14 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, *CFGModified = true; auto fastTLS = emit_pgcstack_tp(offset, fastTerm); - auto phi = PHINode::Create(T_pppjlvalue, 2, "", pgcstack); + // refresh the basic block in the builder + builder.SetInsertPoint(pgcstack); + auto phi = builder.CreatePHI(T_pppjlvalue, 2, "pgcstack"); pgcstack->replaceAllUsesWith(phi); pgcstack->moveBefore(slowTerm); - auto getter = new LoadInst(T_pgcstack_getter, pgcstack_func_slot, "", false, pgcstack); + // refresh the basic block in the builder + builder.SetInsertPoint(pgcstack); + auto getter = builder.CreateLoad(T_pgcstack_getter, pgcstack_func_slot); getter->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None)); pgcstack->setCalledFunction(pgcstack->getFunctionType(), getter); @@ -240,14 +246,14 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, // variable to be filled (in `staticdata.c`) at initialization time of the sysimg. // This way we can bypass the extra indirection in `jl_get_pgcstack` // since we may not know which getter function to use ahead of time. 
- auto getter = new LoadInst(T_pgcstack_getter, pgcstack_func_slot, "", false, pgcstack); + auto getter = builder.CreateLoad(T_pgcstack_getter, pgcstack_func_slot); getter->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None)); if (TargetTriple.isOSDarwin()) { - auto key = new LoadInst(T_size, pgcstack_key_slot, "", false, pgcstack); + auto key = builder.CreateLoad(T_size, pgcstack_key_slot); key->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); key->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None)); - auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, getter, {key}, "", pgcstack); + auto new_pgcstack = builder.CreateCall(FT_pgcstack_getter, getter, {key}); new_pgcstack->takeName(pgcstack); pgcstack->replaceAllUsesWith(new_pgcstack); pgcstack->eraseFromParent(); @@ -314,6 +320,19 @@ bool LowerPTLS::run(bool *CFGModified) for (auto it = pgcstack_getter->user_begin(); it != pgcstack_getter->user_end();) { auto call = cast(*it); ++it; + auto f = call->getCaller(); + Value *pgcstack = NULL; + for (Function::arg_iterator arg = f->arg_begin(); arg != f->arg_end();++arg) { + if (arg->hasSwiftSelfAttr()){ + pgcstack = &*arg; + break; + } + } + if (pgcstack) { + call->replaceAllUsesWith(pgcstack); + call->eraseFromParent(); + continue; + } assert(call->getCalledOperand() == pgcstack_getter); fix_pgcstack_use(call, pgcstack_getter, or_new, CFGModified); } diff --git a/src/llvm-remove-ni.cpp b/src/llvm-remove-ni.cpp index b767074202eb2..5e8f54b98e417 100644 --- a/src/llvm-remove-ni.cpp +++ b/src/llvm-remove-ni.cpp @@ -36,7 +36,7 @@ static bool removeNI(Module &M) JL_NOTSAFEPOINT } } -PreservedAnalyses RemoveNI::run(Module &M, ModuleAnalysisManager &AM) +PreservedAnalyses RemoveNIPass::run(Module &M, ModuleAnalysisManager &AM) { if (removeNI(M)) { return PreservedAnalyses::allInSet(); diff --git a/src/llvm-simdloop.cpp 
b/src/llvm-simdloop.cpp index fcb05ba7c6805..21e2ec574d650 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -149,7 +149,8 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRe return OptimizationRemark(DEBUG_TYPE, "MarkedUnsafeAlgebra", *K) << "marked unsafe algebra on " << ore::NV("Instruction", *K); }); - (*K)->setFast(true); + (*K)->setHasAllowReassoc(true); + (*K)->setHasAllowContract(true); ++length; } ReductionChainLength += length; @@ -282,7 +283,7 @@ static bool markLoopInfo(Module &M, Function *marker, function_ref +#include +#include +#include +#include +#include +#include +#include + +namespace llvm { +namespace orc { +class OrcV2CAPIHelper { +public: + using PoolEntry = orc::SymbolStringPtr::PoolEntry; + using PoolEntryPtr = orc::SymbolStringPtr::PoolEntryPtr; + + // Move from SymbolStringPtr to PoolEntryPtr (no change in ref count). + static PoolEntryPtr moveFromSymbolStringPtr(SymbolStringPtr S) + { + PoolEntryPtr Result = nullptr; + std::swap(Result, S.S); + return Result; + } +}; +} // namespace orc +} // namespace llvm + + +typedef struct JLOpaqueJuliaOJIT *JuliaOJITRef; +typedef struct LLVMOrcOpaqueIRCompileLayer *LLVMOrcIRCompileLayerRef; + +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(JuliaOJIT, JuliaOJITRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::JITDylib, LLVMOrcJITDylibRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ExecutionSession, LLVMOrcExecutionSessionRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::OrcV2CAPIHelper::PoolEntry, + LLVMOrcSymbolStringPoolEntryRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::IRCompileLayer, LLVMOrcIRCompileLayerRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::MaterializationResponsibility, + LLVMOrcMaterializationResponsibilityRef) + +typedef struct LLVMOpaqueModulePassManager *LLVMModulePassManagerRef; +typedef struct LLVMOpaqueFunctionPassManager *LLVMFunctionPassManagerRef; +typedef struct LLVMOpaqueLoopPassManager *LLVMLoopPassManagerRef; + 
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::ModulePassManager, LLVMModulePassManagerRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::FunctionPassManager, LLVMFunctionPassManagerRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::LoopPassManager, LLVMLoopPassManagerRef) + +extern "C" { + +JL_DLLEXPORT_CODEGEN JuliaOJITRef JLJITGetJuliaOJIT_impl(void) +{ + return wrap(jl_ExecutionEngine); +} + +JL_DLLEXPORT_CODEGEN LLVMOrcExecutionSessionRef +JLJITGetLLVMOrcExecutionSession_impl(JuliaOJITRef JIT) +{ + return wrap(&unwrap(JIT)->getExecutionSession()); +} + +JL_DLLEXPORT_CODEGEN LLVMOrcJITDylibRef +JLJITGetExternalJITDylib_impl(JuliaOJITRef JIT) +{ + return wrap(&unwrap(JIT)->getExternalJITDylib()); +} + +JL_DLLEXPORT_CODEGEN LLVMErrorRef JLJITAddObjectFile_impl( + JuliaOJITRef JIT, LLVMOrcJITDylibRef JD, LLVMMemoryBufferRef ObjBuffer) +{ + return wrap(unwrap(JIT)->addObjectFile( + *unwrap(JD), std::unique_ptr(unwrap(ObjBuffer)))); +} + +JL_DLLEXPORT_CODEGEN LLVMErrorRef JLJITAddLLVMIRModule_impl( + JuliaOJITRef JIT, LLVMOrcJITDylibRef JD, LLVMOrcThreadSafeModuleRef TSM) +{ + std::unique_ptr TmpTSM(unwrap(TSM)); + return wrap(unwrap(JIT)->addExternalModule(*unwrap(JD), std::move(*TmpTSM))); +} + +JL_DLLEXPORT_CODEGEN LLVMErrorRef +JLJITLookup_impl(JuliaOJITRef JIT, LLVMOrcExecutorAddress *Result, + const char *Name, int ExternalJDOnly) +{ + auto Sym = unwrap(JIT)->findExternalJDSymbol(Name, ExternalJDOnly); + if (Sym) { + auto addr = Sym->getAddress(); + *Result = orc::ExecutorAddr(addr).getValue(); + return LLVMErrorSuccess; + } + else { + *Result = 0; + return wrap(Sym.takeError()); + } +} + +JL_DLLEXPORT_CODEGEN LLVMOrcSymbolStringPoolEntryRef +JLJITMangleAndIntern_impl(JuliaOJITRef JIT, + const char *Name) +{ + return wrap(orc::OrcV2CAPIHelper::moveFromSymbolStringPtr(unwrap(JIT)->mangle(Name))); +} + +JL_DLLEXPORT_CODEGEN const char * +JLJITGetTripleString_impl(JuliaOJITRef JIT) +{ + return unwrap(JIT)->getTargetTriple().str().c_str(); +} + +JL_DLLEXPORT_CODEGEN const char 
+JLJITGetGlobalPrefix_impl(JuliaOJITRef JIT) +{ + return unwrap(JIT)->getDataLayout().getGlobalPrefix(); +} + +JL_DLLEXPORT_CODEGEN const char * +JLJITGetDataLayoutString_impl(JuliaOJITRef JIT) +{ + return unwrap(JIT)->getDataLayout().getStringRepresentation().c_str(); +} + +JL_DLLEXPORT_CODEGEN LLVMOrcIRCompileLayerRef +JLJITGetIRCompileLayer_impl(JuliaOJITRef JIT) +{ + return wrap(&unwrap(JIT)->getIRCompileLayer()); +} + +#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ + JL_DLLEXPORT_CODEGEN void LLVMExtraMPMAdd##CLASS##_impl(LLVMModulePassManagerRef PM) \ + { \ + unwrap(PM)->addPass(CREATE_PASS); \ + } +#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ + JL_DLLEXPORT_CODEGEN void LLVMExtraFPMAdd##CLASS##_impl(LLVMFunctionPassManagerRef PM) \ + { \ + unwrap(PM)->addPass(CREATE_PASS); \ + } +#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \ + JL_DLLEXPORT_CODEGEN void LLVMExtraLPMAdd##CLASS##_impl(LLVMLoopPassManagerRef PM) \ + { \ + unwrap(PM)->addPass(CREATE_PASS); \ + } + +#include "llvm-julia-passes.inc" + +#undef MODULE_PASS +#undef CGSCC_PASS +#undef FUNCTION_PASS +#undef LOOP_PASS + +} // extern "C" diff --git a/src/mach_dyld_atfork.tbd b/src/mach_dyld_atfork.tbd deleted file mode 100644 index c2cda4417ec38..0000000000000 --- a/src/mach_dyld_atfork.tbd +++ /dev/null @@ -1,26 +0,0 @@ ---- !tapi-tbd -# copied from XCode's libSystem.tbd (current-version: 1311) -# to provide weak-linkage info for new symbols on old systems -tbd-version: 4 -targets: [ x86_64-macos, x86_64-maccatalyst, arm64-macos, arm64-maccatalyst, - arm64e-macos, arm64e-maccatalyst ] -uuids: - - target: x86_64-macos - value: AFE6C76A-B47A-35F5-91D0-4E9FC439E90D - - target: x86_64-maccatalyst - value: AFE6C76A-B47A-35F5-91D0-4E9FC439E90D - - target: arm64-macos - value: 2EA09BDB-811B-33AA-BB58-4B53AA2DB522 - - target: arm64-maccatalyst - value: 2EA09BDB-811B-33AA-BB58-4B53AA2DB522 - - target: arm64e-macos - value: 09AB3723-C26D-3762-93BA-98E9C38B89C1 - - target: arm64e-maccatalyst - value: 
09AB3723-C26D-3762-93BA-98E9C38B89C1 -install-name: '/usr/lib/libSystem.B.dylib' -exports: - - targets: [ arm64-macos, arm64e-macos, x86_64-macos, x86_64-maccatalyst, - arm64-maccatalyst, arm64e-maccatalyst ] - symbols: [ __dyld_atfork_parent, __dyld_atfork_prepare, - __dyld_dlopen_atfork_parent, __dyld_dlopen_atfork_prepare ] -... diff --git a/src/macroexpand.scm b/src/macroexpand.scm index 2933ca4888c4e..e0e809eee08f1 100644 --- a/src/macroexpand.scm +++ b/src/macroexpand.scm @@ -99,31 +99,32 @@ (vars '())) (if (null? binds) (cons 'varlist vars) - (cond - ((or (symbol? (car binds)) (decl? (car binds))) - ;; just symbol -> add local - (loop (cdr binds) - (cons (decl-var (car binds)) vars))) - ((and (length= (car binds) 3) - (eq? (caar binds) '=)) - ;; some kind of assignment - (cond - ((or (symbol? (cadar binds)) - (decl? (cadar binds))) - ;; a=b -> add argument - (loop (cdr binds) - (cons (decl-var (cadar binds)) vars))) - ((eventually-call? (cadar binds)) - ;; f()=c - (let ((asgn (cadr (julia-expand0 (car binds) 'none 0)))) - (loop (cdr binds) - (cons (cadr asgn) vars)))) - ((and (pair? (cadar binds)) - (eq? (caadar binds) 'tuple)) - (loop (cdr binds) - (append (map decl-var (lhs-vars (cadar binds))) vars))) - (else '()))) - (else '()))))) + (let ((ux (unescape (car binds)))) + (cond + ((or (symbol? ux) (decl? ux)) + ;; just symbol -> add local + (loop (cdr binds) + (cons (let-decl-var ux) vars))) + ((and (length= (car binds) 3) + (eq? (caar binds) '=)) + (set! ux (unescape (cadar binds))) + ;; some kind of assignment + (cond + ((or (symbol? ux) (decl? ux)) + ;; a=b -> add argument + (loop (cdr binds) + (cons (let-decl-var ux) vars))) + ((eventually-call? (cadar binds)) + ;; f()=c + (let ((name (assigned-name (cadar binds)))) + (loop (cdr binds) + (cons name vars)))) + ((and (pair? (cadar binds)) + (eq? 
(caadar binds) 'tuple)) + (loop (cdr binds) + (append (map let-decl-var (lhs-vars (cadar binds))) vars))) + (else '()))) + (else '())))))) ;; macro definition (pattern-lambda (macro (call name . argl) body) @@ -180,12 +181,12 @@ (define (unescape e) (if (and (pair? e) (eq? (car e) 'escape)) - (cadr e) + (unescape (cadr e)) e)) (define (unescape-global-lhs e env m parent-scope inarg) (cond ((not (pair? e)) e) - ((eq? (car e) 'escape) (cadr e)) + ((eq? (car e) 'escape) (unescape-global-lhs (cadr e) env m parent-scope inarg)) ((memq (car e) '(parameters tuple)) (list* (car e) (map (lambda (e) (unescape-global-lhs e env m parent-scope inarg)) @@ -207,7 +208,7 @@ ((eq? (car e) 'curly) (cddr e)) (else '()))) -(define (typevar-expr-name e) (car (analyze-typevar e))) +(define (typevar-expr-name e) (unescape (car (analyze-typevar e)))) ;; get the list of names from a list of `where` variable expressions (define (typevar-names lst) @@ -276,13 +277,13 @@ (list (cadr name)) '())) -;; resolve-expansion-vars-with-new-env, but turn on `inarg` once we get inside -;; the formal argument list. `e` in general might be e.g. `(f{T}(x)::T) where T`, +;; resolve-expansion-vars-with-new-env, but turn on `inarg` if we get inside +;; a formal argument list. `e` in general might be e.g. `(f{T}(x)::T) where T`, ;; and we want `inarg` to be true for the `(x)` part. -(define (resolve-in-function-lhs e env m parent-scope inarg) - (define (recur x) (resolve-in-function-lhs x env m parent-scope inarg)) +(define (resolve-in-lhs e env m parent-scope inarg) + (define (recur x) (resolve-in-lhs x env m parent-scope inarg)) (define (other x) (resolve-expansion-vars-with-new-env x env m parent-scope inarg)) - (case (car e) + (case (and (pair? 
e) (car e)) ((where) `(where ,(recur (cadr e)) ,@(map other (cddr e)))) ((|::|) `(|::| ,(recur (cadr e)) ,(other (caddr e)))) ((call) `(call ,(other (cadr e)) @@ -337,6 +338,11 @@ (new-expansion-env-for x env outermost)) m parent-scope inarg)) +(define (reescape ux x) + (if (and (pair? x) (eq? (car x) 'escape)) + (reescape '(escape ,ux) (cadr x))) + ux) + (define (resolve-expansion-vars- e env m parent-scope inarg) (cond ((or (eq? e 'begin) (eq? e 'end) (eq? e 'ccall) (eq? e 'cglobal) (underscore-symbol? e)) e) @@ -374,36 +380,35 @@ ;; type has special behavior: identifiers inside are ;; field names, not expressions. ,(map (lambda (x) - (cond ((atom? x) x) - ((and (pair? x) (eq? (car x) '|::|)) - `(|::| ,(cadr x) - ,(resolve-expansion-vars- (caddr x) env m parent-scope inarg))) - (else - (resolve-expansion-vars-with-new-env x env m parent-scope inarg)))) + (let ((ux (unescape x))) + (cond ((atom? ux) ux) + ((and (pair? ux) (eq? (car ux) '|::|)) + `(|::| ,(unescape (cadr ux)) + ,(resolve-expansion-vars- (reescape (caddr ux) x) env m parent-scope inarg))) + (else + (resolve-expansion-vars-with-new-env x env m parent-scope inarg))))) (cadddr e)))) ((parameters) (cons 'parameters (map (lambda (x) ;; `x` by itself after ; means `x=x` - (let ((x (if (and (not inarg) (symbol? x)) - `(kw ,x ,x) - x))) + (let* ((ux (unescape x)) + (x (if (and (not inarg) (symbol? ux)) + `(kw ,ux ,x) + x))) (resolve-expansion-vars- x env m parent-scope #f))) (cdr e)))) ((->) - `(-> ,(resolve-in-function-lhs (tuple-wrap-arrow-sig (cadr e)) env m parent-scope inarg) + `(-> ,(resolve-in-lhs (tuple-wrap-arrow-sig (cadr e)) env m parent-scope inarg) ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg))) ((= function) - (if (and (pair? (cadr e)) (function-def? 
e) (length> e 2)) - ;; in (kw x 1) inside an arglist, the x isn't actually a kwarg - `(,(car e) ,(resolve-in-function-lhs (cadr e) env m parent-scope inarg) - ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg)) - `(,(car e) ,@(map (lambda (x) - (resolve-expansion-vars-with-new-env x env m parent-scope inarg)) - (cdr e))))) + `(,(car e) ,(resolve-in-lhs (cadr e) env m parent-scope inarg) + ,@(map (lambda (x) + (resolve-expansion-vars-with-new-env x env m parent-scope inarg)) + (cddr e)))) ((kw) (cond @@ -442,13 +447,14 @@ newenv m parent-scope inarg)) ;; expand initial values in old env (resolve-expansion-vars- (caddr bind) env m parent-scope inarg)) - bind)) + (resolve-expansion-vars- bind newenv m parent-scope inarg))) binds)) ,body))) ((hygienic-scope) ; TODO: move this lowering to resolve-scopes, instead of reimplementing it here badly (let ((parent-scope (cons (list env m) parent-scope)) (body (cadr e)) - (m (caddr e))) + (m (caddr e)) + (lno (cdddr e))) (resolve-expansion-vars-with-new-env body env m parent-scope inarg #t))) ((tuple) (cons (car e) @@ -470,13 +476,14 @@ (define (decl-var* e) (if (pair? e) (case (car e) + ((hygienic-scope) '()) ((escape) '()) ((call) (decl-var* (cadr e))) ((=) (decl-var* (cadr e))) ((curly) (decl-var* (cadr e))) ((|::|) (if (length= e 2) '() (decl-var* (cadr e)))) ((where) (decl-var* (cadr e))) - (else (decl-var e))) + (else e)) e)) (define (decl-vars* e) @@ -484,6 +491,17 @@ (apply append (map decl-vars* (cdr e))) (list (decl-var* e)))) +;; decl-var that can sort of handle scope hygiene, but very badly +(define (let-decl-var e) + (if (pair? e) + (case (car e) + ((hygienic-scope) (let-decl-var (cadr e))) + ((escape) (let-decl-var (cadr e))) + ((|::|) (if (length= e 2) '() (let-decl-var (cadr e)))) + (else e)) + e)) + + ;; count hygienic / escape pairs ;; and fold together a list resulting from applying the function to ;; any block at the same hygienic scope @@ -574,7 +592,8 @@ ((eq? 
(car e) 'module) e) ((eq? (car e) 'hygienic-scope) (let ((form (cadr e)) ;; form is the expression returned from expand-macros - (modu (caddr e))) ;; m is the macro's def module + (modu (caddr e)) ;; m is the macro's def module + (lno (cdddr e))) ;; lno is (optionally) the line number node (resolve-expansion-vars form modu))) (else (map julia-expand-macroscopes- e)))) @@ -585,8 +604,9 @@ ((eq? (car e) 'hygienic-scope) (let ((parent-scope (list relabels parent-scope)) (body (cadr e)) - (m (caddr e))) - `(hygienic-scope ,(rename-symbolic-labels- (cadr e) (table) parent-scope) ,m))) + (m (caddr e)) + (lno (cdddr e))) + `(hygienic-scope ,(rename-symbolic-labels- (cadr e) (table) parent-scope) ,m ,@lno))) ((and (eq? (car e) 'escape) (not (null? parent-scope))) `(escape ,(apply rename-symbolic-labels- (cadr e) parent-scope))) ((or (eq? (car e) 'symbolicgoto) (eq? (car e) 'symboliclabel)) @@ -611,9 +631,5 @@ (rename-symbolic-labels (julia-expand-quotes e)))) -(define (contains-macrocall e) - (and (pair? e) - (contains (lambda (e) (and (pair? e) (eq? 
(car e) 'macrocall))) e))) - (define (julia-bq-macro x) (julia-bq-expand x 0)) diff --git a/src/method.c b/src/method.c index c207149032fb9..06a05361a927d 100644 --- a/src/method.c +++ b/src/method.c @@ -321,6 +321,8 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) li->inlining = 2; else if (ma == (jl_value_t*)jl_propagate_inbounds_sym) li->propagate_inbounds = 1; + else if (ma == (jl_value_t*)jl_nospecializeinfer_sym) + li->nospecializeinfer = 1; else if (ma == (jl_value_t*)jl_aggressive_constprop_sym) li->constprop = 1; else if (ma == (jl_value_t*)jl_no_constprop_sym) @@ -477,6 +479,7 @@ JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void) src->inferred = 0; src->propagate_inbounds = 0; src->has_fcall = 0; + src->nospecializeinfer = 0; src->edges = jl_nothing; src->constprop = 0; src->inlining = 0; @@ -569,7 +572,7 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, siz JL_TIMING(STAGED_FUNCTION, STAGED_FUNCTION); jl_value_t *tt = linfo->specTypes; jl_method_t *def = linfo->def.method; - jl_timing_show_method_instance(linfo, JL_TIMING_CURRENT_BLOCK); + jl_timing_show_method_instance(linfo, JL_TIMING_DEFAULT_BLOCK); jl_value_t *generator = def->generator; assert(generator != NULL); assert(jl_is_method(def)); @@ -662,7 +665,7 @@ jl_method_instance_t *jl_get_specialized(jl_method_t *m, jl_value_t *types, jl_s return new_linfo; } -static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src) +JL_DLLEXPORT void jl_method_set_source(jl_method_t *m, jl_code_info_t *src) { uint8_t j; uint8_t called = 0; @@ -682,6 +685,7 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src) } } m->called = called; + m->nospecializeinfer = src->nospecializeinfer; m->constprop = src->constprop; m->purity.bits = src->purity.bits; jl_add_function_to_lineinfo(src, (jl_value_t*)m->name); @@ -811,6 +815,7 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module) m->primary_world = 1; m->deleted_world = 
~(size_t)0; m->is_for_opaque_closure = 0; + m->nospecializeinfer = 0; m->constprop = 0; m->purity.bits = 0; m->max_varargs = UINT8_MAX; diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 4e7a551dd8381..1175c6a161750 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -365,10 +365,13 @@ void jl_gc_init(void) mmtk_julia_copy_stack_check(copy_stacks); // if only max size is specified initialize MMTk with a fixed size heap + // TODO: We just assume mark threads means GC threads, and ignore the number of concurrent sweep threads. + // If the two values are the same, we can use either. Otherwise, we need to be careful. + uintptr_t gcthreads = jl_options.nmarkthreads; if (max_size_def != NULL || (max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL))) { - mmtk_gc_init(0, max_heap_size, jl_options.ngcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); + mmtk_gc_init(0, max_heap_size, gcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); } else { - mmtk_gc_init(min_heap_size, max_heap_size, jl_options.ngcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); + mmtk_gc_init(min_heap_size, max_heap_size, gcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); } } diff --git a/src/module.c b/src/module.c index 04d3970f9b460..89c4c6cdb674e 100644 --- a/src/module.c +++ b/src/module.c @@ -239,7 +239,7 @@ JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m, jl_sym_ } // TODO: we might want to require explicitly importing types to add constructors // or we might want to drop this error entirely - if (!b->imported && (!b2->constp || !jl_is_type(f))) { + if (!b->imported && !(b2->constp && jl_is_type(f) && strcmp(jl_symbol_name(var), "=>") != 0)) { jl_errorf("invalid method definition in %s: function %s.%s must be explicitly imported to be extended", jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var)); } diff --git a/src/partr.c b/src/partr.c index 2c729add629e2..37cf9ca310d24 
100644 --- a/src/partr.c +++ b/src/partr.c @@ -108,6 +108,82 @@ void jl_init_threadinginfra(void) void JL_NORETURN jl_finish_task(jl_task_t *t); +#ifndef MMTK_GC + +static inline int may_mark(void) JL_NOTSAFEPOINT +{ + return (jl_atomic_load(&gc_n_threads_marking) > 0); +} + +// gc thread mark function +void jl_gc_mark_threadfun(void *arg) +{ + jl_threadarg_t *targ = (jl_threadarg_t*)arg; + + // initialize this thread (set tid and create heap) + jl_ptls_t ptls = jl_init_threadtls(targ->tid); + + // wait for all threads + jl_gc_state_set(ptls, JL_GC_STATE_WAITING, 0); + uv_barrier_wait(targ->barrier); + + // free the thread argument here + free(targ); + + while (1) { + uv_mutex_lock(&gc_threads_lock); + while (!may_mark()) { + uv_cond_wait(&gc_threads_cond, &gc_threads_lock); + } + uv_mutex_unlock(&gc_threads_lock); + gc_mark_loop_parallel(ptls, 0); + } +} + +// gc thread sweep function +void jl_gc_sweep_threadfun(void *arg) +{ + jl_threadarg_t *targ = (jl_threadarg_t*)arg; + + // initialize this thread (set tid and create heap) + jl_ptls_t ptls = jl_init_threadtls(targ->tid); + + // wait for all threads + jl_gc_state_set(ptls, JL_GC_STATE_WAITING, 0); + uv_barrier_wait(targ->barrier); + + // free the thread argument here + free(targ); + + while (1) { + uv_sem_wait(&gc_sweep_assists_needed); + while (1) { + jl_gc_pagemeta_t *pg = pop_lf_page_metadata_back(&global_page_pool_lazily_freed); + if (pg == NULL) { + break; + } + jl_gc_free_page(pg); + push_lf_page_metadata_back(&global_page_pool_freed, pg); + } + } +} + +#else + +// gc thread mark function +void jl_gc_mark_threadfun(void *arg) +{ + mmtk_unreachable(); +} + +// gc thread sweep function +void jl_gc_sweep_threadfun(void *arg) +{ + mmtk_unreachable(); +} + +#endif + // thread function: used by all mutator threads except the main thread void jl_threadfun(void *arg) { diff --git a/src/passes.h b/src/passes.h index acbfcd9538106..2bb33d6eec60d 100644 --- a/src/passes.h +++ b/src/passes.h @@ -10,16 +10,16 @@ using 
namespace llvm; // Function Passes -struct DemoteFloat16 : PassInfoMixin { +struct DemoteFloat16Pass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; -struct CombineMulAdd : PassInfoMixin { +struct CombineMulAddPass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; }; -struct LateLowerGC : PassInfoMixin { +struct LateLowerGCPass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; @@ -33,7 +33,7 @@ struct PropagateJuliaAddrspacesPass : PassInfoMixin { +struct LowerExcHandlersPass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; @@ -47,17 +47,19 @@ struct GCInvariantVerifierPass : PassInfoMixin { }; // Module Passes -struct CPUFeatures : PassInfoMixin { +struct CPUFeaturesPass : PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; -struct RemoveNI : PassInfoMixin { +struct RemoveNIPass : PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; + static bool isRequired() { return true; } }; -struct LowerSIMDLoop : PassInfoMixin { +struct LowerSIMDLoopPass : PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; + static bool isRequired() { return true; } }; struct FinalLowerGCPass : PassInfoMixin { @@ -65,9 +67,9 @@ struct FinalLowerGCPass : PassInfoMixin { static bool isRequired() { return true; } }; -struct MultiVersioning : PassInfoMixin { +struct MultiVersioningPass : PassInfoMixin { bool external_use; - MultiVersioning(bool external_use = false) : external_use(external_use) {} + MultiVersioningPass(bool external_use = false) : external_use(external_use) {} PreservedAnalyses 
run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; @@ -101,4 +103,47 @@ struct JuliaLICMPass : PassInfoMixin { LoopStandardAnalysisResults &AR, LPMUpdater &U) JL_NOTSAFEPOINT; }; +#define MODULE_MARKER_PASS(NAME) \ + struct NAME##MarkerPass : PassInfoMixin { \ + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT { return PreservedAnalyses::all(); } \ + static bool isRequired() { return true; } \ + }; + +#define FUNCTION_MARKER_PASS(NAME) \ + struct NAME##MarkerPass : PassInfoMixin { \ + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT { return PreservedAnalyses::all(); } \ + static bool isRequired() { return true; } \ + }; + +#define LOOP_MARKER_PASS(NAME) \ + struct NAME##MarkerPass : PassInfoMixin { \ + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, \ + LoopStandardAnalysisResults &AR, LPMUpdater &U) JL_NOTSAFEPOINT { \ + return PreservedAnalyses::all(); \ + } \ + static bool isRequired() { return true; } \ + }; + +// These are useful for debugging with --print-before/after +MODULE_MARKER_PASS(BeforeOptimization) +MODULE_MARKER_PASS(BeforeEarlySimplification) +MODULE_MARKER_PASS(AfterEarlySimplification) +MODULE_MARKER_PASS(BeforeEarlyOptimization) +MODULE_MARKER_PASS(AfterEarlyOptimization) +FUNCTION_MARKER_PASS(BeforeLoopOptimization) +LOOP_MARKER_PASS(BeforeLICM) +LOOP_MARKER_PASS(AfterLICM) +LOOP_MARKER_PASS(BeforeLoopSimplification) +LOOP_MARKER_PASS(AfterLoopSimplification) +FUNCTION_MARKER_PASS(AfterLoopOptimization) +FUNCTION_MARKER_PASS(BeforeScalarOptimization) +FUNCTION_MARKER_PASS(AfterScalarOptimization) +FUNCTION_MARKER_PASS(BeforeVectorization) +FUNCTION_MARKER_PASS(AfterVectorization) +MODULE_MARKER_PASS(BeforeIntrinsicLowering) +MODULE_MARKER_PASS(AfterIntrinsicLowering) +MODULE_MARKER_PASS(BeforeCleanup) +MODULE_MARKER_PASS(AfterCleanup) +MODULE_MARKER_PASS(AfterOptimization) + #endif diff --git a/src/pipeline.cpp 
b/src/pipeline.cpp index 1007dfd35c1d6..ca5992b6f3135 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -34,17 +34,21 @@ // NewPM needs to manually include all the pass headers #include +#include #include +#include #include #include #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -52,18 +56,23 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include #include #include #include #include #include #include +#include +#include #include #include #include @@ -199,210 +208,240 @@ namespace { // .sinkCommonInsts(true) ; } +#if JL_LLVM_VERSION < 150000 +#define LICMOptions() +#endif + +// At any given time exactly one of each pair of overloads is strictly unused +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +#ifdef _COMPILER_CLANG_ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-function" +#endif + + // Version check for our patch to allow invoking pipeline callbacks + // won't work if built with our LLVM but linked with system LLVM + template std::true_type hasInvokeCallbacks_helper(decltype(&PB::invokePipelineStartEPCallbacks)) JL_NOTSAFEPOINT; + std::false_type hasInvokeCallbacks_helper(...) JL_NOTSAFEPOINT; + + // static constexpr bool hasInvokeCallbacks = decltype(hasInvokeCallbacks_helper(nullptr))::value; - // TODO(vchuravy/maleadt): - // Since we are not using the PassBuilder fully and instead rolling our own, we are missing out on - // TargetMachine::registerPassBuilderCallbacks. We need to find a solution either in working with upstream - // or adapting PassBuilder (or subclassing it) to suite our needs. This is in particular important for - // BPF, NVPTX, and AMDGPU. 
- //TODO implement these once LLVM exposes - //the PassBuilder extension point callbacks - //For now we'll maintain the insertion points even though they don't do anything - //for the sake of documentation //If PB is a nullptr, don't invoke anything (this happens when running julia from opt) - void invokePipelineStartCallbacks(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void invokePeepholeEPCallbacks(FunctionPassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void invokeEarlySimplificationCallbacks(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void invokeCGSCCCallbacks(CGSCCPassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void invokeOptimizerEarlyCallbacks(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void invokeLateLoopOptimizationCallbacks(LoopPassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void invokeLoopOptimizerEndCallbacks(LoopPassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void invokeScalarOptimizerCallbacks(FunctionPassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void invokeVectorizerCallbacks(FunctionPassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void invokeOptimizerLastCallbacks(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} + template + std::enable_if_t(nullptr))::value, void> invokePipelineStartCallbacks(ModulePassManager &MPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokePipelineStartEPCallbacks(MPM, O); + } + template + std::enable_if_t(nullptr))::value, void> invokePeepholeEPCallbacks(FunctionPassManager &FPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second 
argument!"); + if (!PB) return; + PB->invokePeepholeEPCallbacks(FPM, O); + } + template + std::enable_if_t(nullptr))::value, void> invokeEarlySimplificationCallbacks(ModulePassManager &MPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokePipelineEarlySimplificationEPCallbacks(MPM, O); + } + template + std::enable_if_t(nullptr))::value, void> invokeCGSCCCallbacks(CGSCCPassManager &CGPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeCGSCCOptimizerLateEPCallbacks(CGPM, O); + } + template + std::enable_if_t(nullptr))::value, void> invokeOptimizerEarlyCallbacks(ModulePassManager &MPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeOptimizerEarlyEPCallbacks(MPM, O); + } + template + std::enable_if_t(nullptr))::value, void> invokeLateLoopOptimizationCallbacks(LoopPassManager &LPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeLateLoopOptimizationsEPCallbacks(LPM, O); + } + template + std::enable_if_t(nullptr))::value, void> invokeLoopOptimizerEndCallbacks(LoopPassManager &LPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeLoopOptimizerEndEPCallbacks(LPM, O); + } + template + std::enable_if_t(nullptr))::value, void> invokeScalarOptimizerCallbacks(FunctionPassManager &FPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeScalarOptimizerLateEPCallbacks(FPM, O); + } + template + 
std::enable_if_t(nullptr))::value, void> invokeVectorizerCallbacks(FunctionPassManager &FPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeVectorizerStartEPCallbacks(FPM, O); + } + template + std::enable_if_t(nullptr))::value, void> invokeOptimizerLastCallbacks(ModulePassManager &MPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeOptimizerLastEPCallbacks(MPM, O); + } + + // Fallbacks + void invokePipelineStartCallbacks(...) {} + void invokePeepholeEPCallbacks(...) {} + void invokeEarlySimplificationCallbacks(...) {} + void invokeCGSCCCallbacks(...) {} + void invokeOptimizerEarlyCallbacks(...) {} + void invokeLateLoopOptimizationCallbacks(...) {} + void invokeLoopOptimizerEndCallbacks(...) {} + void invokeScalarOptimizerCallbacks(...) {} + void invokeVectorizerCallbacks(...) {} + void invokeOptimizerLastCallbacks(...) {} + +#ifdef _COMPILER_CLANG_ +#pragma clang diagnostic pop +#endif + +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic pop +#endif } //The actual pipelines //TODO Things we might want to consider: -//? annotation2metadata pass -//? force function attributes pass -//? annotation remarks pass -//? infer function attributes pass -//? lower expect intrinsic pass -//? warn missed transformations pass //* For vectorization //? loop unroll/jam after loop vectorization //? optimization remarks pass //? cse/cvp/instcombine/bdce/sccp/licm/unswitch after loop vectorization ( // cleanup as much as possible before trying to slp vectorize) -//? vectorcombine pass //* For optimization -//? float2int pass -//? lower constant intrinsics pass //? loop sink pass //? 
hot-cold splitting pass #define JULIA_PASS(ADD_PASS) if (!options.llvm_only) { ADD_PASS; } else do { } while (0) -//Use for O1 and below -static void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, OptimizationOptions options) JL_NOTSAFEPOINT { +static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + MPM.addPass(BeforeEarlySimplificationMarkerPass()); #ifdef JL_DEBUG_BUILD addVerificationPasses(MPM, options.llvm_only); #endif + // Place after verification in case we want to force it anyways + MPM.addPass(ForceFunctionAttrsPass()); invokePipelineStartCallbacks(MPM, PB, O); + MPM.addPass(Annotation2MetadataPass()); MPM.addPass(ConstantMergePass()); - if (!options.dump_native) { - JULIA_PASS(MPM.addPass(CPUFeatures())); - if (O.getSpeedupLevel() > 0) { - MPM.addPass(createModuleToFunctionPassAdaptor(InstSimplifyPass())); - } - } { FunctionPassManager FPM; + FPM.addPass(LowerExpectIntrinsicPass()); + if (O.getSpeedupLevel() >= 2) { + JULIA_PASS(FPM.addPass(PropagateJuliaAddrspacesPass())); + } FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); - if (O.getSpeedupLevel() > 0) { + if (O.getSpeedupLevel() >= 1) { + FPM.addPass(DCEPass()); FPM.addPass(SROAPass()); - FPM.addPass(InstCombinePass()); - FPM.addPass(EarlyCSEPass()); } - FPM.addPass(MemCpyOptPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } invokeEarlySimplificationCallbacks(MPM, PB, O); - MPM.addPass(AlwaysInlinerPass()); - { - CGSCCPassManager CGPM; - invokeCGSCCCallbacks(CGPM, PB, O); - MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); - } - invokeOptimizerEarlyCallbacks(MPM, PB, O); - JULIA_PASS(MPM.addPass(LowerSIMDLoop())); - { - FunctionPassManager FPM; - { - LoopPassManager LPM; - invokeLateLoopOptimizationCallbacks(LPM, PB, O); - invokeLoopOptimizerEndCallbacks(LPM, PB, O); - 
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); - } - invokeScalarOptimizerCallbacks(FPM, PB, O); - invokeVectorizerCallbacks(FPM, PB, O); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - } - if (options.lower_intrinsics) { - //TODO no barrier pass? - { - FunctionPassManager FPM; - JULIA_PASS(FPM.addPass(LowerExcHandlers())); - JULIA_PASS(FPM.addPass(GCInvariantVerifierPass(false))); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - } - JULIA_PASS(MPM.addPass(RemoveNI())); - JULIA_PASS(MPM.addPass(createModuleToFunctionPassAdaptor(LateLowerGC()))); - JULIA_PASS(MPM.addPass(FinalLowerGCPass())); - JULIA_PASS(MPM.addPass(LowerPTLSPass(options.dump_native))); - } else { - JULIA_PASS(MPM.addPass(RemoveNI())); - } - JULIA_PASS(MPM.addPass(LowerSIMDLoop())); // TODO why do we do this twice - if (options.dump_native) { - JULIA_PASS(MPM.addPass(MultiVersioning(options.external_use))); - JULIA_PASS(MPM.addPass(CPUFeatures())); - if (O.getSpeedupLevel() > 0) { - FunctionPassManager FPM; - FPM.addPass(InstSimplifyPass()); - FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - } - } - invokeOptimizerLastCallbacks(MPM, PB, O); - addSanitizerPasses(MPM, O); - JULIA_PASS(MPM.addPass(createModuleToFunctionPassAdaptor(DemoteFloat16()))); + MPM.addPass(AfterEarlySimplificationMarkerPass()); } -//Use for O2 and above -static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, OptimizationOptions options) JL_NOTSAFEPOINT { -#ifdef JL_DEBUG_BUILD - addVerificationPasses(MPM, options.llvm_only); -#endif - invokePipelineStartCallbacks(MPM, PB, O); - MPM.addPass(ConstantMergePass()); - { - FunctionPassManager FPM; - JULIA_PASS(FPM.addPass(PropagateJuliaAddrspacesPass())); - //TODO consider not using even basic simplification - //options here, and adding a run of CVP to take advantage - //of the unsimplified codegen information (e.g. 
known - //zeros or ones) - FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); - FPM.addPass(DCEPass()); - FPM.addPass(SROAPass()); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - } - invokeEarlySimplificationCallbacks(MPM, PB, O); - MPM.addPass(AlwaysInlinerPass()); +static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + MPM.addPass(BeforeEarlyOptimizationMarkerPass()); invokeOptimizerEarlyCallbacks(MPM, PB, O); { CGSCCPassManager CGPM; invokeCGSCCCallbacks(CGPM, PB, O); - { + if (O.getSpeedupLevel() >= 2) { FunctionPassManager FPM; JULIA_PASS(FPM.addPass(AllocOptPass())); - FPM.addPass(InstCombinePass()); - FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); + FPM.addPass(Float2IntPass()); + FPM.addPass(LowerConstantIntrinsicsPass()); CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); } MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); } if (options.dump_native) { - JULIA_PASS(MPM.addPass(MultiVersioning(options.external_use))); + JULIA_PASS(MPM.addPass(MultiVersioningPass(options.external_use))); } - JULIA_PASS(MPM.addPass(CPUFeatures())); - { + JULIA_PASS(MPM.addPass(CPUFeaturesPass())); + if (O.getSpeedupLevel() >= 1) { FunctionPassManager FPM; - FPM.addPass(SROAPass()); - // SROA can duplicate PHI nodes which can block LowerSIMD - FPM.addPass(InstCombinePass()); - FPM.addPass(JumpThreadingPass()); - FPM.addPass(CorrelatedValuePropagationPass()); - FPM.addPass(ReassociatePass()); - FPM.addPass(EarlyCSEPass()); - JULIA_PASS(FPM.addPass(AllocOptPass())); + if (O.getSpeedupLevel() >= 2) { + FPM.addPass(SROAPass()); + // SROA can duplicate PHI nodes which can block LowerSIMD + FPM.addPass(InstCombinePass()); + FPM.addPass(JumpThreadingPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(ReassociatePass()); + FPM.addPass(EarlyCSEPass()); + JULIA_PASS(FPM.addPass(AllocOptPass())); + 
} else { // if (O.getSpeedupLevel() >= 1) (exactly) + FPM.addPass(InstCombinePass()); + FPM.addPass(EarlyCSEPass()); + } invokePeepholeEPCallbacks(FPM, PB, O); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } - MPM.addPass(LowerSIMDLoop()); + MPM.addPass(AfterEarlyOptimizationMarkerPass()); +} + +static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + FPM.addPass(BeforeLoopOptimizationMarkerPass()); { - FunctionPassManager FPM; - { - LoopPassManager LPM1, LPM2; - LPM1.addPass(LoopRotatePass()); - invokeLateLoopOptimizationCallbacks(LPM1, PB, O); - //We don't know if the loop callbacks support MSSA - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), /*UseMemorySSA = */false)); -#if JL_LLVM_VERSION < 150000 -#define LICMOptions() -#endif - LPM2.addPass(LICMPass(LICMOptions())); - JULIA_PASS(LPM2.addPass(JuliaLICMPass())); - LPM2.addPass(SimpleLoopUnswitchPass(/*NonTrivial*/true, true)); - LPM2.addPass(LICMPass(LICMOptions())); - JULIA_PASS(LPM2.addPass(JuliaLICMPass())); - //LICM needs MemorySSA now, so we must use it - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), /*UseMemorySSA = */true)); + LoopPassManager LPM; + if (O.getSpeedupLevel() >= 2) { + LPM.addPass(LoopRotatePass()); } + invokeLateLoopOptimizationCallbacks(LPM, PB, O); + //We don't know if the loop callbacks support MSSA + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); + } + if (O.getSpeedupLevel() >= 2) { + LoopPassManager LPM; + LPM.addPass(BeforeLICMMarkerPass()); + LPM.addPass(LICMPass(LICMOptions())); + LPM.addPass(JuliaLICMPass()); + LPM.addPass(SimpleLoopUnswitchPass(/*NonTrivial*/true, true)); + LPM.addPass(LICMPass(LICMOptions())); + LPM.addPass(JuliaLICMPass()); + LPM.addPass(AfterLICMMarkerPass()); + //LICM needs MemorySSA now, so we must use it + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), 
/*UseMemorySSA = */true)); + } + if (O.getSpeedupLevel() >= 2) { FPM.addPass(IRCEPass()); - { - LoopPassManager LPM; + } + { + LoopPassManager LPM; + LPM.addPass(BeforeLoopSimplificationMarkerPass()); + if (O.getSpeedupLevel() >= 2) { LPM.addPass(LoopInstSimplifyPass()); LPM.addPass(LoopIdiomRecognizePass()); LPM.addPass(IndVarSimplifyPass()); LPM.addPass(LoopDeletionPass()); + // This unroll will only unroll loops when the trip count is known and small, + // so that no loop remains LPM.addPass(LoopFullUnrollPass()); - invokeLoopOptimizerEndCallbacks(LPM, PB, O); - //We don't know if the loop end callbacks support MSSA - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); } + invokeLoopOptimizerEndCallbacks(LPM, PB, O); + LPM.addPass(AfterLoopSimplificationMarkerPass()); + //We don't know if the loop end callbacks support MSSA + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); + } + FPM.addPass(AfterLoopOptimizationMarkerPass()); +} + +static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + FPM.addPass(BeforeScalarOptimizationMarkerPass()); + if (O.getSpeedupLevel() >= 2) { JULIA_PASS(FPM.addPass(AllocOptPass())); FPM.addPass(SROAPass()); FPM.addPass(InstSimplifyPass()); @@ -414,9 +453,11 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat FPM.addPass(IRCEPass()); FPM.addPass(InstCombinePass()); FPM.addPass(JumpThreadingPass()); - if (O.getSpeedupLevel() >= 3) { - FPM.addPass(GVNPass()); - } + } + if (O.getSpeedupLevel() >= 3) { + FPM.addPass(GVNPass()); + } + if (O.getSpeedupLevel() >= 2) { FPM.addPass(DSEPass()); invokePeepholeEPCallbacks(FPM, PB, O); FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); @@ -427,32 +468,47 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat LPM.addPass(LoopInstSimplifyPass()); 
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); } - invokeScalarOptimizerCallbacks(FPM, PB, O); - //TODO look into loop vectorize options - FPM.addPass(LoopVectorizePass()); - FPM.addPass(LoopLoadEliminationPass()); - FPM.addPass(InstCombinePass()); - FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); - FPM.addPass(SLPVectorizerPass()); - invokeVectorizerCallbacks(FPM, PB, O); - FPM.addPass(ADCEPass()); - //TODO add BDCEPass here? - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + FPM.addPass(LoopDistributePass()); } + invokeScalarOptimizerCallbacks(FPM, PB, O); + FPM.addPass(AfterScalarOptimizationMarkerPass()); +} + +static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + FPM.addPass(BeforeVectorizationMarkerPass()); + //TODO look into loop vectorize options + FPM.addPass(InjectTLIMappings()); + FPM.addPass(LoopVectorizePass()); + FPM.addPass(LoopLoadEliminationPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); + FPM.addPass(SLPVectorizerPass()); + invokeVectorizerCallbacks(FPM, PB, O); + FPM.addPass(VectorCombinePass()); + FPM.addPass(ADCEPass()); + //TODO add BDCEPass here? + // This unroll will unroll vectorized loops + // as well as loops that we tried but failed to vectorize + FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false))); + FPM.addPass(AfterVectorizationMarkerPass()); +} + +static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + MPM.addPass(BeforeIntrinsicLoweringMarkerPass()); if (options.lower_intrinsics) { //TODO barrier pass? 
{ FunctionPassManager FPM; - JULIA_PASS(FPM.addPass(LowerExcHandlers())); + JULIA_PASS(FPM.addPass(LowerExcHandlersPass())); JULIA_PASS(FPM.addPass(GCInvariantVerifierPass(false))); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } // Needed **before** LateLowerGCFrame on LLVM < 12 // due to bug in `CreateAlignmentAssumption`. - JULIA_PASS(MPM.addPass(RemoveNI())); - JULIA_PASS(MPM.addPass(createModuleToFunctionPassAdaptor(LateLowerGC()))); + JULIA_PASS(MPM.addPass(RemoveNIPass())); + JULIA_PASS(MPM.addPass(createModuleToFunctionPassAdaptor(LateLowerGCPass()))); JULIA_PASS(MPM.addPass(FinalLowerGCPass())); - { + if (O.getSpeedupLevel() >= 2) { FunctionPassManager FPM; FPM.addPass(GVNPass()); FPM.addPass(SCCPPass()); @@ -460,63 +516,124 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } JULIA_PASS(MPM.addPass(LowerPTLSPass(options.dump_native))); - { + if (O.getSpeedupLevel() >= 1) { FunctionPassManager FPM; FPM.addPass(InstCombinePass()); FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } } else { - JULIA_PASS(MPM.addPass(RemoveNI())); + JULIA_PASS(MPM.addPass(RemoveNIPass())); } - { + MPM.addPass(AfterIntrinsicLoweringMarkerPass()); +} + +static void buildCleanupPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + MPM.addPass(BeforeCleanupMarkerPass()); + if (O.getSpeedupLevel() >= 2) { FunctionPassManager FPM; - JULIA_PASS(FPM.addPass(CombineMulAdd())); + JULIA_PASS(FPM.addPass(CombineMulAddPass())); FPM.addPass(DivRemPairsPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } invokeOptimizerLastCallbacks(MPM, PB, O); + MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass())); addSanitizerPasses(MPM, O); { FunctionPassManager FPM; - 
JULIA_PASS(FPM.addPass(DemoteFloat16())); - FPM.addPass(GVNPass()); + JULIA_PASS(FPM.addPass(DemoteFloat16Pass())); + if (O.getSpeedupLevel() >= 2) { + FPM.addPass(GVNPass()); + } MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } + MPM.addPass(AfterCleanupMarkerPass()); +} + +static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + MPM.addPass(BeforeOptimizationMarkerPass()); + buildEarlySimplificationPipeline(MPM, PB, O, options); + MPM.addPass(AlwaysInlinerPass()); + buildEarlyOptimizerPipeline(MPM, PB, O, options); + MPM.addPass(LowerSIMDLoopPass()); + { + FunctionPassManager FPM; + buildLoopOptimizerPipeline(FPM, PB, O, options); + buildScalarOptimizerPipeline(FPM, PB, O, options); + if (O.getSpeedupLevel() >= 2) { + buildVectorPipeline(FPM, PB, O, options); + } + FPM.addPass(WarnMissedTransformationsPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + buildIntrinsicLoweringPipeline(MPM, PB, O, options); + buildCleanupPipeline(MPM, PB, O, options); + MPM.addPass(AfterOptimizationMarkerPass()); +} + +extern "C" JL_DLLEXPORT_CODEGEN void jl_build_newpm_pipeline_impl(void *MPM, void *PB, int Speedup, int Size, + int lower_intrinsics, int dump_native, int external_use, int llvm_only) JL_NOTSAFEPOINT +{ + OptimizationLevel O; + switch (Size) { + case 1: + O = OptimizationLevel::Os; + break; + default: + O = OptimizationLevel::Oz; + break; + case 0: + switch (Speedup) { + case 0: + O = OptimizationLevel::O0; + break; + case 1: + O = OptimizationLevel::O1; + break; + case 2: + O = OptimizationLevel::O2; + break; + default: + O = OptimizationLevel::O3; + break; + } + } + buildPipeline(*reinterpret_cast(MPM), reinterpret_cast(PB), O, + OptimizationOptions{!!lower_intrinsics, !!dump_native, !!external_use, !!llvm_only}); } #undef JULIA_PASS namespace { - auto createPIC(StandardInstrumentations &SI) JL_NOTSAFEPOINT { - auto PIC = 
std::make_unique(); + + void adjustPIC(PassInstrumentationCallbacks &PIC) JL_NOTSAFEPOINT { //Borrowed from LLVM PassBuilder.cpp:386 -#define MODULE_PASS(NAME, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ -PIC->addClassToPassName(CLASS, NAME); +PIC.addClassToPassName(CLASS, NAME); #define MODULE_ANALYSIS(NAME, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define FUNCTION_PASS(NAME, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ -PIC->addClassToPassName(CLASS, NAME); +PIC.addClassToPassName(CLASS, NAME); #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOPNEST_PASS(NAME, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define LOOP_PASS(NAME, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \ +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ -PIC->addClassToPassName(CLASS, NAME); +PIC.addClassToPassName(CLASS, NAME); #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define CGSCC_PASS(NAME, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); 
+PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define CGSCC_PASS(NAME, CLASS, CREATE_PASS) \ +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ -PIC->addClassToPassName(CLASS, NAME); +PIC.addClassToPassName(CLASS, NAME); #define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #include "llvm-julia-passes.inc" @@ -533,12 +650,37 @@ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #undef CGSCC_PASS #undef CGSCC_PASS_WITH_PARAMS #undef CGSCC_ANALYSIS + // Marker passes are set separately so that we don't export them by accident + PIC.addClassToPassName("BeforeOptimizationMarkerPass", "BeforeOptimization"); + PIC.addClassToPassName("BeforeEarlySimplificationMarkerPass", "BeforeEarlySimplification"); + PIC.addClassToPassName("AfterEarlySimplificationMarkerPass", "AfterEarlySimplification"); + PIC.addClassToPassName("BeforeEarlyOptimizationMarkerPass", "BeforeEarlyOptimization"); + PIC.addClassToPassName("AfterEarlyOptimizationMarkerPass", "AfterEarlyOptimization"); + PIC.addClassToPassName("BeforeLoopOptimizationMarkerPass", "BeforeLoopOptimization"); + PIC.addClassToPassName("BeforeLICMMarkerPass", "BeforeLICM"); + PIC.addClassToPassName("AfterLICMMarkerPass", "AfterLICM"); + PIC.addClassToPassName("BeforeLoopSimplificationMarkerPass", "BeforeLoopSimplification"); + PIC.addClassToPassName("AfterLoopSimplificationMarkerPass", "AfterLoopSimplification"); + PIC.addClassToPassName("AfterLoopOptimizationMarkerPass", "AfterLoopOptimization"); + PIC.addClassToPassName("BeforeScalarOptimizationMarkerPass", "BeforeScalarOptimization"); + PIC.addClassToPassName("AfterScalarOptimizationMarkerPass", "AfterScalarOptimization"); + PIC.addClassToPassName("BeforeVectorizationMarkerPass", "BeforeVectorization"); + 
PIC.addClassToPassName("AfterVectorizationMarkerPass", "AfterVectorization"); + PIC.addClassToPassName("BeforeIntrinsicLoweringMarkerPass", "BeforeIntrinsicLowering"); + PIC.addClassToPassName("AfterIntrinsicLoweringMarkerPass", "AfterIntrinsicLowering"); + PIC.addClassToPassName("BeforeCleanupMarkerPass", "BeforeCleanup"); + PIC.addClassToPassName("AfterCleanupMarkerPass", "AfterCleanup"); + PIC.addClassToPassName("AfterOptimizationMarkerPass", "AfterOptimization"); + } + auto createPIC(StandardInstrumentations &SI) JL_NOTSAFEPOINT { + auto PIC = std::make_unique(); + adjustPIC(*PIC); SI.registerCallbacks(*PIC); return PIC; } - FunctionAnalysisManager createFAM(OptimizationLevel O, TargetIRAnalysis analysis, const Triple &triple) JL_NOTSAFEPOINT { + FunctionAnalysisManager createFAM(OptimizationLevel O, TargetMachine &TM) JL_NOTSAFEPOINT { FunctionAnalysisManager FAM; // Register the AA manager first so that our version is the one used. @@ -549,21 +691,18 @@ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); AA.registerFunctionAnalysis(); AA.registerFunctionAnalysis(); } - // TM->registerDefaultAliasAnalyses(AA); + TM.registerDefaultAliasAnalyses(AA); return AA; }); // Register our TargetLibraryInfoImpl. 
- FAM.registerPass([&] JL_NOTSAFEPOINT { return llvm::TargetIRAnalysis(analysis); }); - FAM.registerPass([&] JL_NOTSAFEPOINT { return llvm::TargetLibraryAnalysis(llvm::TargetLibraryInfoImpl(triple)); }); + FAM.registerPass([&] JL_NOTSAFEPOINT { return llvm::TargetIRAnalysis(TM.getTargetIRAnalysis()); }); + FAM.registerPass([&] JL_NOTSAFEPOINT { return llvm::TargetLibraryAnalysis(llvm::TargetLibraryInfoImpl(TM.getTargetTriple())); }); return FAM; } ModulePassManager createMPM(PassBuilder &PB, OptimizationLevel O, OptimizationOptions options) JL_NOTSAFEPOINT { ModulePassManager MPM; - if (O.getSpeedupLevel() < 2) - buildBasicPipeline(MPM, &PB, O, options); - else - buildFullPipeline(MPM, &PB, O, options); + buildPipeline(MPM, &PB, O, options); return MPM; } } @@ -575,7 +714,7 @@ NewPM::NewPM(std::unique_ptr TM, OptimizationLevel O, Optimizatio NewPM::~NewPM() = default; -AnalysisManagers::AnalysisManagers(TargetMachine &TM, PassBuilder &PB, OptimizationLevel O) : LAM(), FAM(createFAM(O, TM.getTargetIRAnalysis(), TM.getTargetTriple())), CGAM(), MAM() { +AnalysisManagers::AnalysisManagers(TargetMachine &TM, PassBuilder &PB, OptimizationLevel O) : LAM(), FAM(createFAM(O, TM)), CGAM(), MAM() { PB.registerLoopAnalyses(LAM); PB.registerFunctionAnalyses(FAM); PB.registerCGSCCAnalyses(CGAM); @@ -680,31 +819,70 @@ static llvm::Optional> parseJu // NOTE: Instead of exporting all the constructors in passes.h we could // forward the callbacks to the respective passes. LLVM seems to prefer this, // and when we add the full pass builder having them directly will be helpful. 
-void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { +static void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { + auto PIC = PB.getPassInstrumentationCallbacks(); + if (PIC) { + adjustPIC(*PIC); + } PB.registerPipelineParsingCallback( [](StringRef Name, FunctionPassManager &PM, ArrayRef InnerPipeline) { -#define FUNCTION_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } #include "llvm-julia-passes.inc" #undef FUNCTION_PASS + if (Name.consume_front("GCInvariantVerifier")) { + if (Name.consume_front("<") && Name.consume_back(">")) { + bool strong = true; + if (Name.consume_front("no-")) { + strong = false; + } + if (Name == "strong") { + PM.addPass(GCInvariantVerifierPass(strong)); + return true; + } + } + return false; + } return false; }); PB.registerPipelineParsingCallback( [](StringRef Name, ModulePassManager &PM, ArrayRef InnerPipeline) { -#define MODULE_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#define MODULE_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } #include "llvm-julia-passes.inc" #undef MODULE_PASS + if (Name.consume_front("LowerPTLSPass")) { + if (Name.consume_front("<") && Name.consume_back(">")) { + bool imaging_mode = true; + if (Name.consume_front("no-")) { + imaging_mode = false; + } + if (Name == "imaging") { + PM.addPass(LowerPTLSPass(imaging_mode)); + return true; + } + } + return false; + } + if (Name.consume_front("JuliaMultiVersioning")) { + if (Name.consume_front("<") && Name.consume_back(">")) { + bool external_use = true; + if (Name.consume_front("no-")) { + external_use = false; + } + if (Name == "external") { + PM.addPass(MultiVersioningPass(external_use)); + return true; + } + } + return false; + } //Add full pipelines here auto julia_options = parseJuliaPipelineOptions(Name); if (julia_options) { 
ModulePassManager pipeline; - if (julia_options->first.getSpeedupLevel() < 2) { - buildBasicPipeline(pipeline, nullptr, julia_options->first, julia_options->second); - } else { - buildFullPipeline(pipeline, nullptr, julia_options->first, julia_options->second); - } + buildPipeline(pipeline, nullptr, julia_options->first, julia_options->second); PM.addPass(std::move(pipeline)); return true; } @@ -714,13 +892,18 @@ void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { PB.registerPipelineParsingCallback( [](StringRef Name, LoopPassManager &PM, ArrayRef InnerPipeline) { -#define LOOP_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#define LOOP_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } #include "llvm-julia-passes.inc" #undef LOOP_PASS return false; }); } +extern "C" JL_DLLEXPORT_CODEGEN +void jl_register_passbuilder_callbacks_impl(void *PB) JL_NOTSAFEPOINT { + registerCallbacks(*static_cast(PB)); +} + extern "C" JL_DLLEXPORT_CODEGEN ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() JL_NOTSAFEPOINT { return {LLVM_PLUGIN_API_VERSION, "Julia", "1", registerCallbacks}; diff --git a/src/precompile.c b/src/precompile.c index 4aac28ff9a790..a7174492cf0e1 100644 --- a/src/precompile.c +++ b/src/precompile.c @@ -111,7 +111,9 @@ JL_DLLEXPORT void jl_write_compiler_output(void) bool_t emit_native = jl_options.outputo || jl_options.outputbc || jl_options.outputunoptbc || jl_options.outputasm; - bool_t emit_split = jl_options.outputji && emit_native; + const char *outputji = jl_options.outputji; + + bool_t emit_split = outputji && emit_native; ios_t *s = NULL; ios_t *z = NULL; @@ -123,40 +125,39 @@ JL_DLLEXPORT void jl_write_compiler_output(void) if (!emit_split) z = s; + ios_t f; + + if (outputji) { + if (ios_file(&f, outputji, 1, 1, 1, 1) == NULL) + jl_errorf("cannot open system image file \"%s\" for writing", outputji); + ios_write(&f, (const char *)s->buf, (size_t)s->size); + 
ios_close(s); + free(s); + } + // jl_dump_native writes the clone_targets into `s` // We need to postpone the srctext writing after that. if (native_code) { + ios_t *targets = outputji ? &f : NULL; + // jl_dump_native will close and free z when appropriate + // this is a horrible abstraction, but + // this helps reduce live memory significantly jl_dump_native(native_code, jl_options.outputbc, jl_options.outputunoptbc, jl_options.outputo, jl_options.outputasm, - (const char*)z->buf, (size_t)z->size, s); + z, targets); jl_postoutput_hook(); } - if ((jl_options.outputji || emit_native) && jl_options.incremental) { - write_srctext(s, udeps, srctextpos); - } - - if (jl_options.outputji) { - ios_t f; - if (ios_file(&f, jl_options.outputji, 1, 1, 1, 1) == NULL) - jl_errorf("cannot open system image file \"%s\" for writing", jl_options.outputji); - ios_write(&f, (const char*)s->buf, (size_t)s->size); + if (outputji) { + if (jl_options.incremental) { + write_srctext(&f, udeps, srctextpos); + } ios_close(&f); } - if (s) { - ios_close(s); - free(s); - } - - if (emit_split) { - ios_close(z); - free(z); - } - for (size_t i = 0; i < jl_current_modules.size; i += 2) { if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { jl_printf(JL_STDERR, "\nWARNING: detected unclosed module: "); diff --git a/src/processor_x86.cpp b/src/processor_x86.cpp index 30a6ff9b3dede..e129b1239c7df 100644 --- a/src/processor_x86.cpp +++ b/src/processor_x86.cpp @@ -227,8 +227,11 @@ constexpr auto bdver2 = bdver1 | get_feature_masks(f16c, bmi, tbm, fma); constexpr auto bdver3 = bdver2 | get_feature_masks(xsaveopt, fsgsbase); constexpr auto bdver4 = bdver3 | get_feature_masks(avx2, bmi2, mwaitx, movbe, rdrnd); +// technically xsaves is part of znver1, znver2, and znver3 +// Disabled due to Erratum 1386 +// See: https://github.com/JuliaLang/julia/issues/50102 constexpr auto znver1 = haswell | get_feature_masks(adx, aes, clflushopt, clzero, mwaitx, prfchw, - rdseed, sha, sse4a, xsavec, xsaves); + rdseed, 
sha, sse4a, xsavec); constexpr auto znver2 = znver1 | get_feature_masks(clwb, rdpid, wbnoinvd); constexpr auto znver3 = znver2 | get_feature_masks(shstk, pku, vaes, vpclmulqdq); diff --git a/src/rtutils.c b/src/rtutils.c index 01ea11014a6db..7a31d37e4175c 100644 --- a/src/rtutils.c +++ b/src/rtutils.c @@ -320,7 +320,7 @@ static void jl_copy_excstack(jl_excstack_t *dest, jl_excstack_t *src) JL_NOTSAFE dest->top = src->top; } -static void jl_reserve_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT, +static void jl_reserve_excstack(jl_task_t* task, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT, size_t reserved_size) { jl_excstack_t *s = *stack; @@ -334,13 +334,14 @@ static void jl_reserve_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT, if (s) jl_copy_excstack(new_s, s); *stack = new_s; + jl_gc_wb(task, new_s); } -void jl_push_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, +void jl_push_excstack(jl_task_t* task, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, jl_value_t *exception JL_ROOTED_ARGUMENT, jl_bt_element_t *bt_data, size_t bt_size) { - jl_reserve_excstack(stack, (*stack ? (*stack)->top : 0) + bt_size + 2); + jl_reserve_excstack(task, stack, (*stack ? 
(*stack)->top : 0) + bt_size + 2); jl_excstack_t *s = *stack; jl_bt_element_t *rawstack = jl_excstack_raw(s); memcpy(rawstack + s->top, bt_data, sizeof(jl_bt_element_t)*bt_size); diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index 9babdf89f098b..ed320aa9a6c35 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -429,6 +429,8 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *exp jl_atomic_error("atomic_pointerreplace: invalid atomic ordering"); // TODO: filter other invalid orderings jl_value_t *ety = jl_tparam0(jl_typeof(p)); + if (!is_valid_intrinsic_elptr(ety)) + jl_error("atomic_pointerreplace: invalid pointer"); char *pp = (char*)jl_unbox_long(p); jl_datatype_t *rettyp = jl_apply_cmpswap_type(ety); JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE) @@ -447,8 +449,6 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *exp return result; } else { - if (!is_valid_intrinsic_elptr(ety)) - jl_error("atomic_pointerreplace: invalid pointer"); if (jl_typeof(x) != ety) jl_type_error("atomic_pointerreplace", ety, x); size_t nb = jl_datatype_size(ety); diff --git a/src/safepoint.c b/src/safepoint.c index 19eca4bf6f00d..c6f9a42059d1a 100644 --- a/src/safepoint.c +++ b/src/safepoint.c @@ -124,6 +124,14 @@ int jl_safepoint_start_gc(void) jl_safepoint_wait_gc(); return 0; } + // Foreign thread adoption disables the GC and waits for it to finish, however, that may + // introduce a race between it and this thread checking if the GC is enabled and only + // then setting jl_gc_running. To avoid that, check again now that we won that race. 
+ if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { + jl_atomic_store_release(&jl_gc_running, 0); + uv_mutex_unlock(&safepoint_lock); + return 0; + } jl_safepoint_enable(1); jl_safepoint_enable(2); uv_mutex_unlock(&safepoint_lock); @@ -151,7 +159,7 @@ void jl_safepoint_end_gc(void) void jl_safepoint_wait_gc(void) { jl_task_t *ct = jl_current_task; (void)ct; - JL_TIMING_SUSPEND(GC_SAFEPOINT, ct); + JL_TIMING_SUSPEND_TASK(GC_SAFEPOINT, ct); // The thread should have set this is already assert(jl_atomic_load_relaxed(&ct->ptls->gc_state) != 0); // Use normal volatile load in the loop for speed until GC finishes. diff --git a/src/staticdata.c b/src/staticdata.c index 49b97480b5165..452e4380deb02 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -71,6 +71,7 @@ External links: */ #include #include +#include #include // printf #include // PRIxPTR @@ -3365,10 +3366,10 @@ static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_ } // TODO?: refactor to make it easier to create the "package inspector" -static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo, const char *pkgname) +static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo, const char *pkgname, bool needs_permalloc) { JL_TIMING(LOAD_IMAGE, LOAD_Pkgimg); - jl_timing_printf(JL_TIMING_CURRENT_BLOCK, pkgname); + jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, pkgname); uint64_t checksum = 0; int64_t dataendpos = 0; int64_t datastartpos = 0; @@ -3378,7 +3379,7 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im return verify_fail; assert(datastartpos > 0 && datastartpos < dataendpos); - + needs_permalloc = jl_options.permalloc_pkgimg || needs_permalloc; jl_value_t *restored = NULL; jl_array_t *init_order = NULL, *extext_methods = NULL, *new_specializations = NULL, *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL; 
jl_svec_t *cachesizes_sv = NULL; @@ -3390,15 +3391,24 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im ios_bufmode(f, bm_none); JL_SIGATOMIC_BEGIN(); size_t len = dataendpos - datastartpos; - char *sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); - jl_gc_notify_image_alloc(sysimg, len); + char *sysimg; + bool success = !needs_permalloc; ios_seek(f, datastartpos); - if (ios_readall(f, sysimg, len) != len || jl_crc32c(0, sysimg, len) != (uint32_t)checksum) { - restored = jl_get_exceptionf(jl_errorexception_type, "Error reading system image file."); + if (needs_permalloc) { + sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); + jl_gc_notify_image_alloc(sysimg, len); + } + else + sysimg = &f->buf[f->bpos]; + if (needs_permalloc) + success = ios_readall(f, sysimg, len) == len; + if (!success || jl_crc32c(0, sysimg, len) != (uint32_t)checksum) { + restored = jl_get_exceptionf(jl_errorexception_type, "Error reading package image file."); JL_SIGATOMIC_END(); } else { - ios_close(f); + if (needs_permalloc) + ios_close(f); ios_static_buffer(f, sysimg, len); pkgcachesizes cachesizes; jl_restore_system_image_from_stream_(f, image, depmods, checksum, (jl_array_t**)&restored, &init_order, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges, &base, &ccallable_list, &cachesizes); @@ -3444,11 +3454,11 @@ static void jl_restore_system_image_from_stream(ios_t *f, jl_image_t *image, uin jl_restore_system_image_from_stream_(f, image, NULL, checksum | ((uint64_t)0xfdfcfbfa << 32), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); } -JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, jl_image_t *image, size_t sz, jl_array_t *depmods, int completeinfo, const char *pkgname) +JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, jl_image_t *image, size_t sz, jl_array_t *depmods, int completeinfo, const char *pkgname, bool needs_permalloc) { ios_t f; ios_static_buffer(&f, 
(char*)buf, sz); - jl_value_t *ret = jl_restore_package_image_from_stream(&f, image, depmods, completeinfo, pkgname); + jl_value_t *ret = jl_restore_package_image_from_stream(&f, image, depmods, completeinfo, pkgname, needs_permalloc); ios_close(&f); return ret; } @@ -3461,7 +3471,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *d "Cache file \"%s\" not found.\n", fname); } jl_image_t pkgimage = {}; - jl_value_t *ret = jl_restore_package_image_from_stream(&f, &pkgimage, depmods, completeinfo, pkgname); + jl_value_t *ret = jl_restore_package_image_from_stream(&f, &pkgimage, depmods, completeinfo, pkgname, true); ios_close(&f); return ret; } @@ -3530,10 +3540,11 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); size_t *plen; jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(pkgimg_data, *plen); jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); - jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_data, &pkgimage, *plen, depmods, completeinfo, pkgname); + jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_data, &pkgimage, *plen, depmods, completeinfo, pkgname, false); return mod; } diff --git a/src/subtype.c b/src/subtype.c index fd9bd3e8be00f..5b05bb288ffc4 100644 --- a/src/subtype.c +++ b/src/subtype.c @@ -860,7 +860,7 @@ static int var_occurs_inside(jl_value_t *v, jl_tvar_t *var, int inside, int want typedef int (*tvar_callback)(void*, int8_t, jl_stenv_t *, int); -static int var_occurs_invariant(jl_value_t *v, jl_tvar_t *var, int inv) JL_NOTSAFEPOINT +static int var_occurs_invariant(jl_value_t *v, jl_tvar_t *var) JL_NOTSAFEPOINT { return var_occurs_inside(v, var, 0, 1); } @@ -909,7 +909,7 @@ static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8 // ( Tuple{Int, Int} <: Tuple{T, T} where T) but // !( Tuple{Int, String} <: Tuple{T, 
T} where T) // Then check concreteness by checking that the lower bound is not an abstract type. - int diagonal = vb.occurs_cov > 1 && !var_occurs_invariant(u->body, u->var, 0); + int diagonal = vb.occurs_cov > 1 && !var_occurs_invariant(u->body, u->var); if (ans && (vb.concrete || (diagonal && is_leaf_typevar(u->var)))) { if (vb.concrete && !diagonal && !is_leaf_bound(vb.ub)) { // a non-diagonal var can only be a subtype of a diagonal var if its @@ -941,8 +941,8 @@ static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8 jl_value_t *vl = btemp->lb; // TODO: this takes a significant amount of time if (btemp->depth0 != vb.depth0 && - ((vu != (jl_value_t*)vb.var && btemp->var->ub != vu && var_occurs_inside(vu, vb.var, 0, 1)) || - (vl != (jl_value_t*)vb.var && btemp->var->lb != vl && var_occurs_inside(vl, vb.var, 0, 1)))) { + ((vu != (jl_value_t*)vb.var && btemp->var->ub != vu && var_occurs_invariant(vu, vb.var)) || + (vl != (jl_value_t*)vb.var && btemp->var->lb != vl && var_occurs_invariant(vl, vb.var)))) { ans = 0; break; } btemp = btemp->prev; @@ -953,7 +953,7 @@ static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8 if (R && ans && e->envidx < e->envsz) { jl_value_t *val; if (vb.intvalued && vb.lb == (jl_value_t*)jl_any_type) - val = (jl_value_t*)jl_wrap_vararg(NULL, NULL); // special token result that represents N::Int in the envout + val = (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0); // special token result that represents N::Int in the envout else if (!vb.occurs_inv && vb.lb != jl_bottom_type) val = is_leaf_bound(vb.lb) ? 
vb.lb : (jl_value_t*)jl_new_typevar(u->var->name, jl_bottom_type, vb.lb); else if (vb.lb == vb.ub) @@ -1510,6 +1510,9 @@ static int local_forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t { int16_t oldRmore = e->Runions.more; int sub; + // fast-path for #49857 + if (obviously_in_union(y, x)) + return 1; int kindx = !jl_has_free_typevars(x); int kindy = !jl_has_free_typevars(y); if (kindx && kindy) @@ -1985,7 +1988,7 @@ static int obvious_subtype(jl_value_t *x, jl_value_t *y, jl_value_t *y0, int *su jl_value_t *body = find_var_body(y0, (jl_tvar_t*)b); if (body == NULL) body = y0; - if (var_occurs_invariant(body, (jl_tvar_t*)b, 0)) + if (var_occurs_invariant(body, (jl_tvar_t*)b)) return 0; } if (nparams_expanded_x > npy && jl_is_typevar(b) && concrete_min(a1) > 1) { @@ -2970,7 +2973,7 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv res = intersect(u->body, t, e, param); } vb->concrete |= (vb->occurs_cov > 1 && is_leaf_typevar(u->var) && - !var_occurs_invariant(u->body, u->var, 0)); + !var_occurs_invariant(u->body, u->var)); // handle the "diagonal dispatch" rule, which says that a type var occurring more // than once, and only in covariant position, is constrained to concrete types. E.g. 
@@ -3011,6 +3014,38 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv return res; } +static int always_occurs_cov(jl_value_t *v, jl_tvar_t *var, int param) JL_NOTSAFEPOINT +{ + if (param > 1) { + return 0; + } + else if (v == (jl_value_t*)var) { + return param == 1; + } + else if (jl_is_uniontype(v)) { + return always_occurs_cov(((jl_uniontype_t*)v)->a, var, param) && + always_occurs_cov(((jl_uniontype_t*)v)->b, var, param); + } + else if (jl_is_unionall(v)) { + jl_unionall_t *ua = (jl_unionall_t*)v; + return ua->var != var && ( + always_occurs_cov(ua->var->ub, var, 0) || + always_occurs_cov(ua->body, var, param)); + } + else if (jl_is_vararg(v)) { + jl_vararg_t *vm = (jl_vararg_t*)v; + return vm->T && always_occurs_cov(vm->T, var, param); + } + else if (jl_is_datatype(v)) { + int nparam = jl_is_tuple_type(v) ? 1 : param; + for (size_t i = 0; i < jl_nparams(v); i++) { + if (always_occurs_cov(jl_tparam(v, i), var, nparam)) + return 1; + } + } + return 0; +} + static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param) { jl_value_t *res = NULL; @@ -3019,7 +3054,8 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_ e->invdepth, NULL, e->vars }; JL_GC_PUSH4(&res, &vb.lb, &vb.ub, &vb.innervars); save_env(e, &se, 1); - if (is_leaf_typevar(u->var) && !var_occurs_invariant(u->body, u->var, 0)) + int noinv = !var_occurs_invariant(u->body, u->var); + if (is_leaf_typevar(u->var) && noinv && always_occurs_cov(u->body, u->var, param)) vb.constraintkind = 1; res = intersect_unionall_(t, u, e, R, param, &vb); if (vb.limited) { @@ -3033,7 +3069,7 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_ vb.constraintkind = vb.concrete ? 
1 : 2; else if (u->var->lb != jl_bottom_type) vb.constraintkind = 2; - else if (vb.occurs_cov && !var_occurs_invariant(u->body, u->var, 0)) + else if (vb.occurs_cov && noinv) vb.constraintkind = 1; int reintersection = constraint1 != vb.constraintkind || vb.concrete; if (reintersection) { @@ -3089,7 +3125,7 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t ii = (jl_value_t*)vmy; else { JL_GC_PUSH1(&ii); - ii = (jl_value_t*)jl_wrap_vararg(ii, NULL); + ii = (jl_value_t*)jl_wrap_vararg(ii, NULL, 1); JL_GC_POP(); } return ii; @@ -3130,7 +3166,7 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t else if (yp2 && obviously_egal(yp1, ii) && obviously_egal(yp2, i2)) ii = (jl_value_t*)vmy; else - ii = (jl_value_t*)jl_wrap_vararg(ii, i2); + ii = (jl_value_t*)jl_wrap_vararg(ii, i2, 1); JL_GC_POP(); return ii; } diff --git a/src/support/dtypes.h b/src/support/dtypes.h index a30fe85ccc0d0..da570921c101c 100644 --- a/src/support/dtypes.h +++ b/src/support/dtypes.h @@ -340,6 +340,23 @@ STATIC_INLINE void jl_store_unaligned_i16(void *ptr, uint16_t val) JL_NOTSAFEPOI memcpy(ptr, &val, 2); } +STATIC_INLINE void *calloc_s(size_t sz) JL_NOTSAFEPOINT { + int last_errno = errno; +#ifdef _OS_WINDOWS_ + DWORD last_error = GetLastError(); +#endif + void *p = calloc(sz == 0 ? 
1 : sz, 1); + if (p == NULL) { + perror("(julia) calloc"); + abort(); + } +#ifdef _OS_WINDOWS_ + SetLastError(last_error); +#endif + errno = last_errno; + return p; +} + STATIC_INLINE void *malloc_s(size_t sz) JL_NOTSAFEPOINT { int last_errno = errno; #ifdef _OS_WINDOWS_ diff --git a/src/task.c b/src/task.c index 477ae481071a0..73d9033f0cb50 100644 --- a/src/task.c +++ b/src/task.c @@ -646,7 +646,7 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER int finalizers_inhibited = ptls->finalizers_inhibited; ptls->finalizers_inhibited = 0; - jl_timing_block_t *blk = jl_timing_block_exit_task(ct, ptls); + jl_timing_block_t *blk = jl_timing_block_task_exit(ct, ptls); ctx_switch(ct); #ifdef MIGRATE_TASKS @@ -666,7 +666,7 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER 0 != ct->ptls && 0 == ptls->finalizers_inhibited); ptls->finalizers_inhibited = finalizers_inhibited; - jl_timing_block_enter_task(ct, ptls, blk); (void)blk; + jl_timing_block_task_enter(ct, ptls, blk); (void)blk; sig_atomic_t other_defer_signal = ptls->defer_signal; ptls->defer_signal = defer_signal; @@ -705,7 +705,7 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e, jl_task_t *ct) #define pop_timings_stack() \ jl_timing_block_t *cur_block = ptls->timing_stack; \ while (cur_block && eh->timing_stack != cur_block) { \ - cur_block = jl_pop_timing_block(cur_block); \ + cur_block = jl_timing_block_pop(cur_block); \ } \ assert(cur_block == eh->timing_stack); #else @@ -721,7 +721,7 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e, jl_task_t *ct) /* The temporary ptls->bt_data is rooted by special purpose code in the\ GC. This exists only for the purpose of preserving bt_data until we \ set ptls->bt_size=0 below. 
*/ \ - jl_push_excstack(&ct->excstack, exception, \ + jl_push_excstack(ct, &ct->excstack, exception, \ ptls->bt_data, ptls->bt_size); \ ptls->bt_size = 0; \ } \ @@ -1084,7 +1084,7 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->ptls = NULL; t->world_age = ct->world_age; t->reentrant_timing = 0; - jl_timing_init_task(t); + jl_timing_task_init(t); #ifdef COPY_STACKS if (!t->copy_stack) { @@ -1221,10 +1221,10 @@ CFI_NORETURN ct->started = 1; JL_PROBE_RT_START_TASK(ct); - jl_timing_block_enter_task(ct, ptls, NULL); + jl_timing_block_task_enter(ct, ptls, NULL); if (jl_atomic_load_relaxed(&ct->_isexception)) { record_backtrace(ptls, 0); - jl_push_excstack(&ct->excstack, ct->result, + jl_push_excstack(ct, &ct->excstack, ct->result, ptls->bt_data, ptls->bt_size); res = ct->result; } @@ -1693,7 +1693,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) ct->ctx.asan_fake_stack = NULL; #endif - jl_timing_block_enter_task(ct, ptls, NULL); + jl_timing_block_task_enter(ct, ptls, NULL); #ifdef COPY_STACKS // initialize the base_ctx from which all future copy_stacks will be copies diff --git a/src/threading.c b/src/threading.c index ddb4850aa074c..d1157a02dada0 100644 --- a/src/threading.c +++ b/src/threading.c @@ -410,18 +410,28 @@ jl_ptls_t jl_init_threadtls(int16_t tid) return ptls; } -JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void) JL_NOTSAFEPOINT_LEAVE -{ +JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void) +{ + // `jl_init_threadtls` puts us in a GC unsafe region, so ensure GC isn't running. + // we can't use a normal safepoint because we don't have signal handlers yet. + // we also can't use jl_safepoint_wait_gc because that assumes we're in a task. 
+ jl_atomic_fetch_add(&jl_gc_disable_counter, 1); + while (jl_atomic_load_acquire(&jl_gc_running)) { + jl_cpu_pause(); + } + // this check is coupled with the one in `jl_safepoint_wait_gc`, where we observe if a + // foreign thread has asked to disable the GC, guaranteeing the order of events. + // initialize this thread (assign tid, create heap, set up root task) jl_ptls_t ptls = jl_init_threadtls(-1); void *stack_lo, *stack_hi; jl_init_stack_limits(0, &stack_lo, &stack_hi); - (void)jl_gc_unsafe_enter(ptls); // warning: this changes `jl_current_task`, so be careful not to call that from this function - jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi); + jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi); // assumes the GC is disabled JL_GC_PROMISE_ROOTED(ct); uv_random(NULL, NULL, &ct->rngState, sizeof(ct->rngState), 0, NULL); + jl_atomic_fetch_add(&jl_gc_disable_counter, -1); return &ct->gcstack; } @@ -596,6 +606,8 @@ static void jl_check_tls(void) JL_DLLEXPORT const int jl_tls_elf_support = 0; #endif +extern int jl_n_markthreads; +extern int jl_n_sweepthreads; extern int gc_first_tid; // interface to Julia; sets up to make the runtime thread-safe @@ -650,22 +662,37 @@ void jl_init_threading(void) } } - int16_t ngcthreads = jl_options.ngcthreads - 1; - if (ngcthreads == -1 && - (cp = getenv(NUM_GC_THREADS_NAME))) { // ENV[NUM_GC_THREADS_NAME] specified - - ngcthreads = (uint64_t)strtol(cp, NULL, 10) - 1; - } - if (ngcthreads == -1) { - // if `--gcthreads` was not specified, set the number of GC threads - // to half of compute threads - if (nthreads <= 1) { - ngcthreads = 0; + jl_n_markthreads = jl_options.nmarkthreads - 1; + jl_n_sweepthreads = jl_options.nsweepthreads; + if (jl_n_markthreads == -1) { // --gcthreads not specified + if ((cp = getenv(NUM_GC_THREADS_NAME))) { // ENV[NUM_GC_THREADS_NAME] specified + errno = 0; + jl_n_markthreads = (uint64_t)strtol(cp, &endptr, 10) - 1; + if (errno != 0 || endptr == cp || nthreads <= 0) + 
jl_n_markthreads = 0; + cp = endptr; + if (*cp == ',') { + cp++; + errno = 0; + jl_n_sweepthreads = strtol(cp, &endptri, 10); + if (errno != 0 || endptri == cp || jl_n_sweepthreads < 0) { + jl_n_sweepthreads = 0; + } + } } else { - ngcthreads = (nthreads / 2) - 1; + // if `--gcthreads` or ENV[NUM_GCTHREADS_NAME] was not specified, + // set the number of mark threads to half of compute threads + // and number of sweep threads to 0 + if (nthreads <= 1) { + jl_n_markthreads = 0; + } + else { + jl_n_markthreads = (nthreads / 2) - 1; + } } } + int16_t ngcthreads = jl_n_markthreads + jl_n_sweepthreads; #ifdef MMTK_GC // MMTk gets the number of GC threads from jl_options.ngcthreads, and spawn its GC threads. @@ -737,8 +764,11 @@ void jl_start_threads(void) mask[i] = 0; } } + else if (i == nthreads - 1 && jl_n_sweepthreads == 1) { + uv_thread_create(&uvtid, jl_gc_sweep_threadfun, t); + } else { - uv_thread_create(&uvtid, jl_gc_threadfun, t); + uv_thread_create(&uvtid, jl_gc_mark_threadfun, t); } uv_thread_detach(&uvtid); } diff --git a/src/threading.h b/src/threading.h index 40792a2889e44..73d2cd73fb70d 100644 --- a/src/threading.h +++ b/src/threading.h @@ -25,7 +25,8 @@ jl_ptls_t jl_init_threadtls(int16_t tid) JL_NOTSAFEPOINT; // provided by a threading infrastructure void jl_init_threadinginfra(void); -void jl_gc_threadfun(void *arg); +void jl_gc_mark_threadfun(void *arg); +void jl_gc_sweep_threadfun(void *arg); void jl_threadfun(void *arg); #ifdef __cplusplus diff --git a/src/timing.c b/src/timing.c index 3290e68ee9169..d933f082c816e 100644 --- a/src/timing.c +++ b/src/timing.c @@ -6,6 +6,10 @@ #include "options.h" #include "stdio.h" +#if defined(USE_TRACY) || defined(USE_ITTAPI) +#define DISABLE_FREQUENT_EVENTS +#endif + jl_module_t *jl_module_root(jl_module_t *m); #ifdef __cplusplus @@ -19,62 +23,81 @@ extern "C" { #endif static uint64_t t0; -#if defined(USE_TRACY) || defined(USE_ITTAPI) -/** - * These sources often generate millions of events / minute. 
Although Tracy - * can generally keep up with that, those events also bloat the saved ".tracy" - * files, so we disable them by default. - **/ -JL_DLLEXPORT uint64_t jl_timing_enable_mask = ~((1ull << JL_TIMING_ROOT) | - (1ull << JL_TIMING_TYPE_CACHE_LOOKUP) | - (1ull << JL_TIMING_METHOD_MATCH) | - (1ull << JL_TIMING_METHOD_LOOKUP_FAST) | - (1ull << JL_TIMING_AST_COMPRESS) | - (1ull << JL_TIMING_AST_UNCOMPRESS)); -#else -JL_DLLEXPORT uint64_t jl_timing_enable_mask = ~0ull; -#endif -JL_DLLEXPORT uint64_t jl_timing_counts[(int)JL_TIMING_LAST] = {0}; +JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask[(JL_TIMING_SUBSYSTEM_LAST + sizeof(uint64_t) * CHAR_BIT - 1) / (sizeof(uint64_t) * CHAR_BIT)]; // Used to as an item limit when several strings of metadata can // potentially be associated with a single timing zone. JL_DLLEXPORT uint32_t jl_timing_print_limit = 10; -const char *jl_timing_names[(int)JL_TIMING_LAST] = +const char *jl_timing_subsystems[(int)JL_TIMING_SUBSYSTEM_LAST] = { #define X(name) #name, - JL_TIMING_OWNERS + JL_TIMING_SUBSYSTEMS #undef X }; JL_DLLEXPORT jl_timing_counter_t jl_timing_counters[JL_TIMING_COUNTER_LAST]; +#ifdef USE_TIMING_COUNTS +static arraylist_t jl_timing_counts_events; +static jl_mutex_t jl_timing_counts_events_lock; +#endif //USE_TIMING_COUNTS + #ifdef USE_ITTAPI -JL_DLLEXPORT __itt_event jl_timing_ittapi_events[(int)JL_TIMING_EVENT_LAST]; +static arraylist_t jl_timing_ittapi_events; +static jl_mutex_t jl_timing_ittapi_events_lock; +#endif //USE_ITTAPI + +#ifdef USE_TIMING_COUNTS +static int cmp_counts_events(const void *a, const void *b) { + jl_timing_counts_event_t *event_a = *(jl_timing_counts_event_t **)a; + jl_timing_counts_event_t *event_b = *(jl_timing_counts_event_t **)b; + return strcmp(event_a->name, event_b->name); +} #endif void jl_print_timings(void) { #ifdef USE_TIMING_COUNTS + qsort(jl_timing_counts_events.items, jl_timing_counts_events.len, + sizeof(jl_timing_counts_event_t *), cmp_counts_events); + + 
JL_LOCK_NOGC(&jl_timing_counts_events_lock); uint64_t total_time = cycleclock() - t0; uint64_t root_time = total_time; - for (int i = 0; i < JL_TIMING_LAST; i++) { - root_time -= jl_timing_counts[i]; + jl_timing_counts_event_t *root_event; + for (int i = 0; i < jl_timing_counts_events.len; i++) { + jl_timing_counts_event_t *other_event = (jl_timing_counts_event_t *)jl_timing_counts_events.items[i]; + if (strcmp(other_event->name, "ROOT") == 0) { + root_event = other_event; + } else { + root_time -= jl_atomic_load_relaxed(&other_event->self); + } } - jl_timing_counts[0] = root_time; + jl_atomic_store_relaxed(&root_event->self, root_time); + jl_atomic_store_relaxed(&root_event->total, total_time); + fprintf(stderr, "\nJULIA TIMINGS\n"); - for (int i = 0; i < JL_TIMING_LAST; i++) { - if (jl_timing_counts[i] != 0) - fprintf(stderr, "%-25s : %5.2f %% %" PRIu64 "\n", jl_timing_names[i], - 100 * (((double)jl_timing_counts[i]) / total_time), jl_timing_counts[i]); + fprintf(stderr, "%-25s, %-30s, %-30s\n", "Event", "Self Cycles (% of Total)", "Total Cycles (% of Total)"); + for (int i = 0; i < jl_timing_counts_events.len; i++) { + jl_timing_counts_event_t *event = (jl_timing_counts_event_t *)jl_timing_counts_events.items[i]; + uint64_t self = jl_atomic_load_relaxed(&event->self); + uint64_t total = jl_atomic_load_relaxed(&event->total); + if (total != 0) + fprintf(stderr, "%-25s, %20" PRIu64 " (%5.2f %%), %20" PRIu64 " (%5.2f %%)\n", + event->name, + self, 100 * (((double)self) / total_time), + total, 100 * (((double)total) / total_time)); } + JL_UNLOCK_NOGC(&jl_timing_counts_events_lock); fprintf(stderr, "\nJULIA COUNTERS\n"); + fprintf(stderr, "%-25s, %-20s\n", "Counter", "Value"); #define X(name) do { \ int64_t val = (int64_t) jl_atomic_load_relaxed(&jl_timing_counters[(int)JL_TIMING_COUNTER_##name].basic_counter); \ if (val != 0) \ - fprintf(stderr, "%-25s : %" PRIi64 "\n", #name, val); \ + fprintf(stderr, "%-25s, %20" PRIi64 "\n", #name, val); \ } while (0); 
JL_TIMING_COUNTERS @@ -82,18 +105,42 @@ void jl_print_timings(void) #endif } +static int indirect_strcmp(const void *a, const void *b) { + return strcmp(*(const char **)a, *(const char **)b); +} + void jl_init_timing(void) { t0 = cycleclock(); - _Static_assert(JL_TIMING_EVENT_LAST < sizeof(uint64_t) * CHAR_BIT, "Too many timing events!"); - _Static_assert((int)JL_TIMING_LAST <= (int)JL_TIMING_EVENT_LAST, "More owners than events!"); + _Static_assert(JL_TIMING_SUBSYSTEM_LAST < sizeof(uint64_t) * CHAR_BIT, "Too many timing subsystems!"); + +#ifdef USE_TIMING_COUNTS + JL_MUTEX_INIT(&jl_timing_counts_events_lock, "jl_timing_counts_events_lock"); + + // Create events list for counts backend + arraylist_new(&jl_timing_counts_events, 1); + + jl_timing_counts_event_t *root_event = (jl_timing_counts_event_t *)malloc(sizeof(jl_timing_counts_event_t)); + arraylist_push(&jl_timing_counts_events, (void *)root_event); + + root_event->name = "ROOT"; + jl_atomic_store_relaxed(&root_event->self, 0); + jl_atomic_store_relaxed(&root_event->total, 0); +#endif + +#ifdef USE_ITTAPI + // Create events list for ITTAPI backend + JL_MUTEX_INIT(&jl_timing_ittapi_events_lock, "jl_timing_ittapi_events_lock"); + arraylist_new(&jl_timing_ittapi_events, 0); +#endif + + // Sort the subsystem names for quick enable/disable lookups + qsort(jl_timing_subsystems, JL_TIMING_SUBSYSTEM_LAST, + sizeof(const char *), indirect_strcmp); int i __attribute__((unused)) = 0; #ifdef USE_ITTAPI -#define X(name) jl_timing_ittapi_events[i++] = __itt_event_create(#name, strlen(#name)); - JL_TIMING_EVENTS -#undef X i = 0; #define X(name) jl_timing_counters[i++].ittapi_counter = __itt_counter_create(#name, "julia.runtime"); JL_TIMING_COUNTERS @@ -113,6 +160,27 @@ void jl_init_timing(void) TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_JITDataSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0); 
TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_ImageSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0); #endif + +/** + * These sources often generate millions of events / minute. Although Tracy + * can generally keep up with that, those events also bloat the saved ".tracy" + * files, so we disable them by default. + **/ +#ifdef DISABLE_FREQUENT_EVENTS + uint8_t error = 0; + error |= jl_timing_set_enable("ROOT", 0); + error |= jl_timing_set_enable("TYPE_CACHE_LOOKUP", 0); + error |= jl_timing_set_enable("METHOD_MATCH", 0); + error |= jl_timing_set_enable("METHOD_LOOKUP_FAST", 0); + error |= jl_timing_set_enable("AST_COMPRESS", 0); + error |= jl_timing_set_enable("AST_UNCOMPRESS", 0); + if (error) + jl_error("invalid timing subsystem encountered in jl_init_timing"); +#endif + + // Apply e.g. JULIA_TIMING_SUBSYSTEMS="+GC,-INFERENCE" and + // JULIA_TIMING_METADATA_PRINT_LIMIT=20 + jl_timing_apply_env(); } void jl_destroy_timing(void) @@ -120,25 +188,206 @@ void jl_destroy_timing(void) jl_ptls_t ptls = jl_current_task->ptls; jl_timing_block_t *stack = ptls->timing_stack; while (stack) { - _jl_timing_block_destroy(stack); + jl_timing_block_end(stack); stack = stack->prev; } } -jl_timing_block_t *jl_pop_timing_block(jl_timing_block_t *cur_block) +static const int get_timing_subsystem(const char *subsystem) { + const char **match = (const char **)bsearch( + &subsystem, jl_timing_subsystems, JL_TIMING_SUBSYSTEM_LAST, + sizeof(const char *), indirect_strcmp + ); + if (!match) + return JL_TIMING_SUBSYSTEM_LAST; + + return (int)(match - &jl_timing_subsystems[0]); +} + +#ifdef USE_ITTAPI + +typedef struct { + __itt_event event; + const char *name; +} cached_ittapi_event_t; + +static __itt_event _jl_timing_ittapi_event_create(const char *event) { + JL_LOCK_NOGC(&jl_timing_ittapi_events_lock); + const size_t n = jl_timing_ittapi_events.len; + for (size_t i = 0; i < n; i++) { + cached_ittapi_event_t *other_event = 
(cached_ittapi_event_t *)jl_timing_ittapi_events.items[i]; + if (strcmp(event, other_event->name) == 0) { + JL_UNLOCK_NOGC(&jl_timing_ittapi_events_lock); + return other_event->event; + } + } + + // No matching event found - create a new one + cached_ittapi_event_t *new_event = (cached_ittapi_event_t *)malloc(sizeof(cached_ittapi_event_t)); + arraylist_push(&jl_timing_ittapi_events, (void *)new_event); + new_event->name = event; + new_event->event = __itt_event_create(event, strlen(event)); + JL_UNLOCK_NOGC(&jl_timing_ittapi_events_lock); + + return new_event->event; +} + +#endif // USE_ITTAPI + +#ifdef USE_TIMING_COUNTS + +// This function is analogous to __itt_event_create but for the counts backend +// +// `event` is required to live forever +static jl_timing_counts_event_t *_jl_timing_counts_event_create(const char *event) { + JL_LOCK_NOGC(&jl_timing_counts_events_lock); + const size_t n = jl_timing_counts_events.len; + for (size_t i = 0; i < n; i++) { + jl_timing_counts_event_t *other_event = (jl_timing_counts_event_t *)jl_timing_counts_events.items[i]; + if (strcmp(event, other_event->name) == 0) { + JL_UNLOCK_NOGC(&jl_timing_counts_events_lock); + return other_event; + } + } + + // No matching event found - create a new one + jl_timing_counts_event_t *new_event = (jl_timing_counts_event_t *)malloc(sizeof(jl_timing_counts_event_t)); + arraylist_push(&jl_timing_counts_events, (void *)new_event); + new_event->name = event; + jl_atomic_store_relaxed(&new_event->self, 0); + jl_atomic_store_relaxed(&new_event->total, 0); + JL_UNLOCK_NOGC(&jl_timing_counts_events_lock); + + return new_event; +} + +STATIC_INLINE void _jl_timing_counts_pause(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { +#ifdef JL_DEBUG_BUILD + assert(block->running); + block->running = 0; +#endif + block->total += t - block->start; +} + +STATIC_INLINE void _jl_timing_counts_resume(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { +#ifdef JL_DEBUG_BUILD + assert(!block->running); + 
block->running = 1; +#endif + block->start = t; +} + +STATIC_INLINE void _jl_timing_counts_start(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { + block->total = 0; + block->start = t; + block->t0 = t; +#ifdef JL_DEBUG_BUILD + block->running = 1; +#endif +} + +STATIC_INLINE void _jl_timing_counts_stop(jl_timing_block_t *block, uint64_t t) JL_NOTSAFEPOINT { +#ifdef JL_DEBUG_BUILD + assert(block->counts_ctx.running); + block->counts_ctx.running = 0; +#endif + jl_timing_counts_event_t *event = block->event->counts_event; + block->counts_ctx.total += t - block->counts_ctx.start; + jl_atomic_fetch_add_relaxed(&event->self, block->counts_ctx.total); + jl_atomic_fetch_add_relaxed(&event->total, t - block->counts_ctx.t0); +} + +#endif // USE_TIMING_COUNTS + +JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, const char *name, const char *function, const char *file, int line, int color) { + int maybe_subsystem = get_timing_subsystem(subsystem); + if (maybe_subsystem >= JL_TIMING_SUBSYSTEM_LAST) { + jl_errorf("invalid timing subsystem name: %s", subsystem); + return NULL; + } + + jl_timing_event_t *event = (jl_timing_event_t *) malloc(sizeof(jl_timing_event_t)); + event->subsystem = maybe_subsystem; + +#ifdef USE_TIMING_COUNTS + event->counts_event = _jl_timing_counts_event_create(name); +#endif // USE_TIMING_COUNTS + +#ifdef USE_ITTAPI + event->ittapi_event = _jl_timing_ittapi_event_create(name); +#endif // USE_ITTAPI + +#ifdef USE_TRACY + event->tracy_srcloc.name = name; + event->tracy_srcloc.function = function; + event->tracy_srcloc.file = file; + event->tracy_srcloc.line = line; + event->tracy_srcloc.color = color; +#endif // USE_TRACY + + return event; +} + +JL_DLLEXPORT void _jl_timing_block_init(char *buf, size_t size, jl_timing_event_t *event) { + if (size < sizeof(jl_timing_block_t)) { + jl_errorf("jl_timing_block_t buffer must be at least %d bytes", sizeof(jl_timing_block_t)); + return; + } + + jl_timing_block_t *block = 
(jl_timing_block_t *)buf; + memset(block, 0, sizeof(jl_timing_block_t)); + block->event = event; +} + +JL_DLLEXPORT void _jl_timing_block_start(jl_timing_block_t *block) { + assert(!block->is_running); + if (!_jl_timing_enabled(block->event->subsystem)) return; + + uint64_t t = cycleclock(); (void)t; + _COUNTS_START(&block->counts_ctx, t); + _ITTAPI_START(block); + _TRACY_START(block); + + jl_timing_block_t **prevp = &jl_current_task->ptls->timing_stack; + block->prev = *prevp; + block->is_running = 1; + if (block->prev) { + _COUNTS_PAUSE(&block->prev->counts_ctx, t); + } + *prevp = block; +} + +JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *block) { + if (block->is_running) { + uint64_t t = cycleclock(); (void)t; + _ITTAPI_STOP(block); + _TRACY_STOP(block->tracy_ctx); + _COUNTS_STOP(block, t); + + jl_task_t *ct = jl_current_task; + jl_timing_block_t **pcur = &ct->ptls->timing_stack; + assert(*pcur == block); + *pcur = block->prev; + if (block->prev) { + _COUNTS_RESUME(&block->prev->counts_ctx, t); + } + } +} + +jl_timing_block_t *jl_timing_block_pop(jl_timing_block_t *cur_block) { - _jl_timing_block_destroy(cur_block); + jl_timing_block_end(cur_block); return cur_block->prev; } -void jl_timing_block_enter_task(jl_task_t *ct, jl_ptls_t ptls, jl_timing_block_t *prev_blk) +void jl_timing_block_task_enter(jl_task_t *ct, jl_ptls_t ptls, jl_timing_block_t *prev_blk) { if (prev_blk != NULL) { assert(ptls->timing_stack == NULL); ptls->timing_stack = prev_blk; if (prev_blk != NULL) { - _COUNTS_START(&prev_blk->counts_ctx, cycleclock()); + _COUNTS_RESUME(&prev_blk->counts_ctx, cycleclock()); } } @@ -149,7 +398,7 @@ void jl_timing_block_enter_task(jl_task_t *ct, jl_ptls_t ptls, jl_timing_block_t #endif } -jl_timing_block_t *jl_timing_block_exit_task(jl_task_t *ct, jl_ptls_t ptls) +jl_timing_block_t *jl_timing_block_task_exit(jl_task_t *ct, jl_ptls_t ptls) { #ifdef USE_TRACY // Tracy is fairly strict about not leaving a fiber that hasn't @@ -171,7 +420,7 @@ 
jl_timing_block_t *jl_timing_block_exit_task(jl_task_t *ct, jl_ptls_t ptls) ptls->timing_stack = NULL; if (blk != NULL) { - _COUNTS_STOP(&blk->counts_ctx, cycleclock()); + _COUNTS_PAUSE(&blk->counts_ctx, cycleclock()); } return blk; } @@ -187,7 +436,7 @@ JL_DLLEXPORT void jl_timing_show(jl_value_t *v, jl_timing_block_t *cur_block) if (buf.size == buf.maxsize) memset(&buf.buf[IOS_INLSIZE - 3], '.', 3); - TracyCZoneText(*(cur_block->tracy_ctx), buf.buf, buf.size); + TracyCZoneText(cur_block->tracy_ctx, buf.buf, buf.size); #endif } @@ -197,7 +446,7 @@ JL_DLLEXPORT void jl_timing_show_module(jl_module_t *m, jl_timing_block_t *cur_b jl_module_t *root = jl_module_root(m); if (root == m || root == jl_main_module) { const char *module_name = jl_symbol_name(m->name); - TracyCZoneText(*(cur_block->tracy_ctx), module_name, strlen(module_name)); + TracyCZoneText(cur_block->tracy_ctx, module_name, strlen(module_name)); } else { jl_timing_printf(cur_block, "%s.%s", jl_symbol_name(root->name), jl_symbol_name(m->name)); } @@ -208,27 +457,46 @@ JL_DLLEXPORT void jl_timing_show_filename(const char *path, jl_timing_block_t *c { #ifdef USE_TRACY const char *filename = gnu_basename(path); - TracyCZoneText(*(cur_block->tracy_ctx), filename, strlen(filename)); + TracyCZoneText(cur_block->tracy_ctx, filename, strlen(filename)); +#endif +} + +JL_DLLEXPORT void jl_timing_show_location(const char *file, int line, jl_module_t* mod, jl_timing_block_t *cur_block) +{ +#ifdef USE_TRACY + jl_module_t *root = jl_module_root(mod); + if (root == mod || root == jl_main_module) { + jl_timing_printf(cur_block, "%s:%d in %s", + gnu_basename(file), + line, + jl_symbol_name(mod->name)); + } else { + // TODO: generalize to print the entire module hierarchy + jl_timing_printf(cur_block, "%s:%d in %s.%s", + gnu_basename(file), + line, + jl_symbol_name(root->name), + jl_symbol_name(mod->name)); + } #endif } JL_DLLEXPORT void jl_timing_show_method_instance(jl_method_instance_t *mi, jl_timing_block_t *cur_block) 
{ jl_timing_show_func_sig(mi->specTypes, cur_block); - jl_method_t *def = mi->def.method; - jl_timing_printf(cur_block, "%s:%d in %s", - gnu_basename(jl_symbol_name(def->file)), - def->line, - jl_symbol_name(def->module->name)); + if (jl_is_method(mi->def.value)) { + jl_method_t *def = mi->def.method; + jl_timing_show_location(jl_symbol_name(def->file), def->line, def->module, cur_block); + } else { + jl_timing_printf(cur_block, " in %s", + jl_symbol_name(mi->def.module->name)); + } } JL_DLLEXPORT void jl_timing_show_method(jl_method_t *method, jl_timing_block_t *cur_block) { jl_timing_show((jl_value_t *)method, cur_block); - jl_timing_printf(cur_block, "%s:%d in %s", - gnu_basename(jl_symbol_name(method->file)), - method->line, - jl_symbol_name(method->module->name)); + jl_timing_show_location(jl_symbol_name(method->file), method->line, method->module, cur_block); } JL_DLLEXPORT void jl_timing_show_func_sig(jl_value_t *v, jl_timing_block_t *cur_block) @@ -243,10 +511,19 @@ JL_DLLEXPORT void jl_timing_show_func_sig(jl_value_t *v, jl_timing_block_t *cur_ if (buf.size == buf.maxsize) memset(&buf.buf[IOS_INLSIZE - 3], '.', 3); - TracyCZoneText(*(cur_block->tracy_ctx), buf.buf, buf.size); + TracyCZoneText(cur_block->tracy_ctx, buf.buf, buf.size); #endif } +JL_DLLEXPORT void jl_timing_show_macro(jl_method_instance_t *macro, jl_value_t* lno, jl_module_t* mod, jl_timing_block_t *cur_block) +{ + jl_timing_printf(cur_block, "%s", jl_symbol_name(macro->def.method->name)); + assert(jl_typetagis(lno, jl_linenumbernode_type)); + jl_timing_show_location(jl_symbol_name((jl_sym_t*)jl_fieldref(lno, 1)), + jl_unbox_int64(jl_fieldref(lno, 0)), + mod, cur_block); +} + JL_DLLEXPORT void jl_timing_printf(jl_timing_block_t *cur_block, const char *format, ...) 
{ va_list args; @@ -261,7 +538,7 @@ JL_DLLEXPORT void jl_timing_printf(jl_timing_block_t *cur_block, const char *for if (buf.size == buf.maxsize) memset(&buf.buf[IOS_INLSIZE - 3], '.', 3); - TracyCZoneText(*(cur_block->tracy_ctx), buf.buf, buf.size); + TracyCZoneText(cur_block->tracy_ctx, buf.buf, buf.size); #endif va_end(args); } @@ -269,11 +546,11 @@ JL_DLLEXPORT void jl_timing_printf(jl_timing_block_t *cur_block, const char *for JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str) { #ifdef USE_TRACY - TracyCZoneText(*(cur_block->tracy_ctx), str, strlen(str)); + TracyCZoneText(cur_block->tracy_ctx, str, strlen(str)); #endif } -void jl_timing_init_task(jl_task_t *t) +void jl_timing_task_init(jl_task_t *t) { #ifdef USE_TRACY jl_value_t *start_type = jl_typeof(t->start); @@ -311,18 +588,17 @@ void jl_timing_init_task(jl_task_t *t) JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled) { - for (int i = 0; i < JL_TIMING_LAST; i++) { - if (strcmp(subsystem, jl_timing_names[i]) == 0) { - uint64_t subsystem_bit = (1ul << i); - if (enabled) { - jl_timing_enable_mask |= subsystem_bit; - } else { - jl_timing_enable_mask &= ~subsystem_bit; - } - return 0; - } + int i = get_timing_subsystem(subsystem); + if (i >= JL_TIMING_SUBSYSTEM_LAST) + return -1; + + uint64_t subsystem_bit = 1ul << (i % (sizeof(uint64_t) * CHAR_BIT)); + if (enabled) { + jl_atomic_fetch_and_relaxed(jl_timing_disable_mask + (i / (sizeof(uint64_t) * CHAR_BIT)), ~subsystem_bit); + } else { + jl_atomic_fetch_or_relaxed(jl_timing_disable_mask + (i / (sizeof(uint64_t) * CHAR_BIT)), subsystem_bit); } - return -1; + return 0; } static void jl_timing_set_enable_from_env(void) @@ -388,6 +664,13 @@ void jl_timing_apply_env(void) void jl_init_timing(void) { } void jl_destroy_timing(void) { } + +JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, const char *name, const char *function, const char *file, int line, int color) { return NULL; } + 
+JL_DLLEXPORT void _jl_timing_block_init(char *buf, size_t size, jl_timing_event_t *event) { } +JL_DLLEXPORT void _jl_timing_block_start(jl_timing_block_t *block) { } +JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *block) { } + JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled) { return -1; } JL_DLLEXPORT uint32_t jl_timing_print_limit = 0; diff --git a/src/timing.h b/src/timing.h index 73614864ea733..30f6ad0ab3b5c 100644 --- a/src/timing.h +++ b/src/timing.h @@ -31,7 +31,7 @@ void jl_init_timing(void); void jl_destroy_timing(void) JL_NOTSAFEPOINT; // Update the enable bit-mask to enable/disable tracing events for -// the subsystem in `jl_timing_names` matching the provided string. +// the subsystem in `jl_timing_subsystems` matching the provided string. // // Returns -1 if no matching sub-system was found. JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled); @@ -51,13 +51,16 @@ void jl_timing_apply_env(void); // when adding potentially many items of metadata to a single timing zone. 
extern JL_DLLEXPORT uint32_t jl_timing_print_limit; +JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, const char *name, const char *function, const char *file, int line, int color); +JL_DLLEXPORT void _jl_timing_block_init(char *buf, size_t size, jl_timing_event_t *event); +JL_DLLEXPORT void _jl_timing_block_start(jl_timing_block_t *cur_block); +JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *cur_block); + #ifdef __cplusplus } #endif -#ifdef __cplusplus -#define HAVE_TIMING_SUPPORT -#elif defined(_COMPILER_CLANG_) +#if defined(_COMPILER_CLANG_) #define HAVE_TIMING_SUPPORT #elif defined(_COMPILER_GCC_) #define HAVE_TIMING_SUPPORT @@ -70,7 +73,9 @@ extern JL_DLLEXPORT uint32_t jl_timing_print_limit; #if !defined( ENABLE_TIMINGS ) || !defined( HAVE_TIMING_SUPPORT ) #define JL_TIMING(subsystem, event) -#define JL_TIMING_SUSPEND(subsystem, ct) +#define JL_TIMING_CREATE_BLOCK(new_block_name, subsystem, event) + +#define JL_TIMING_SUSPEND_TASK(subsystem, ct) #define jl_timing_show(v, b) #define jl_timing_show_module(m, b) @@ -78,12 +83,16 @@ extern JL_DLLEXPORT uint32_t jl_timing_print_limit; #define jl_timing_show_method_instance(mi, b) #define jl_timing_show_method(mi, b) #define jl_timing_show_func_sig(tt, b) +#define jl_timing_show_location(file, line, mod, b) +#define jl_timing_show_macro(macro, lno, mod, b) #define jl_timing_printf(b, f, ...) 
#define jl_timing_puts(b, s) -#define jl_timing_init_task(t) -#define jl_timing_block_enter_task(ct, ptls, blk) -#define jl_timing_block_exit_task(ct, ptls) ((jl_timing_block_t *)NULL) -#define jl_pop_timing_block(blk) +#define jl_timing_task_init(t) +#define jl_timing_event_create(blk) +#define jl_timing_block_start(blk) +#define jl_timing_block_task_enter(ct, ptls, blk) +#define jl_timing_block_task_exit(ct, ptls) ((jl_timing_block_t *)NULL) +#define jl_timing_block_pop(blk) #define jl_timing_counter_inc(counter, value) #define jl_timing_counter_dec(counter, value) @@ -99,6 +108,7 @@ extern JL_DLLEXPORT uint32_t jl_timing_print_limit; #include "julia_assert.h" #ifdef USE_TRACY #include "tracy/TracyC.h" +typedef struct ___tracy_source_location_data TracySrcLocData; #endif #ifdef USE_ITTAPI @@ -109,11 +119,11 @@ extern JL_DLLEXPORT uint32_t jl_timing_print_limit; extern "C" { #endif void jl_print_timings(void); -jl_timing_block_t *jl_pop_timing_block(jl_timing_block_t *cur_block); -void jl_timing_init_task(jl_task_t *t); -void jl_timing_block_enter_task(jl_task_t *ct, jl_ptls_t ptls, jl_timing_block_t *prev_blk); -jl_timing_block_t *jl_timing_block_exit_task(jl_task_t *ct, jl_ptls_t ptls); +void jl_timing_task_init(jl_task_t *t); +void jl_timing_block_task_enter(jl_task_t *ct, jl_ptls_t ptls, jl_timing_block_t *prev_blk); +jl_timing_block_t *jl_timing_block_task_exit(jl_task_t *ct, jl_ptls_t ptls); +jl_timing_block_t *jl_timing_block_pop(jl_timing_block_t *cur_block); // Add the output of `jl_static_show(x)` as a text annotation to the // profiling region corresponding to `cur_block`. 
@@ -125,19 +135,22 @@ JL_DLLEXPORT void jl_timing_show_filename(const char *path, jl_timing_block_t *c JL_DLLEXPORT void jl_timing_show_method_instance(jl_method_instance_t *mi, jl_timing_block_t *cur_block); JL_DLLEXPORT void jl_timing_show_method(jl_method_t *method, jl_timing_block_t *cur_block); JL_DLLEXPORT void jl_timing_show_func_sig(jl_value_t *v, jl_timing_block_t *cur_block); +JL_DLLEXPORT void jl_timing_show_location(const char *file, int line, jl_module_t* mod, jl_timing_block_t *cur_block); +JL_DLLEXPORT void jl_timing_show_macro(jl_method_instance_t *macro, jl_value_t* lno, jl_module_t* mod, jl_timing_block_t *cur_block); JL_DLLEXPORT void jl_timing_printf(jl_timing_block_t *cur_block, const char *format, ...); JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str); + +#define jl_timing_event_create(subsystem, name, function, file, line, color) _jl_timing_event_create(subsystem, name, function, file, line, color) +#define jl_timing_block_start(blk) _jl_timing_block_start(blk) +#define jl_timing_block_end(blk) _jl_timing_block_end(blk) + #ifdef __cplusplus } #endif -#ifdef __cplusplus -#define JL_TIMING_CURRENT_BLOCK (&__timing_block.block) -#else -#define JL_TIMING_CURRENT_BLOCK (&__timing_block) -#endif +#define JL_TIMING_DEFAULT_BLOCK (&__timing_block) -#define JL_TIMING_OWNERS \ +#define JL_TIMING_SUBSYSTEMS \ X(ROOT) \ X(GC) \ X(LOWERING) \ @@ -170,27 +183,6 @@ JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str); X(JULIA_INIT) \ -#define JL_TIMING_EVENTS \ - JL_TIMING_OWNERS \ - X(GC_Stop) \ - X(GC_Mark) \ - X(GC_Sweep) \ - X(GC_Finalizers) \ - X(CODEGEN_LLVM) \ - X(CODEGEN_Codeinst) \ - X(CODEGEN_Workqueue) \ - X(LOAD_Sysimg) \ - X(LOAD_Pkgimg) \ - X(LOAD_Processor) \ - X(VERIFY_Edges) \ - X(VERIFY_Methods) \ - X(VERIFY_Graph) \ - X(STACKWALK_Backtrace) \ - X(STACKWALK_Excstack) \ - X(NATIVE_Dump) \ - X(NATIVE_Create) \ - - #define JL_TIMING_COUNTERS \ X(Invalidations) \ X(HeapSize) \ @@ 
-200,18 +192,11 @@ JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str); X(ImageSize) \ -enum jl_timing_owners { +enum jl_timing_subsystem { #define X(name) JL_TIMING_ ## name, - JL_TIMING_OWNERS -#undef X - JL_TIMING_LAST -}; - -enum jl_timing_events { -#define X(name) JL_TIMING_EVENT_ ## name, - JL_TIMING_EVENTS + JL_TIMING_SUBSYSTEMS #undef X - JL_TIMING_EVENT_LAST + JL_TIMING_SUBSYSTEM_LAST }; enum jl_timing_counter_types { @@ -221,27 +206,42 @@ enum jl_timing_counter_types { JL_TIMING_COUNTER_LAST }; -/** - * Timing back-ends differ in terms of whether they support nested - * and asynchronous events. - **/ +#define TIMING_XCONCAT(x1, x2) x1##x2 +#define TIMING_CONCAT(x1, x2) TIMING_XCONCAT(x1, x2) /** * Timing Backend: Aggregated timing counts (implemented in timing.c) **/ +typedef struct jl_timing_counts_event_t { + const char *name; + _Atomic(uint64_t) self; + _Atomic(uint64_t) total; +} jl_timing_counts_event_t; + +typedef struct _jl_timing_counts_t { + uint64_t total; + uint64_t start; + uint64_t t0; +#ifdef JL_DEBUG_BUILD + uint8_t running; +#endif +} jl_timing_counts_t; + #ifdef USE_TIMING_COUNTS -#define _COUNTS_CTX_MEMBER jl_timing_counts_t counts_ctx; -#define _COUNTS_CTOR(block, owner) _jl_timing_counts_ctor(block, owner) -#define _COUNTS_DESTROY(block) _jl_timing_counts_destroy(block) -#define _COUNTS_START(block, t) _jl_timing_counts_start(block, t) -#define _COUNTS_STOP(block, t) _jl_timing_counts_stop(block, t) +#define _COUNTS_EVENT_MEMBER jl_timing_counts_event_t *counts_event; +#define _COUNTS_BLOCK_MEMBER jl_timing_counts_t counts_ctx; +#define _COUNTS_START(block, t) _jl_timing_counts_start(block, t) +#define _COUNTS_STOP(block, t) _jl_timing_counts_stop(block, t) +#define _COUNTS_PAUSE(block, t) _jl_timing_counts_pause(block, t) +#define _COUNTS_RESUME(block, t) _jl_timing_counts_resume(block, t) #else -#define _COUNTS_CTX_MEMBER -#define _COUNTS_CTOR(block, owner) -#define _COUNTS_DESTROY(block) +#define 
_COUNTS_EVENT_MEMBER +#define _COUNTS_BLOCK_MEMBER #define _COUNTS_START(block, t) #define _COUNTS_STOP(block, t) +#define _COUNTS_PAUSE(block, t) +#define _COUNTS_RESUME(block, t) #endif /** @@ -249,122 +249,74 @@ enum jl_timing_counter_types { **/ #ifdef USE_TRACY -#define _TRACY_CTX_MEMBER TracyCZoneCtx *tracy_ctx; -#define _TRACY_CTOR(context, name, enable) TracyCZoneN(__tracy_ctx, name, (enable)); \ - (context) = &__tracy_ctx -#define _TRACY_DESTROY(ctx) TracyCZoneEnd(*ctx) +#define _TRACY_EVENT_MEMBER TracySrcLocData tracy_srcloc; +#define _TRACY_BLOCK_MEMBER TracyCZoneCtx tracy_ctx; +#define _TRACY_START(block) (block)->tracy_ctx = ___tracy_emit_zone_begin( &(block)->event->tracy_srcloc, 1 ); +#define _TRACY_STOP(ctx) TracyCZoneEnd(ctx) #else -#define _TRACY_CTX_MEMBER -#define _TRACY_CTOR(context, name, enable) -#define _TRACY_DESTROY(block) +#define _TRACY_EVENT_MEMBER +#define _TRACY_BLOCK_MEMBER +#define _TRACY_START(block) +#define _TRACY_STOP(ctx) #endif +/** + * Timing Backend: Intel VTune (ITTAPI) + **/ + #ifdef USE_ITTAPI -#define _ITTAPI_CTX_MEMBER int owner; int event; -#define _ITTAPI_CTOR(block, owner, event) block->owner = owner; block->event = event -#define _ITTAPI_START(block) if (_jl_timing_enabled(block->owner)) __itt_event_start(jl_timing_ittapi_events[block->event]) -#define _ITTAPI_STOP(block) if (_jl_timing_enabled(block->owner)) __itt_event_end(jl_timing_ittapi_events[block->event]) +#define _ITTAPI_EVENT_MEMBER __itt_event ittapi_event; +#define _ITTAPI_BLOCK_MEMBER +#define _ITTAPI_START(block) __itt_event_start((block)->event->ittapi_event) +#define _ITTAPI_STOP(block) __itt_event_end((block)->event->ittapi_event) #else -#define _ITTAPI_CTX_MEMBER -#define _ITTAPI_CTOR(block, owner, event) +#define _ITTAPI_EVENT_MEMBER +#define _ITTAPI_BLOCK_MEMBER #define _ITTAPI_START(block) #define _ITTAPI_STOP(block) #endif /** - * Implementation: Aggregated counts back-end + * Top-level jl_timing implementation **/ -extern JL_DLLEXPORT 
uint64_t jl_timing_counts[(int)JL_TIMING_LAST]; -typedef struct _jl_timing_counts_t { - uint64_t total; - uint64_t t0; - int owner; -#ifdef JL_DEBUG_BUILD - uint8_t running; -#endif -} jl_timing_counts_t; - -STATIC_INLINE void _jl_timing_counts_stop(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { -#ifdef JL_DEBUG_BUILD - assert(block->running); - block->running = 0; -#endif - block->total += t - block->t0; -} - -STATIC_INLINE void _jl_timing_counts_start(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { -#ifdef JL_DEBUG_BUILD - assert(!block->running); - block->running = 1; -#endif - block->t0 = t; -} - -STATIC_INLINE void _jl_timing_counts_ctor(jl_timing_counts_t *block, int owner) JL_NOTSAFEPOINT { - block->owner = owner; - block->total = 0; -#ifdef JL_DEBUG_BUILD - block->running = 0; -#endif -} - -STATIC_INLINE void _jl_timing_counts_destroy(jl_timing_counts_t *block) JL_NOTSAFEPOINT { - jl_timing_counts[block->owner] += block->total; -} +extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask[(JL_TIMING_SUBSYSTEM_LAST + sizeof(uint64_t) * CHAR_BIT - 1) / (sizeof(uint64_t) * CHAR_BIT)]; +extern const char *jl_timing_subsystems[(int)JL_TIMING_SUBSYSTEM_LAST]; /** - * Top-level jl_timing implementation + * Stores all static attributes associated with a profiling event. + * + * A single event can be used to create many timing blocks with + * the same name/source information. **/ +struct _jl_timing_event_t { // typedef in julia.h + _TRACY_EVENT_MEMBER + _ITTAPI_EVENT_MEMBER + _COUNTS_EVENT_MEMBER -extern JL_DLLEXPORT uint64_t jl_timing_enable_mask; -extern const char *jl_timing_names[(int)JL_TIMING_LAST]; -#ifdef USE_ITTAPI -extern JL_DLLEXPORT __itt_event jl_timing_ittapi_events[(int)JL_TIMING_EVENT_LAST]; -#endif + int subsystem; +}; +/** + * Stores all dynamic attributes associated with a timing block. + * + * Every time the application enters an instrumented block of code, + * a new timing block is created. 
A timing block corresponds to one + * "span" of time in the profiler. + **/ struct _jl_timing_block_t { // typedef in julia.h struct _jl_timing_block_t *prev; - _TRACY_CTX_MEMBER - _ITTAPI_CTX_MEMBER - _COUNTS_CTX_MEMBER -}; + jl_timing_event_t *event; -STATIC_INLINE int _jl_timing_enabled(int event) JL_NOTSAFEPOINT { - return !!(jl_timing_enable_mask & (1 << event)); -} + _TRACY_BLOCK_MEMBER + _ITTAPI_BLOCK_MEMBER + _COUNTS_BLOCK_MEMBER -STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner, int event) JL_NOTSAFEPOINT { - uint64_t t = cycleclock(); (void)t; - _COUNTS_CTOR(&block->counts_ctx, owner); - _COUNTS_START(&block->counts_ctx, t); - _ITTAPI_CTOR(block, owner, event); - _ITTAPI_START(block); - - jl_task_t *ct = jl_current_task; - jl_timing_block_t **prevp = &ct->ptls->timing_stack; - block->prev = *prevp; - if (block->prev) { - _COUNTS_STOP(&block->prev->counts_ctx, t); - } - *prevp = block; -} + uint8_t is_running; +}; -STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) JL_NOTSAFEPOINT { - uint64_t t = cycleclock(); (void)t; - - _ITTAPI_STOP(block); - _COUNTS_STOP(&block->counts_ctx, t); - _COUNTS_DESTROY(&block->counts_ctx); - _TRACY_DESTROY(block->tracy_ctx); - - jl_task_t *ct = jl_current_task; - jl_timing_block_t **pcur = &ct->ptls->timing_stack; - assert(*pcur == block); - *pcur = block->prev; - if (block->prev) { - _COUNTS_START(&block->prev->counts_ctx, t); - } +STATIC_INLINE int _jl_timing_enabled(int subsystem) JL_NOTSAFEPOINT { + return (jl_atomic_load_relaxed(jl_timing_disable_mask + subsystem / (sizeof(uint64_t) * CHAR_BIT)) & (1 << (subsystem % (sizeof(uint64_t) * CHAR_BIT)))) == 0; } typedef struct _jl_timing_suspend_t { @@ -384,51 +336,24 @@ STATIC_INLINE void _jl_timing_suspend_destroy(jl_timing_suspend_t *suspend) JL_N #endif } -#ifdef __cplusplus -struct jl_timing_block_cpp_t { - jl_timing_block_t block; - jl_timing_block_cpp_t(int owner, int event) JL_NOTSAFEPOINT { - 
_jl_timing_block_ctor(&block, owner, event); - } - ~jl_timing_block_cpp_t() JL_NOTSAFEPOINT { - _jl_timing_block_destroy(&block); - } - jl_timing_block_cpp_t(const jl_timing_block_cpp_t&) = delete; - jl_timing_block_cpp_t(const jl_timing_block_cpp_t&&) = delete; - jl_timing_block_cpp_t& operator=(const jl_timing_block_cpp_t &) = delete; - jl_timing_block_cpp_t& operator=(const jl_timing_block_cpp_t &&) = delete; -}; -#define JL_TIMING(subsystem, event) jl_timing_block_cpp_t __timing_block(JL_TIMING_ ## subsystem, JL_TIMING_EVENT_ ## event); \ - _TRACY_CTOR(__timing_block.block.tracy_ctx, #event, (jl_timing_enable_mask >> (JL_TIMING_ ## subsystem)) & 1) -#else -#define JL_TIMING(subsystem, event) \ - __attribute__((cleanup(_jl_timing_block_destroy))) \ - jl_timing_block_t __timing_block; \ - _jl_timing_block_ctor(&__timing_block, JL_TIMING_ ## subsystem, JL_TIMING_EVENT_ ## event); \ - _TRACY_CTOR(__timing_block.tracy_ctx, #event, (jl_timing_enable_mask >> (JL_TIMING_ ## subsystem)) & 1) -#endif - -#ifdef __cplusplus -struct jl_timing_suspend_cpp_t { - jl_timing_suspend_t suspend; - jl_timing_suspend_cpp_t(const char *subsystem, jl_task_t *ct) JL_NOTSAFEPOINT { - _jl_timing_suspend_ctor(&suspend, subsystem, ct); - } - ~jl_timing_suspend_cpp_t() JL_NOTSAFEPOINT { - _jl_timing_suspend_destroy(&suspend); - } - jl_timing_suspend_cpp_t(const jl_timing_suspend_cpp_t &) = delete; - jl_timing_suspend_cpp_t(jl_timing_suspend_cpp_t &&) = delete; - jl_timing_suspend_cpp_t& operator=(const jl_timing_suspend_cpp_t &) = delete; - jl_timing_suspend_cpp_t& operator=(jl_timing_suspend_cpp_t &&) = delete; -}; -#define JL_TIMING_SUSPEND(subsystem, ct) jl_timing_suspend_cpp_t __suspend_block(#subsystem, ct) -#else -#define JL_TIMING_SUSPEND(subsystem, ct) \ +#define JL_TIMING(subsystem, event) \ + JL_TIMING_CREATE_BLOCK(__timing_block, subsystem, event); \ + jl_timing_block_start(&__timing_block) + +#define JL_TIMING_CREATE_BLOCK(block, subsystem_name, event_name) \ + static 
jl_timing_event_t *TIMING_CONCAT(__timing_event, __LINE__) = 0; \ + if (!TIMING_CONCAT(__timing_event, __LINE__)) \ + TIMING_CONCAT(__timing_event, __LINE__) = jl_timing_event_create( \ + #subsystem_name, #event_name, __func__, __FILE__, __LINE__, 0 \ + ); \ + __attribute__((cleanup(_jl_timing_block_end))) \ + jl_timing_block_t block = { 0 }; \ + block.event = TIMING_CONCAT(__timing_event, __LINE__) + +#define JL_TIMING_SUSPEND_TASK(subsystem, ct) \ __attribute__((cleanup(_jl_timing_suspend_destroy))) \ jl_timing_suspend_t __timing_suspend; \ _jl_timing_suspend_ctor(&__timing_suspend, #subsystem, ct) -#endif // Counting #ifdef USE_ITTAPI diff --git a/src/toplevel.c b/src/toplevel.c index 200d0ad220231..51ff93488426f 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -65,7 +65,7 @@ static jl_function_t *jl_module_get_initializer(jl_module_t *m JL_PROPAGATES_ROO void jl_module_run_initializer(jl_module_t *m) { JL_TIMING(INIT_MODULE, INIT_MODULE); - jl_timing_show_module(m, JL_TIMING_CURRENT_BLOCK); + jl_timing_show_module(m, JL_TIMING_DEFAULT_BLOCK); jl_function_t *f = jl_module_get_initializer(m); if (f == NULL) return; @@ -185,17 +185,28 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex size_t last_age = ct->world_age; // add standard imports unless baremodule + jl_array_t *exprs = ((jl_expr_t*)jl_exprarg(ex, 2))->args; + int lineno = 0; + const char *filename = "none"; + if (jl_array_len(exprs) > 0) { + jl_value_t *lineex = jl_array_ptr_ref(exprs, 0); + if (jl_is_linenode(lineex)) { + lineno = jl_linenode_line(lineex); + jl_value_t *file = jl_linenode_file(lineex); + if (jl_is_symbol(file)) + filename = jl_symbol_name((jl_sym_t*)file); + } + } if (std_imports) { if (jl_base_module != NULL) { jl_add_standard_imports(newm); } // add `eval` function - form = jl_call_scm_on_ast("module-default-defs", (jl_value_t*)ex, newm); + form = jl_call_scm_on_ast_and_loc("module-default-defs", (jl_value_t*)name, newm, filename, lineno); 
jl_toplevel_eval_flex(newm, form, 0, 1); form = NULL; } - jl_array_t *exprs = ((jl_expr_t*)jl_exprarg(ex, 2))->args; for (int i = 0; i < jl_array_len(exprs); i++) { // process toplevel form ct->world_age = jl_atomic_load_acquire(&jl_world_counter); @@ -450,6 +461,9 @@ static void body_attributes(jl_array_t *body, int *has_ccall, int *has_defs, int static jl_module_t *call_require(jl_module_t *mod, jl_sym_t *var) JL_GLOBALLY_ROOTED { + JL_TIMING(LOAD_IMAGE, LOAD_Require); + jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, "%s", jl_symbol_name(var)); + static jl_value_t *require_func = NULL; int build_mode = jl_generating_output(); jl_module_t *m = NULL; @@ -642,19 +656,28 @@ static void check_macro_rename(jl_sym_t *from, jl_sym_t *to, const char *keyword jl_errorf("cannot rename non-macro \"%s\" to macro \"%s\" in \"%s\"", n1, n2, keyword); } -// Format msg and eval `throw(ErrorException(msg)))` in module `m`. -// Used in `jl_toplevel_eval_flex` instead of `jl_errorf` so that the error +// Eval `throw(ErrorException(msg)))` in module `m`. +// Used in `jl_toplevel_eval_flex` instead of `jl_throw` so that the error // location in julia code gets into the backtrace. -static void jl_eval_errorf(jl_module_t *m, const char* fmt, ...) +static void jl_eval_throw(jl_module_t *m, jl_value_t *exc) { jl_value_t *throw_ex = (jl_value_t*)jl_exprn(jl_call_sym, 2); JL_GC_PUSH1(&throw_ex); jl_exprargset(throw_ex, 0, jl_builtin_throw); + jl_exprargset(throw_ex, 1, exc); + jl_toplevel_eval_flex(m, throw_ex, 0, 0); + JL_GC_POP(); +} + +// Format error message and call jl_eval +static void jl_eval_errorf(jl_module_t *m, const char* fmt, ...) 
+{ va_list args; va_start(args, fmt); - jl_exprargset(throw_ex, 1, jl_vexceptionf(jl_errorexception_type, fmt, args)); + jl_value_t *exc = jl_vexceptionf(jl_errorexception_type, fmt, args); va_end(args); - jl_toplevel_eval_flex(m, throw_ex, 0, 0); + JL_GC_PUSH1(&exc); + jl_eval_throw(m, exc); JL_GC_POP(); } @@ -861,7 +884,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int jl_eval_errorf(m, "malformed \"%s\" expression", jl_symbol_name(head)); if (jl_is_string(jl_exprarg(ex, 0))) jl_eval_errorf(m, "syntax: %s", jl_string_data(jl_exprarg(ex, 0))); - jl_throw(jl_exprarg(ex, 0)); + jl_eval_throw(m, jl_exprarg(ex, 0)); } else if (jl_is_symbol(ex)) { JL_GC_POP(); diff --git a/stdlib/Artifacts/src/Artifacts.jl b/stdlib/Artifacts/src/Artifacts.jl index 47812fb993428..70593bfadae05 100644 --- a/stdlib/Artifacts/src/Artifacts.jl +++ b/stdlib/Artifacts/src/Artifacts.jl @@ -654,18 +654,13 @@ access a single file/directory within an artifact. Example: !!! compat "Julia 1.6" Slash-indexing requires at least Julia 1.6. 
""" -macro artifact_str(name, platform=nothing, artifacts_toml_path=nothing) +macro artifact_str(name, platform=nothing) # Find Artifacts.toml file we're going to load from srcfile = string(__source__.file) if ((isinteractive() && startswith(srcfile, "REPL[")) || (!isinteractive() && srcfile == "none")) && !isfile(srcfile) srcfile = pwd() end - # Sometimes we know the exact path to the Artifacts.toml file, so we can save some lookups - local artifacts_toml = if artifacts_toml_path === nothing || artifacts_toml_path == :(nothing) - find_artifacts_toml(srcfile) - else - eval(artifacts_toml_path) - end + local artifacts_toml = find_artifacts_toml(srcfile) if artifacts_toml === nothing error(string( "Cannot locate '(Julia)Artifacts.toml' file when attempting to use artifact '", @@ -695,7 +690,7 @@ macro artifact_str(name, platform=nothing, artifacts_toml_path=nothing) # If `name` is a constant, (and we're using the default `Platform`) we can actually load # and parse the `Artifacts.toml` file now, saving the work from runtime. 
- if isa(name, AbstractString) && (platform === nothing || platform == :(nothing)) + if isa(name, AbstractString) && platform === nothing # To support slash-indexing, we need to split the artifact name from the path tail: platform = HostPlatform() artifact_name, artifact_path_tail, hash = artifact_slash_lookup(name, artifact_dict, artifacts_toml, platform) diff --git a/stdlib/Artifacts/test/runtests.jl b/stdlib/Artifacts/test/runtests.jl index 248d851ccad79..67117217be549 100644 --- a/stdlib/Artifacts/test/runtests.jl +++ b/stdlib/Artifacts/test/runtests.jl @@ -91,9 +91,6 @@ end HelloWorldC_exe_path = joinpath(HelloWorldC_dir, "bin", "hello_world$(exeext)") @test isfile(HelloWorldC_exe_path) - HelloWorldC_dir_explicit_artifact = eval(:(@artifact_str "HelloWorldC" nothing joinpath(@__DIR__, "Artifacts.toml"))) - @test isdir(HelloWorldC_dir_explicit_artifact) - # Simple slash-indexed lookup HelloWorldC_bin_path = artifact"HelloWorldC/bin" @test isdir(HelloWorldC_bin_path) diff --git a/stdlib/CompilerSupportLibraries_jll/Project.toml b/stdlib/CompilerSupportLibraries_jll/Project.toml index fc5883cc79802..4c7aa35a99730 100644 --- a/stdlib/CompilerSupportLibraries_jll/Project.toml +++ b/stdlib/CompilerSupportLibraries_jll/Project.toml @@ -4,7 +4,7 @@ uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" # NOTE: When updating this, also make sure to update the value # `CSL_NEXT_GLIBCXX_VERSION` in `deps/csl.mk`, to properly disable # automatic usage of BB-built CSLs on extremely up-to-date systems! 
-version = "1.0.2+0" +version = "1.0.5+0" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/Distributed/src/cluster.jl b/stdlib/Distributed/src/cluster.jl index 3fd3d63108297..d8cc052967d50 100644 --- a/stdlib/Distributed/src/cluster.jl +++ b/stdlib/Distributed/src/cluster.jl @@ -1317,6 +1317,28 @@ end write_cookie(io::IO) = print(io.in, string(cluster_cookie(), "\n")) +function get_threads_spec(opts) + if opts.nthreads > 0 + @assert opts.nthreadpools >= 1 + @assert opts.nthreads_per_pool != C_NULL + thr = "$(unsafe_load(opts.nthreads_per_pool))" + if opts.nthreadpools == 2 + thr = "$(thr),$(unsafe_load(opts.nthreads_per_pool, 2))" + end + `--threads=$(thr)` + else + `` + end +end + +function get_gcthreads_spec(opts) + if opts.nmarkthreads > 0 || opts.nsweepthreads > 0 + `--gcthreads=$(opts.nmarkthreads),$(opts.nsweepthreads)` + else + `` + end +end + # Starts workers specified by (-n|--procs) and --machine-file command line options function process_opts(opts) # startup worker. @@ -1331,8 +1353,9 @@ function process_opts(opts) end # Propagate --threads to workers - threads = opts.nthreads > 0 ? `--threads=$(opts.nthreads)` : `` - gcthreads = opts.ngcthreads > 0 ? `--gcthreads=$(opts.ngcthreads)` : `` + threads = get_threads_spec(opts) + # Propagate --gcthreads to workers + gcthreads = get_gcthreads_spec(opts) exeflags = `$threads $gcthreads` diff --git a/stdlib/FileWatching/docs/src/index.md b/stdlib/FileWatching/docs/src/index.md index 6c332511f578f..a420d49232345 100644 --- a/stdlib/FileWatching/docs/src/index.md +++ b/stdlib/FileWatching/docs/src/index.md @@ -20,6 +20,7 @@ A simple utility tool for creating advisory pidfiles (lock files). 
```@docs mkpidlock +trymkpidlock close(lock::LockMonitor) ``` diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl index 17ae24460db6b..2a654547ae6e3 100644 --- a/stdlib/FileWatching/src/FileWatching.jl +++ b/stdlib/FileWatching/src/FileWatching.jl @@ -18,7 +18,8 @@ export PollingFileWatcher, FDWatcher, # pidfile: - mkpidlock + mkpidlock, + trymkpidlock import Base: @handle_as, wait, close, eventloop, notify_error, IOError, _sizeof_uv_poll, _sizeof_uv_fs_poll, _sizeof_uv_fs_event, _uv_hook_close, uv_error, _UVError, @@ -462,6 +463,11 @@ function __init__() global uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid}, Cint, Ptr{Cvoid}, Ptr{Cvoid})) global uv_jl_fseventscb_file = @cfunction(uv_fseventscb_file, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32)) global uv_jl_fseventscb_folder = @cfunction(uv_fseventscb_folder, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32)) + + Base.mkpidlock_hook = mkpidlock + Base.trymkpidlock_hook = trymkpidlock + Base.parse_pidfile_hook = Pidfile.parse_pidfile + nothing end @@ -885,6 +891,6 @@ function poll_file(s::AbstractString, interval_seconds::Real=5.007, timeout_s::R end include("pidfile.jl") -import .Pidfile: mkpidlock +import .Pidfile: mkpidlock, trymkpidlock end diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl index 8416765a57b97..6d40414e20db2 100644 --- a/stdlib/FileWatching/src/pidfile.jl +++ b/stdlib/FileWatching/src/pidfile.jl @@ -1,7 +1,7 @@ module Pidfile -export mkpidlock +export mkpidlock, trymkpidlock using Base: IOError, UV_EEXIST, UV_ESRCH, @@ -31,7 +31,7 @@ your program, so the `finalizer` does not reclaim it early. Optional keyword arguments: - `mode`: file access mode (modified by the process umask). Defaults to world-readable. - `poll_interval`: Specify the maximum time to between attempts (if `watch_file` doesn't work) - - `stale_age`: Delete an existing pidfile (ignoring the lock) if its mtime is older than this. 
+ - `stale_age`: Delete an existing pidfile (ignoring the lock) if it is older than this many seconds, based on its mtime. The file won't be deleted until 25x longer than this if the pid in the file appears that it may be valid. By default this is disabled (`stale_age` = 0), but a typical recommended value would be about 3-5x an estimated normal completion time. @@ -41,6 +41,16 @@ Optional keyword arguments: """ function mkpidlock end +""" + trymkpidlock([f::Function], at::String, [pid::Cint, proc::Process]; kwopts...) + +Like `mkpidlock` except returns `false` instead of waiting if the file is already locked. + +!!! compat "Julia 1.10" + This function requires at least Julia 1.10. + +""" +function trymkpidlock end # mutable only because we want to add a finalizer mutable struct LockMonitor @@ -95,6 +105,18 @@ function mkpidlock(at::String, proc::Process; kwopts...) return lock end +function trymkpidlock(args...; kwargs...) + try + mkpidlock(args...; kwargs..., wait=false) + catch ex + if ex isa PidlockedError + return false + else + rethrow() + end + end +end + """ Base.touch(::Pidfile.LockMonitor) @@ -192,8 +214,12 @@ function tryopen_exclusive(path::String, mode::Integer = 0o444) return nothing end +struct PidlockedError <: Exception + msg::AbstractString +end + """ - open_exclusive(path::String; mode, poll_interval, stale_age) :: File + open_exclusive(path::String; mode, poll_interval, wait, stale_age) :: File Create a new a file for read-write advisory-exclusive access. 
If `wait` is `false` then error out if the lock files exist @@ -218,7 +244,7 @@ function open_exclusive(path::String; file = tryopen_exclusive(path, mode) end if file === nothing - error("Failed to get pidfile lock for $(repr(path)).") + throw(PidlockedError("Failed to get pidfile lock for $(repr(path)).")) else return file end diff --git a/stdlib/FileWatching/test/pidfile.jl b/stdlib/FileWatching/test/pidfile.jl index 94621f6af78e3..c2cb0c88a1b1e 100644 --- a/stdlib/FileWatching/test/pidfile.jl +++ b/stdlib/FileWatching/test/pidfile.jl @@ -180,14 +180,14 @@ end Base.errormonitor(rmtask) t1 = time() - @test_throws ErrorException open_exclusive("pidfile", wait=false) + @test_throws Pidfile.PidlockedError open_exclusive("pidfile", wait=false) @test time()-t1 ≈ 0 atol=1 sleep(1) @test !deleted t1 = time() - @test_throws ErrorException open_exclusive("pidfile", wait=false) + @test_throws Pidfile.PidlockedError open_exclusive("pidfile", wait=false) @test time()-t1 ≈ 0 atol=1 wait(rmtask) @@ -246,7 +246,7 @@ end Base.errormonitor(waittask) # mkpidlock with no waiting - t = @elapsed @test_throws ErrorException mkpidlock("pidfile", wait=false) + t = @elapsed @test_throws Pidfile.PidlockedError mkpidlock("pidfile", wait=false) @test t ≈ 0 atol=1 t = @elapsed lockf1 = mkpidlock(joinpath(dir, "pidfile")) @@ -354,7 +354,7 @@ end @test lockf.update === nothing sleep(1) - t = @elapsed @test_throws ErrorException mkpidlock("pidfile-2", wait=false, stale_age=1, poll_interval=1, refresh=0) + t = @elapsed @test_throws Pidfile.PidlockedError mkpidlock("pidfile-2", wait=false, stale_age=1, poll_interval=1, refresh=0) @test t ≈ 0 atol=1 sleep(5) diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl index b0bf24e0d1379..25f06250c3f8f 100644 --- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl +++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl @@ -12,7 +12,7 @@ import Base.Docs.apropos using Base: unwrap_unionall, 
rewrap_unionall, isdeprecated, Bottom, show_unquoted, summarysize, signature_type, format_bytes - +using Base.Libc using Markdown include("editless.jl") @@ -21,7 +21,7 @@ include("macros.jl") include("clipboard.jl") """ - varinfo(m::Module=Main, pattern::Regex=r""; all::Bool = false, imported::Bool = false, recursive::Bool = false, sortby::Symbol = :name, minsize::Int = 0) + varinfo(m::Module=Main, pattern::Regex=r""; all=false, imported=false, recursive=false, sortby::Symbol=:name, minsize::Int=0) Return a markdown table giving information about exported global variables in a module, optionally restricted to those matching `pattern`. @@ -37,7 +37,7 @@ The memory consumption estimate is an approximate lower bound on the size of the The output of `varinfo` is intended for display purposes only. See also [`names`](@ref) to get an array of symbols defined in a module, which is suitable for more general manipulations. """ -function varinfo(m::Module=Base.active_module(), pattern::Regex=r""; all::Bool = false, imported::Bool = false, sortby::Symbol = :name, recursive::Bool = false, minsize::Int=0) +function varinfo(m::Module=Base.active_module(), pattern::Regex=r""; all::Bool = false, imported::Bool = false, recursive::Bool = false, sortby::Symbol = :name, minsize::Int=0) sortby in (:name, :size, :summary) || throw(ArgumentError("Unrecognized `sortby` value `:$sortby`. Possible options are `:name`, `:size`, and `:summary`")) rows = Vector{Any}[] workqueue = [(m, ""),] @@ -301,7 +301,7 @@ end # TODO: @deprecate peakflops to LinearAlgebra export peakflops """ - peakflops(n::Integer=2000; parallel::Bool=false) + peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false) `peakflops` computes the peak flop rate of the computer by using double precision [`gemm!`](@ref LinearAlgebra.BLAS.gemm!). 
For more information see @@ -311,12 +311,12 @@ export peakflops This function will be moved from `InteractiveUtils` to `LinearAlgebra` in the future. In Julia 1.1 and later it is available as `LinearAlgebra.peakflops`. """ -function peakflops(n::Integer=2000; parallel::Bool=false) - # Base.depwarn("`peakflop`s have moved to the LinearAlgebra module, " * +function peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false) + # Base.depwarn("`peakflops` has moved to the LinearAlgebra module, " * # "add `using LinearAlgebra` to your imports.", :peakflops) let LinearAlgebra = Base.require(Base.PkgId( Base.UUID((0x37e2e46d_f89d_539d,0xb4ee_838fcccc9c8e)), "LinearAlgebra")) - return LinearAlgebra.peakflops(n; parallel = parallel) + return LinearAlgebra.peakflops(n, eltype=eltype, ntrials=ntrials, parallel=parallel) end end diff --git a/stdlib/InteractiveUtils/src/clipboard.jl b/stdlib/InteractiveUtils/src/clipboard.jl index a4a5118acf8d7..c2abda9a60cc3 100644 --- a/stdlib/InteractiveUtils/src/clipboard.jl +++ b/stdlib/InteractiveUtils/src/clipboard.jl @@ -100,7 +100,7 @@ elseif Sys.iswindows() pdata == C_NULL && return cleanup(:GlobalAlloc) plock = ccall((:GlobalLock, "kernel32"), stdcall, Ptr{UInt16}, (Ptr{UInt16},), pdata) plock == C_NULL && return cleanup(:GlobalLock) - ccall(:memcpy, Ptr{UInt16}, (Ptr{UInt16}, Ptr{UInt16}, Csize_t), plock, x_u16, sizeof(x_u16)) + GC.@preserve x_u16 memcpy(plock, Base.unsafe_convert(Ptr{UInt16}, x_u16), sizeof(x_u16)) unlock = ccall((:GlobalUnlock, "kernel32"), stdcall, Cint, (Ptr{UInt16},), pdata) (unlock == 0 && Libc.GetLastError() == 0) || return cleanup(:GlobalUnlock) # this should never fail pset = ccall((:SetClipboardData, "user32"), stdcall, Ptr{UInt16}, (Cuint, Ptr{UInt16}), 13, pdata) # CF_UNICODETEXT diff --git a/stdlib/InteractiveUtils/src/codeview.jl b/stdlib/InteractiveUtils/src/codeview.jl index 29a64343b8370..646028575d052 100644 --- a/stdlib/InteractiveUtils/src/codeview.jl +++ 
b/stdlib/InteractiveUtils/src/codeview.jl @@ -167,10 +167,18 @@ const OC_MISMATCH_WARNING = """ # Printing code representations in IR and assembly + +function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrapper::Bool, + raw::Bool, dump_module::Bool, syntax::Symbol, + optimize::Bool, debuginfo::Symbol, binary::Bool) + params = CodegenParams(debug_info_kind=Cint(0), + safepoint_on_entry=raw, gcstack_arg=raw) + _dump_function(f, t, native, wrapper, raw, dump_module, syntax, + optimize, debuginfo, binary, params) +end function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrapper::Bool, - strip_ir_metadata::Bool, dump_module::Bool, syntax::Symbol, - optimize::Bool, debuginfo::Symbol, binary::Bool, - params::CodegenParams=CodegenParams(debug_info_kind=Cint(0))) + raw::Bool, dump_module::Bool, syntax::Symbol, + optimize::Bool, debuginfo::Symbol, binary::Bool, params::CodegenParams) ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions") if isa(f, Core.Builtin) throw(ArgumentError("argument is not a generic function")) @@ -180,21 +188,21 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe if !isa(f, Core.OpaqueClosure) world = Base.get_world_counter() match = Base._which(signature_type(f, t); world) - linfo = Core.Compiler.specialize_method(match) + mi = Core.Compiler.specialize_method(match) # TODO: use jl_is_cacheable_sig instead of isdispatchtuple - isdispatchtuple(linfo.specTypes) || (warning = GENERIC_SIG_WARNING) + isdispatchtuple(mi.specTypes) || (warning = GENERIC_SIG_WARNING) else world = UInt64(f.world) if Core.Compiler.is_source_inferred(f.source.source) # OC was constructed from inferred source. There's only one # specialization and we can't infer anything more precise either. 
world = f.source.primary_world - linfo = f.source.specializations::Core.MethodInstance + mi = f.source.specializations::Core.MethodInstance Core.Compiler.hasintersect(typeof(f).parameters[1], t) || (warning = OC_MISMATCH_WARNING) else - linfo = Core.Compiler.specialize_method(f.source, Tuple{typeof(f.captures), t.parameters...}, Core.svec()) - actual = isdispatchtuple(linfo.specTypes) - isdispatchtuple(linfo.specTypes) || (warning = GENERIC_SIG_WARNING) + mi = Core.Compiler.specialize_method(f.source, Tuple{typeof(f.captures), t.parameters...}, Core.svec()) + actual = isdispatchtuple(mi.specTypes) + isdispatchtuple(mi.specTypes) || (warning = GENERIC_SIG_WARNING) end end # get the code for it @@ -208,21 +216,25 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe throw(ArgumentError("'syntax' must be either :intel or :att")) end if dump_module - str = _dump_function_linfo_native(linfo, world, wrapper, syntax, debuginfo, binary, params) + # we want module metadata, so use LLVM to generate assembly output + str = _dump_function_native_assembly(mi, world, wrapper, syntax, debuginfo, binary, raw, params) else - str = _dump_function_linfo_native(linfo, world, wrapper, syntax, debuginfo, binary) + # if we don't want the module metadata, just disassemble what our JIT has + str = _dump_function_native_disassembly(mi, world, wrapper, syntax, debuginfo, binary) end else - str = _dump_function_linfo_llvm(linfo, world, wrapper, strip_ir_metadata, dump_module, optimize, debuginfo, params) + str = _dump_function_llvm(mi, world, wrapper, !raw, dump_module, optimize, debuginfo, params) end str = warning * str return str end -function _dump_function_linfo_native(linfo::Core.MethodInstance, world::UInt, wrapper::Bool, syntax::Symbol, debuginfo::Symbol, binary::Bool) - str = ccall(:jl_dump_method_asm, Ref{String}, - (Any, UInt, Bool, Bool, Ptr{UInt8}, Ptr{UInt8}, Bool), - linfo, world, false, wrapper, syntax, debuginfo, binary) +function 
_dump_function_native_disassembly(mi::Core.MethodInstance, world::UInt, + wrapper::Bool, syntax::Symbol, + debuginfo::Symbol, binary::Bool) + str = @ccall jl_dump_method_asm(mi::Any, world::UInt, false::Bool, wrapper::Bool, + syntax::Ptr{UInt8}, debuginfo::Ptr{UInt8}, + binary::Bool)::Ref{String} return str end @@ -231,27 +243,30 @@ struct LLVMFDump f::Ptr{Cvoid} # opaque end -function _dump_function_linfo_native(linfo::Core.MethodInstance, world::UInt, wrapper::Bool, syntax::Symbol, debuginfo::Symbol, binary::Bool, params::CodegenParams) +function _dump_function_native_assembly(mi::Core.MethodInstance, world::UInt, + wrapper::Bool, syntax::Symbol, debuginfo::Symbol, + binary::Bool, raw::Bool, params::CodegenParams) llvmf_dump = Ref{LLVMFDump}() - ccall(:jl_get_llvmf_defn, Cvoid, (Ptr{LLVMFDump}, Any, UInt, Bool, Bool, CodegenParams), llvmf_dump, linfo, world, wrapper, true, params) + @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump},mi::Any, world::UInt, wrapper::Bool, + true::Bool, params::CodegenParams)::Cvoid llvmf_dump[].f == C_NULL && error("could not compile the specified method") - str = ccall(:jl_dump_function_asm, Ref{String}, - (Ptr{LLVMFDump}, Bool, Ptr{UInt8}, Ptr{UInt8}, Bool), - llvmf_dump, false, syntax, debuginfo, binary) + str = @ccall jl_dump_function_asm(llvmf_dump::Ptr{LLVMFDump}, false::Bool, + syntax::Ptr{UInt8}, debuginfo::Ptr{UInt8}, + binary::Bool, raw::Bool)::Ref{String} return str end -function _dump_function_linfo_llvm( - linfo::Core.MethodInstance, world::UInt, wrapper::Bool, +function _dump_function_llvm( + mi::Core.MethodInstance, world::UInt, wrapper::Bool, strip_ir_metadata::Bool, dump_module::Bool, optimize::Bool, debuginfo::Symbol, params::CodegenParams) llvmf_dump = Ref{LLVMFDump}() - ccall(:jl_get_llvmf_defn, Cvoid, (Ptr{LLVMFDump}, Any, UInt, Bool, Bool, CodegenParams), llvmf_dump, linfo, world, wrapper, optimize, params) + @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump}, mi::Any, world::UInt, + wrapper::Bool, 
optimize::Bool, params::CodegenParams)::Cvoid llvmf_dump[].f == C_NULL && error("could not compile the specified method") - str = ccall(:jl_dump_function_ir, Ref{String}, - (Ptr{LLVMFDump}, Bool, Bool, Ptr{UInt8}), - llvmf_dump, strip_ir_metadata, dump_module, debuginfo) + str = @ccall jl_dump_function_ir(llvmf_dump::Ptr{LLVMFDump}, strip_ir_metadata::Bool, + dump_module::Bool, debuginfo::Ptr{UInt8})::Ref{String} return str end @@ -268,7 +283,7 @@ Keyword argument `debuginfo` may be one of source (default) or none, to specify """ function code_llvm(io::IO, @nospecialize(f), @nospecialize(types), raw::Bool, dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default) - d = _dump_function(f, types, false, false, !raw, dump_module, :intel, optimize, debuginfo, false) + d = _dump_function(f, types, false, false, raw, dump_module, :intel, optimize, debuginfo, false) if highlighting[:llvm] && get(io, :color, false)::Bool print_llvm(io, d) else @@ -290,20 +305,22 @@ generic function and type signature to `io`. * Specify verbosity of code comments by setting `debuginfo` to `:source` (default) or `:none`. * If `binary` is `true`, also print the binary machine code for each instruction precedented by an abbreviated address. * If `dump_module` is `false`, do not print metadata such as rodata or directives. +* If `raw` is `false`, uninteresting instructions (like the safepoint function prologue) are elided. 
See also: [`@code_native`](@ref), [`code_llvm`](@ref), [`code_typed`](@ref) and [`code_lowered`](@ref) """ function code_native(io::IO, @nospecialize(f), @nospecialize(types=Base.default_tt(f)); - dump_module::Bool=true, syntax::Symbol=:intel, debuginfo::Symbol=:default, binary::Bool=false) - d = _dump_function(f, types, true, false, false, dump_module, syntax, true, debuginfo, binary) + dump_module::Bool=true, syntax::Symbol=:intel, raw::Bool=false, + debuginfo::Symbol=:default, binary::Bool=false) + d = _dump_function(f, types, true, false, raw, dump_module, syntax, true, debuginfo, binary) if highlighting[:native] && get(io, :color, false)::Bool print_native(io, d) else print(io, d) end end -code_native(@nospecialize(f), @nospecialize(types=Base.default_tt(f)); dump_module::Bool=true, syntax::Symbol=:intel, debuginfo::Symbol=:default, binary::Bool=false) = - code_native(stdout, f, types; dump_module, syntax, debuginfo, binary) +code_native(@nospecialize(f), @nospecialize(types=Base.default_tt(f)); dump_module::Bool=true, syntax::Symbol=:intel, raw::Bool=false, debuginfo::Symbol=:default, binary::Bool=false) = + code_native(stdout, f, types; dump_module, syntax, raw, debuginfo, binary) code_native(::IO, ::Any, ::Symbol) = error("invalid code_native call") # resolve ambiguous call ## colorized IR and assembly printing diff --git a/stdlib/LibGit2/src/types.jl b/stdlib/LibGit2/src/types.jl index 1ea6c797d1636..0b653f9b6ad21 100644 --- a/stdlib/LibGit2/src/types.jl +++ b/stdlib/LibGit2/src/types.jl @@ -904,12 +904,13 @@ end Matches the [`git_config_entry`](https://libgit2.org/libgit2/#HEAD/type/git_config_entry) struct. 
""" -@kwdef struct ConfigEntry - name::Cstring = Cstring(C_NULL) - value::Cstring = Cstring(C_NULL) - level::GIT_CONFIG = Consts.CONFIG_LEVEL_DEFAULT - free::Ptr{Cvoid} = C_NULL - payload::Any = nothing +struct ConfigEntry + name::Cstring + value::Cstring + include_depth::Cuint + level::GIT_CONFIG + free::Ptr{Cvoid} + payload::Ptr{Cvoid} # User is not permitted to read or write this field end @assert Base.allocatedinline(ConfigEntry) diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl index a29c259dae607..386de771d666f 100644 --- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl +++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl @@ -9,14 +9,14 @@ module LinearAlgebra import Base: \, /, *, ^, +, -, == import Base: USE_BLAS64, abs, acos, acosh, acot, acoth, acsc, acsch, adjoint, asec, asech, - asin, asinh, atan, atanh, axes, big, broadcast, ceil, cis, conj, convert, copy, copyto!, - copymutable, cos, cosh, cot, coth, csc, csch, eltype, exp, fill!, floor, getindex, hcat, - getproperty, imag, inv, isapprox, isequal, isone, iszero, IndexStyle, kron, kron!, - length, log, map, ndims, one, oneunit, parent, permutedims, power_by_squaring, - print_matrix, promote_rule, real, round, sec, sech, setindex!, show, similar, sin, + asin, asinh, atan, atanh, axes, big, broadcast, ceil, cis, collect, conj, convert, copy, + copyto!, copymutable, cos, cosh, cot, coth, csc, csch, eltype, exp, fill!, floor, + getindex, hcat, getproperty, imag, inv, isapprox, isequal, isone, iszero, IndexStyle, + kron, kron!, length, log, map, ndims, one, oneunit, parent, permutedims, + power_by_squaring, promote_rule, real, sec, sech, setindex!, show, similar, sin, sincos, sinh, size, sqrt, strides, stride, tan, tanh, transpose, trunc, typed_hcat, vec, view, zero -using Base: IndexLinear, promote_eltype, promote_op, promote_typeof, +using Base: IndexLinear, promote_eltype, promote_op, promote_typeof, print_matrix, @propagate_inbounds, reduce, typed_hvcat, 
typed_vcat, require_one_based_indexing, splat using Base.Broadcast: Broadcasted, broadcasted @@ -457,6 +457,34 @@ const ⋅ = dot const × = cross export ⋅, × +wrapper_char(::AbstractArray) = 'N' +wrapper_char(::Adjoint) = 'C' +wrapper_char(::Adjoint{<:Real}) = 'T' +wrapper_char(::Transpose) = 'T' +wrapper_char(A::Hermitian) = A.uplo == 'U' ? 'H' : 'h' +wrapper_char(A::Hermitian{<:Real}) = A.uplo == 'U' ? 'S' : 's' +wrapper_char(A::Symmetric) = A.uplo == 'U' ? 'S' : 's' + +function wrap(A::AbstractVecOrMat, tA::AbstractChar) + if tA == 'N' + return A + elseif tA == 'T' + return transpose(A) + elseif tA == 'C' + return adjoint(A) + elseif tA == 'H' + return Hermitian(A, :U) + elseif tA == 'h' + return Hermitian(A, :L) + elseif tA == 'S' + return Symmetric(A, :U) + else # tA == 's' + return Symmetric(A, :L) + end +end + +_unwrap(A::AbstractVecOrMat) = A + ## convenience methods ## return only the solution of a least squares problem while avoiding promoting ## vectors to matrices. @@ -557,14 +585,20 @@ end ldiv(F, B) """ - LinearAlgebra.peakflops(n::Integer=2000; parallel::Bool=false) + LinearAlgebra.peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false) `peakflops` computes the peak flop rate of the computer by using double precision [`gemm!`](@ref LinearAlgebra.BLAS.gemm!). By default, if no arguments are specified, it -multiplies a matrix of size `n x n`, where `n = 2000`. If the underlying BLAS is using +multiplies two `Float64` matrices of size `n x n`, where `n = 4096`. If the underlying BLAS is using multiple threads, higher flop rates are realized. The number of BLAS threads can be set with [`BLAS.set_num_threads(n)`](@ref). +If the keyword argument `eltype` is provided, `peakflops` will construct matrices with elements +of type `eltype` for calculating the peak flop rate. + +By default, `peakflops` will use the best timing from 3 trials. 
If the `ntrials` keyword argument +is provided, `peakflops` will use those many trials for picking the best timing. + If the keyword argument `parallel` is set to `true`, `peakflops` is run in parallel on all the worker processors. The flop rate of the entire parallel computer is returned. When running in parallel, only 1 BLAS thread is used. The argument `n` still refers to the size @@ -574,19 +608,21 @@ of the problem that is solved on each processor. This function requires at least Julia 1.1. In Julia 1.0 it is available from the standard library `InteractiveUtils`. """ -function peakflops(n::Integer=2000; parallel::Bool=false) - a = fill(1.,100,100) - t = @elapsed a2 = a*a - a = fill(1.,n,n) - t = @elapsed a2 = a*a - @assert a2[1,1] == n +function peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false) + t = zeros(Float64, ntrials) + for i=1:ntrials + a = ones(eltype,n,n) + t[i] = @elapsed a2 = a*a + @assert a2[1,1] == n + end + if parallel let Distributed = Base.require(Base.PkgId( Base.UUID((0x8ba89e20_285c_5b6f, 0x9357_94700520ee1b)), "Distributed")) return sum(Distributed.pmap(peakflops, fill(n, Distributed.nworkers()))) end else - return 2*Float64(n)^3 / t + return 2*Float64(n)^3 / minimum(t) end end diff --git a/stdlib/LinearAlgebra/src/abstractq.jl b/stdlib/LinearAlgebra/src/abstractq.jl index 88610dac2e6f6..93358d052d50b 100644 --- a/stdlib/LinearAlgebra/src/abstractq.jl +++ b/stdlib/LinearAlgebra/src/abstractq.jl @@ -35,6 +35,7 @@ convert(::Type{AbstractQ{T}}, adjQ::AdjointQ{T}) where {T} = adjQ convert(::Type{AbstractQ{T}}, adjQ::AdjointQ) where {T} = convert(AbstractQ{T}, adjQ.Q)' # ... 
to matrix +collect(Q::AbstractQ) = copyto!(Matrix{eltype(Q)}(undef, size(Q)), Q) Matrix{T}(Q::AbstractQ) where {T} = convert(Matrix{T}, Q*I) # generic fallback, yields square matrix Matrix{T}(adjQ::AdjointQ{S}) where {T,S} = convert(Matrix{T}, lmul!(adjQ, Matrix{S}(I, size(adjQ)))) Matrix(Q::AbstractQ{T}) where {T} = Matrix{T}(Q) @@ -56,6 +57,15 @@ function size(Q::AbstractQ, dim::Integer) end size(adjQ::AdjointQ) = reverse(size(adjQ.Q)) +# comparison +(==)(Q::AbstractQ, A::AbstractMatrix) = lmul!(Q, Matrix{eltype(Q)}(I, size(A))) == A +(==)(A::AbstractMatrix, Q::AbstractQ) = Q == A +(==)(Q::AbstractQ, P::AbstractQ) = Matrix(Q) == Matrix(P) +isapprox(Q::AbstractQ, A::AbstractMatrix; kwargs...) = + isapprox(lmul!(Q, Matrix{eltype(Q)}(I, size(A))), A, kwargs...) +isapprox(A::AbstractMatrix, Q::AbstractQ; kwargs...) = isapprox(Q, A, kwargs...) +isapprox(Q::AbstractQ, P::AbstractQ; kwargs...) = isapprox(Matrix(Q), Matrix(P), kwargs...) + # pseudo-array behaviour, required for indexing with `begin` or `end` axes(Q::AbstractQ) = map(Base.oneto, size(Q)) axes(Q::AbstractQ, d::Integer) = d in (1, 2) ? 
axes(Q)[d] : Base.OneTo(1) @@ -125,14 +135,31 @@ function show(io::IO, ::MIME{Symbol("text/plain")}, Q::AbstractQ) end # multiplication +# generically, treat AbstractQ like a matrix with its definite size +qsize_check(Q::AbstractQ, B::AbstractVecOrMat) = + size(Q, 2) == size(B, 1) || + throw(DimensionMismatch("second dimension of Q, $(size(Q,2)), must coincide with first dimension of B, $(size(B,1))")) +qsize_check(A::AbstractVecOrMat, Q::AbstractQ) = + size(A, 2) == size(Q, 1) || + throw(DimensionMismatch("second dimension of A, $(size(A,2)), must coincide with first dimension of Q, $(size(Q,1))")) +qsize_check(Q::AbstractQ, P::AbstractQ) = + size(Q, 2) == size(P, 1) || + throw(DimensionMismatch("second dimension of A, $(size(Q,2)), must coincide with first dimension of B, $(size(P,1))")) + (*)(Q::AbstractQ, J::UniformScaling) = Q*J.λ function (*)(Q::AbstractQ, b::Number) T = promote_type(eltype(Q), typeof(b)) lmul!(convert(AbstractQ{T}, Q), Matrix{T}(b*I, size(Q))) end -function (*)(A::AbstractQ, B::AbstractVecOrMat) - T = promote_type(eltype(A), eltype(B)) - lmul!(convert(AbstractQ{T}, A), copy_similar(B, T)) +function (*)(Q::AbstractQ, B::AbstractVector) + T = promote_type(eltype(Q), eltype(B)) + qsize_check(Q, B) + mul!(similar(B, T, size(Q, 1)), convert(AbstractQ{T}, Q), B) +end +function (*)(Q::AbstractQ, B::AbstractMatrix) + T = promote_type(eltype(Q), eltype(B)) + qsize_check(Q, B) + mul!(similar(B, T, (size(Q, 1), size(B, 2))), convert(AbstractQ{T}, Q), B) end (*)(J::UniformScaling, Q::AbstractQ) = J.λ*Q @@ -140,21 +167,28 @@ function (*)(a::Number, Q::AbstractQ) T = promote_type(typeof(a), eltype(Q)) rmul!(Matrix{T}(a*I, size(Q)), convert(AbstractQ{T}, Q)) end -*(a::AbstractVector, Q::AbstractQ) = reshape(a, length(a), 1) * Q +function (*)(A::AbstractVector, Q::AbstractQ) + T = promote_type(eltype(A), eltype(Q)) + qsize_check(A, Q) + return mul!(similar(A, T, length(A)), A, convert(AbstractQ{T}, Q)) +end function (*)(A::AbstractMatrix, Q::AbstractQ) T = 
promote_type(eltype(A), eltype(Q)) - return rmul!(copy_similar(A, T), convert(AbstractQ{T}, Q)) + qsize_check(A, Q) + return mul!(similar(A, T, (size(A, 1), size(Q, 2))), A, convert(AbstractQ{T}, Q)) end (*)(u::AdjointAbsVec, Q::AbstractQ) = (Q'u')' ### Q*Q (including adjoints) -*(Q::AbstractQ, P::AbstractQ) = Q * (P*I) +(*)(Q::AbstractQ, P::AbstractQ) = Q * (P*I) ### mul! -function mul!(C::AbstractVecOrMat{T}, Q::AbstractQ{T}, B::Union{AbstractVecOrMat{T},AbstractQ{T}}) where {T} +function mul!(C::AbstractVecOrMat{T}, Q::AbstractQ{T}, B::Union{AbstractVecOrMat,AbstractQ}) where {T} require_one_based_indexing(C, B) - mB = size(B, 1) - mC = size(C, 1) + mB, nB = size(B, 1), size(B, 2) + mC, nC = size(C, 1), size(C, 2) + qsize_check(Q, B) + nB != nC && throw(DimensionMismatch()) if mB < mC inds = CartesianIndices(axes(B)) copyto!(view(C, inds), B) @@ -164,9 +198,21 @@ function mul!(C::AbstractVecOrMat{T}, Q::AbstractQ{T}, B::Union{AbstractVecOrMat return lmul!(Q, copyto!(C, B)) end end -mul!(C::AbstractVecOrMat{T}, A::AbstractVecOrMat{T}, Q::AbstractQ{T}) where {T} = rmul!(copyto!(C, A), Q) -mul!(C::AbstractVecOrMat{T}, adjQ::AdjointQ{T}, B::AbstractVecOrMat{T}) where {T} = lmul!(adjQ, copyto!(C, B)) -mul!(C::AbstractVecOrMat{T}, A::AbstractVecOrMat{T}, adjQ::AdjointQ{T}) where {T} = rmul!(copyto!(C, A), adjQ) +function mul!(C::AbstractVecOrMat{T}, A::AbstractVecOrMat, Q::AbstractQ{T}) where {T} + require_one_based_indexing(C, A) + mA, nA = size(A, 1), size(A, 2) + mC, nC = size(C, 1), size(C, 2) + mA != mC && throw(DimensionMismatch()) + qsize_check(A, Q) + if nA < nC + inds = CartesianIndices(axes(A)) + copyto!(view(C, inds), A) + C[CartesianIndices((axes(C, 1), nA+1:nC))] .= zero(T) + return rmul!(C, Q) + else + return rmul!(copyto!(C, A), Q) + end +end ### division \(Q::AbstractQ, A::AbstractVecOrMat) = Q'*A @@ -319,7 +365,7 @@ rmul!(A::StridedVecOrMat{T}, B::QRCompactWYQ{T,<:StridedMatrix}) where {T<:BlasF LAPACK.gemqrt!('R', 'N', B.factors, B.T, A) 
rmul!(A::StridedVecOrMat{T}, B::QRPackedQ{T,<:StridedMatrix}) where {T<:BlasFloat} = LAPACK.ormqr!('R', 'N', B.factors, B.τ, A) -function rmul!(A::AbstractMatrix, Q::QRPackedQ) +function rmul!(A::AbstractVecOrMat, Q::QRPackedQ) require_one_based_indexing(A) mQ, nQ = size(Q.factors) mA, nA = size(A,1), size(A,2) @@ -354,7 +400,7 @@ rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRPackedQ{T}}) where {T<:Bla (Q = adjQ.Q; LAPACK.ormqr!('R', 'T', Q.factors, Q.τ, A)) rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRPackedQ{T}}) where {T<:BlasComplex} = (Q = adjQ.Q; LAPACK.ormqr!('R', 'C', Q.factors, Q.τ, A)) -function rmul!(A::AbstractMatrix, adjQ::AdjointQ{<:Any,<:QRPackedQ}) +function rmul!(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:QRPackedQ}) require_one_based_indexing(A) Q = adjQ.Q mQ, nQ = size(Q.factors) @@ -459,42 +505,12 @@ lmul!(adjQ::AdjointQ{<:Any,<:HessenbergQ{T}}, X::Adjoint{T,<:StridedVecOrMat{T}} rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, adjQ::AdjointQ{<:Any,<:HessenbergQ{T}}) where {T} = lmul!(adjQ', X')' # flexible left-multiplication (and adjoint right-multiplication) -function (*)(Q::Union{QRPackedQ,QRCompactWYQ,HessenbergQ}, b::AbstractVector) - T = promote_type(eltype(Q), eltype(b)) - if size(Q.factors, 1) == length(b) - bnew = copy_similar(b, T) - elseif size(Q.factors, 2) == length(b) - bnew = [b; zeros(T, size(Q.factors, 1) - length(b))] - else - throw(DimensionMismatch("vector must have length either $(size(Q.factors, 1)) or $(size(Q.factors, 2))")) - end - lmul!(convert(AbstractQ{T}, Q), bnew) -end -function (*)(Q::Union{QRPackedQ,QRCompactWYQ,HessenbergQ}, B::AbstractMatrix) - T = promote_type(eltype(Q), eltype(B)) - if size(Q.factors, 1) == size(B, 1) - Bnew = copy_similar(B, T) - elseif size(Q.factors, 2) == size(B, 1) - Bnew = [B; zeros(T, size(Q.factors, 1) - size(B,1), size(B, 2))] - else - throw(DimensionMismatch("first dimension of matrix must have size either $(size(Q.factors, 1)) or $(size(Q.factors, 2))")) - end - 
lmul!(convert(AbstractQ{T}, Q), Bnew) -end -function (*)(A::AbstractMatrix, adjQ::AdjointQ{<:Any,<:Union{QRPackedQ,QRCompactWYQ,HessenbergQ}}) - Q = adjQ.Q - T = promote_type(eltype(A), eltype(adjQ)) - adjQQ = convert(AbstractQ{T}, adjQ) - if size(A, 2) == size(Q.factors, 1) - AA = copy_similar(A, T) - return rmul!(AA, adjQQ) - elseif size(A, 2) == size(Q.factors, 2) - return rmul!([A zeros(T, size(A, 1), size(Q.factors, 1) - size(Q.factors, 2))], adjQQ) - else - throw(DimensionMismatch("matrix A has dimensions $(size(A)) but Q-matrix B has dimensions $(size(adjQ))")) - end -end -(*)(u::AdjointAbsVec, Q::AdjointQ{<:Any,<:Union{QRPackedQ,QRCompactWYQ,HessenbergQ}}) = (Q'u')' +qsize_check(Q::Union{QRPackedQ,QRCompactWYQ,HessenbergQ}, B::AbstractVecOrMat) = + size(B, 1) in size(Q.factors) || + throw(DimensionMismatch("first dimension of B, $(size(B,1)), must equal one of the dimensions of Q, $(size(Q.factors))")) +qsize_check(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:Union{QRPackedQ,QRCompactWYQ,HessenbergQ}}) = + (Q = adjQ.Q; size(A, 2) in size(Q.factors) || + throw(DimensionMismatch("second dimension of A, $(size(A,2)), must equal one of the dimensions of Q, $(size(Q.factors))"))) det(Q::HessenbergQ) = _det_tau(Q.τ) @@ -518,104 +534,41 @@ convert(::Type{AbstractQ{T}}, Q::LQPackedQ) where {T} = LQPackedQ{T}(Q) size(Q::LQPackedQ) = (n = size(Q.factors, 2); return n, n) ## Multiplication -### QB / QcB -lmul!(A::LQPackedQ{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = LAPACK.ormlq!('L','N',A.factors,A.τ,B) -lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasReal} = - (A = adjA.Q; LAPACK.ormlq!('L', 'T', A.factors, A.τ, B)) -lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} = - (A = adjA.Q; LAPACK.ormlq!('L', 'C', A.factors, A.τ, B)) +# out-of-place right application of LQPackedQs +# +# these methods: (1) check whether the applied-to matrix's (A's) appropriate dimension +# (columns for A_*, rows 
for Ac_*) matches the number of columns (nQ) of the LQPackedQ (Q), +# and if so effectively apply Q's square form to A without additional shenanigans; and +# (2) if the preceding dimensions do not match, check whether the appropriate dimension of +# A instead matches the number of rows of the matrix of which Q is a factor (i.e. +# size(Q.factors, 1)), and if so implicitly apply Q's truncated form to A by zero extending +# A as necessary for check (1) to pass (if possible) and then applying Q's square form -function (*)(adjA::AdjointQ{<:Any,<:LQPackedQ}, B::AbstractVector) - A = adjA.Q - T = promote_type(eltype(A), eltype(B)) - if length(B) == size(A.factors, 2) - C = copy_similar(B, T) - elseif length(B) == size(A.factors, 1) - C = [B; zeros(T, size(A.factors, 2) - size(A.factors, 1), size(B, 2))] - else - throw(DimensionMismatch("length of B, $(length(B)), must equal one of the dimensions of A, $(size(A))")) - end - lmul!(convert(AbstractQ{T}, adjA), C) -end -function (*)(adjA::AdjointQ{<:Any,<:LQPackedQ}, B::AbstractMatrix) - A = adjA.Q - T = promote_type(eltype(A), eltype(B)) - if size(B,1) == size(A.factors,2) - C = copy_similar(B, T) - elseif size(B,1) == size(A.factors,1) - C = [B; zeros(T, size(A.factors, 2) - size(A.factors, 1), size(B, 2))] - else - throw(DimensionMismatch("first dimension of B, $(size(B,1)), must equal one of the dimensions of A, $(size(A))")) - end - lmul!(convert(AbstractQ{T}, adjA), C) -end +qsize_check(adjQ::AdjointQ{<:Any,<:LQPackedQ}, B::AbstractVecOrMat) = + size(B, 1) in size(adjQ.Q.factors) || + throw(DimensionMismatch("first dimension of B, $(size(B,1)), must equal one of the dimensions of Q, $(size(adjQ.Q.factors))")) +qsize_check(A::AbstractVecOrMat, Q::LQPackedQ) = + size(A, 2) in size(Q.factors) || + throw(DimensionMismatch("second dimension of A, $(size(A,2)), must equal one of the dimensions of Q, $(size(Q.factors))")) # in-place right-application of LQPackedQs # these methods require that the applied-to matrix's (A's) 
number of columns # match the number of columns (nQ) of the LQPackedQ (Q) (necessary for in-place # operation, and the underlying LAPACK routine (ormlq) treats the implicit Q # as its (nQ-by-nQ) square form) -rmul!(A::StridedMatrix{T}, B::LQPackedQ{T}) where {T<:BlasFloat} = +rmul!(A::StridedVecOrMat{T}, B::LQPackedQ{T}) where {T<:BlasFloat} = LAPACK.ormlq!('R', 'N', B.factors, B.τ, A) -rmul!(A::StridedMatrix{T}, adjB::AdjointQ{<:Any,<:LQPackedQ{T}}) where {T<:BlasReal} = +rmul!(A::StridedVecOrMat{T}, adjB::AdjointQ{<:Any,<:LQPackedQ{T}}) where {T<:BlasReal} = (B = adjB.Q; LAPACK.ormlq!('R', 'T', B.factors, B.τ, A)) -rmul!(A::StridedMatrix{T}, adjB::AdjointQ{<:Any,<:LQPackedQ{T}}) where {T<:BlasComplex} = +rmul!(A::StridedVecOrMat{T}, adjB::AdjointQ{<:Any,<:LQPackedQ{T}}) where {T<:BlasComplex} = (B = adjB.Q; LAPACK.ormlq!('R', 'C', B.factors, B.τ, A)) -# out-of-place right application of LQPackedQs -# -# these methods: (1) check whether the applied-to matrix's (A's) appropriate dimension -# (columns for A_*, rows for Ac_*) matches the number of columns (nQ) of the LQPackedQ (Q), -# and if so effectively apply Q's square form to A without additional shenanigans; and -# (2) if the preceding dimensions do not match, check whether the appropriate dimension of -# A instead matches the number of rows of the matrix of which Q is a factor (i.e. 
-# size(Q.factors, 1)), and if so implicitly apply Q's truncated form to A by zero extending -# A as necessary for check (1) to pass (if possible) and then applying Q's square form -# -function (*)(A::AbstractVector, Q::LQPackedQ) - T = promote_type(eltype(A), eltype(Q)) - if 1 == size(Q.factors, 2) - C = copy_similar(A, T) - elseif 1 == size(Q.factors, 1) - C = zeros(T, length(A), size(Q.factors, 2)) - copyto!(C, 1, A, 1, length(A)) - else - _rightappdimmismatch("columns") - end - return rmul!(C, convert(AbstractQ{T}, Q)) -end -function (*)(A::AbstractMatrix, Q::LQPackedQ) - T = promote_type(eltype(A), eltype(Q)) - if size(A, 2) == size(Q.factors, 2) - C = copy_similar(A, T) - elseif size(A, 2) == size(Q.factors, 1) - C = zeros(T, size(A, 1), size(Q.factors, 2)) - copyto!(C, 1, A, 1, length(A)) - else - _rightappdimmismatch("columns") - end - return rmul!(C, convert(AbstractQ{T}, Q)) -end -function (*)(adjA::AdjointAbsMat, Q::LQPackedQ) - A = adjA.parent - T = promote_type(eltype(A), eltype(Q)) - if size(A, 1) == size(Q.factors, 2) - C = copy_similar(adjA, T) - elseif size(A, 1) == size(Q.factors, 1) - C = zeros(T, size(A, 2), size(Q.factors, 2)) - adjoint!(view(C, :, 1:size(A, 1)), A) - else - _rightappdimmismatch("rows") - end - return rmul!(C, convert(AbstractQ{T}, Q)) -end -(*)(u::AdjointAbsVec, Q::LQPackedQ) = (Q'u')' - -_rightappdimmismatch(rowsorcols) = - throw(DimensionMismatch(string("the number of $(rowsorcols) of the matrix on the left ", - "must match either (1) the number of columns of the (LQPackedQ) matrix on the right ", - "or (2) the number of rows of that (LQPackedQ) matrix's internal representation ", - "(the factorization's originating matrix's number of rows)"))) +### QB / QcB +lmul!(A::LQPackedQ{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = LAPACK.ormlq!('L','N',A.factors,A.τ,B) +lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasReal} = + (A = adjA.Q; LAPACK.ormlq!('L', 'T', A.factors, A.τ, B)) 
+lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} = + (A = adjA.Q; LAPACK.ormlq!('L', 'C', A.factors, A.τ, B)) # In LQ factorization, `Q` is expressed as the product of the adjoint of the # reflectors. Thus, `det` has to be conjugated. diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl index 2f5c5508e0ee3..875e8cefcb66e 100644 --- a/stdlib/LinearAlgebra/src/adjtrans.jl +++ b/stdlib/LinearAlgebra/src/adjtrans.jl @@ -94,11 +94,8 @@ inplace_adj_or_trans(::Type{<:AbstractArray}) = copyto! inplace_adj_or_trans(::Type{<:Adjoint}) = adjoint! inplace_adj_or_trans(::Type{<:Transpose}) = transpose! -adj_or_trans_char(::T) where {T<:AbstractArray} = adj_or_trans_char(T) -adj_or_trans_char(::Type{<:AbstractArray}) = 'N' -adj_or_trans_char(::Type{<:Adjoint}) = 'C' -adj_or_trans_char(::Type{<:Adjoint{<:Real}}) = 'T' -adj_or_trans_char(::Type{<:Transpose}) = 'T' +_unwrap(A::Adjoint) = parent(A) +_unwrap(A::Transpose) = parent(A) Base.dataids(A::Union{Adjoint, Transpose}) = Base.dataids(A.parent) Base.unaliascopy(A::Union{Adjoint,Transpose}) = typeof(A)(Base.unaliascopy(A.parent)) @@ -335,6 +332,8 @@ axes(v::AdjOrTransAbsVec) = (Base.OneTo(1), axes(v.parent)...) 
axes(A::AdjOrTransAbsMat) = reverse(axes(A.parent)) IndexStyle(::Type{<:AdjOrTransAbsVec}) = IndexLinear() IndexStyle(::Type{<:AdjOrTransAbsMat}) = IndexCartesian() +@propagate_inbounds Base.isassigned(v::AdjOrTransAbsVec, i::Int) = isassigned(v.parent, i-1+first(axes(v.parent)[1])) +@propagate_inbounds Base.isassigned(v::AdjOrTransAbsMat, i::Int, j::Int) = isassigned(v.parent, j, i) @propagate_inbounds getindex(v::AdjOrTransAbsVec{T}, i::Int) where {T} = wrapperop(v)(v.parent[i-1+first(axes(v.parent)[1])])::T @propagate_inbounds getindex(A::AdjOrTransAbsMat{T}, i::Int, j::Int) where {T} = wrapperop(A)(A.parent[j, i])::T @propagate_inbounds setindex!(v::AdjOrTransAbsVec, x, i::Int) = (setindex!(v.parent, wrapperop(v)(x), i-1+first(axes(v.parent)[1])); v) @@ -506,3 +505,8 @@ pinv(v::TransposeAbsVec, tol::Real = 0) = pinv(conj(v.parent)).parent ## complex conjugate conj(A::Transpose) = adjoint(A.parent) conj(A::Adjoint) = transpose(A.parent) + +## structured matrix methods ## +function Base.replace_in_print_matrix(A::AdjOrTrans,i::Integer,j::Integer,s::AbstractString) + Base.replace_in_print_matrix(parent(A), j, i, s) +end diff --git a/stdlib/LinearAlgebra/src/bidiag.jl b/stdlib/LinearAlgebra/src/bidiag.jl index dd3783d67b0cf..192272cc61e98 100644 --- a/stdlib/LinearAlgebra/src/bidiag.jl +++ b/stdlib/LinearAlgebra/src/bidiag.jl @@ -130,6 +130,19 @@ function bidiagzero(A::Bidiagonal{<:AbstractMatrix}, i, j) end end +@inline function Base.isassigned(A::Bidiagonal, i::Int, j::Int) + @boundscheck checkbounds(Bool, A, i, j) || return false + if i == j + return @inbounds isassigned(A.dv, i) + elseif A.uplo == 'U' && (i == j - 1) + return @inbounds isassigned(A.ev, i) + elseif A.uplo == 'L' && (i == j + 1) + return @inbounds isassigned(A.ev, j) + else + return true + end +end + @inline function getindex(A::Bidiagonal{T}, i::Integer, j::Integer) where T @boundscheck checkbounds(A, i, j) if i == j @@ -170,8 +183,9 @@ end #Converting from Bidiagonal to dense Matrix function 
Matrix{T}(A::Bidiagonal) where T n = size(A, 1) - B = zeros(T, n, n) + B = Matrix{T}(undef, n, n) n == 0 && return B + n > 1 && fill!(B, zero(T)) @inbounds for i = 1:n - 1 B[i,i] = A.dv[i] if A.uplo == 'U' diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl index 56c5954cc28fe..b8a44159de8bd 100644 --- a/stdlib/LinearAlgebra/src/dense.jl +++ b/stdlib/LinearAlgebra/src/dense.jl @@ -875,7 +875,9 @@ julia> sqrt(A) sqrt(::AbstractMatrix) function sqrt(A::AbstractMatrix{T}) where {T<:Union{Real,Complex}} - if ishermitian(A) + if checksquare(A) == 0 + return copy(A) + elseif ishermitian(A) sqrtHermA = sqrt(Hermitian(A)) return ishermitian(sqrtHermA) ? copytri!(parent(sqrtHermA), 'U', true) : parent(sqrtHermA) elseif istriu(A) diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl index b9fa98a9b12b3..29c190e87df72 100644 --- a/stdlib/LinearAlgebra/src/diagonal.jl +++ b/stdlib/LinearAlgebra/src/diagonal.jl @@ -111,7 +111,8 @@ Matrix(D::Diagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(D) Array(D::Diagonal{T}) where {T} = Matrix(D) function Matrix{T}(D::Diagonal) where {T} n = size(D, 1) - B = zeros(T, n, n) + B = Matrix{T}(undef, n, n) + n > 1 && fill!(B, zero(T)) @inbounds for i in 1:n B[i,i] = D.diag[i] end @@ -139,6 +140,16 @@ function size(D::Diagonal,d::Integer) return d<=2 ? 
length(D.diag) : 1 end +@inline function Base.isassigned(D::Diagonal, i::Int, j::Int) + @boundscheck checkbounds(Bool, D, i, j) || return false + if i == j + @inbounds r = isassigned(D.diag, i) + else + r = true + end + r +end + @inline function getindex(D::Diagonal, i::Int, j::Int) @boundscheck checkbounds(D, i, j) if i == j @@ -785,12 +796,11 @@ function eigen(A::AbstractMatrix, D::Diagonal; sortby::Union{Function,Nothing}=n end if size(A, 1) == size(A, 2) && isdiag(A) return eigen(Diagonal(A), D; sortby) - elseif ishermitian(A) + elseif all(isposdef, D.diag) S = promote_type(eigtype(eltype(A)), eltype(D)) - return eigen!(eigencopy_oftype(Hermitian(A), S), Diagonal{S}(D); sortby) + return eigen(A, cholesky(Diagonal{S}(D)); sortby) else - S = promote_type(eigtype(eltype(A)), eltype(D)) - return eigen!(eigencopy_oftype(A, S), Diagonal{S}(D); sortby) + return eigen!(D \ A; sortby) end end diff --git a/stdlib/LinearAlgebra/src/eigen.jl b/stdlib/LinearAlgebra/src/eigen.jl index 185061b0a3a7d..489bfa4665c7a 100644 --- a/stdlib/LinearAlgebra/src/eigen.jl +++ b/stdlib/LinearAlgebra/src/eigen.jl @@ -524,7 +524,7 @@ true """ function eigen(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}; kws...) where {TA,TB} S = promote_type(eigtype(TA), TB) - eigen!(eigencopy_oftype(A, S), eigencopy_oftype(B, S); kws...) + eigen!(copy_similar(A, S), copy_similar(B, S); kws...) end eigen(A::Number, B::Number) = eigen(fill(A,1,1), fill(B,1,1)) @@ -619,7 +619,7 @@ julia> eigvals(A,B) """ function eigvals(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}; kws...) where {TA,TB} S = promote_type(eigtype(TA), TB) - return eigvals!(eigencopy_oftype(A, S), eigencopy_oftype(B, S); kws...) + return eigvals!(copy_similar(A, S), copy_similar(B, S); kws...) 
end """ diff --git a/stdlib/LinearAlgebra/src/generic.jl b/stdlib/LinearAlgebra/src/generic.jl index c66f59838e8ba..9cbe3f76ccfb9 100644 --- a/stdlib/LinearAlgebra/src/generic.jl +++ b/stdlib/LinearAlgebra/src/generic.jl @@ -1591,7 +1591,11 @@ end ξ1/ν end -# apply reflector from left +""" + reflectorApply!(x, τ, A) + +Multiplies `A` in-place by a Householder reflection on the left. It is equivalent to `A .= (I - τ*[1; x] * [1; x]')*A`. +""" @inline function reflectorApply!(x::AbstractVector, τ::Number, A::AbstractVecOrMat) require_one_based_indexing(x) m, n = size(A, 1), size(A, 2) diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index 75b3e121f9086..179f93f2cd6f2 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -80,6 +80,9 @@ function Matrix{T}(H::UpperHessenberg) where T return triu!(copyto!(Matrix{T}(undef, m, n), H.data), -1) end +Base.isassigned(H::UpperHessenberg, i::Int, j::Int) = + i <= j+1 ? isassigned(H.data, i, j) : true + getindex(H::UpperHessenberg{T}, i::Integer, j::Integer) where {T} = i <= j+1 ? convert(T, H.data[i,j]) : zero(T) @@ -446,8 +449,7 @@ julia> A = [4. 9. 7.; 4. 4. 1.; 4. 3. 2.] 
julia> F = hessenberg(A) Hessenberg{Float64, UpperHessenberg{Float64, Matrix{Float64}}, Matrix{Float64}, Vector{Float64}, Bool} -Q factor: -3×3 LinearAlgebra.HessenbergQ{Float64, Matrix{Float64}, Vector{Float64}, false} +Q factor: 3×3 LinearAlgebra.HessenbergQ{Float64, Matrix{Float64}, Vector{Float64}, false} H factor: 3×3 UpperHessenberg{Float64, Matrix{Float64}}: 4.0 -11.3137 -1.41421 @@ -474,7 +476,7 @@ function show(io::IO, mime::MIME"text/plain", F::Hessenberg) if !iszero(F.μ) print("\nwith shift μI for μ = ", F.μ) end - println(io, "\nQ factor:") + print(io, "\nQ factor: ") show(io, mime, F.Q) println(io, "\nH factor:") show(io, mime, F.H) diff --git a/stdlib/LinearAlgebra/src/lapack.jl b/stdlib/LinearAlgebra/src/lapack.jl index 066a858cacb30..6353f9fa8d266 100644 --- a/stdlib/LinearAlgebra/src/lapack.jl +++ b/stdlib/LinearAlgebra/src/lapack.jl @@ -554,9 +554,9 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty # * .. Array Arguments .. # INTEGER IPIV( * ) # DOUBLE PRECISION A( LDA, * ) - function getrf!(A::AbstractMatrix{$elty}) + function getrf!(A::AbstractMatrix{$elty}; check = true) require_one_based_indexing(A) - chkfinite(A) + check && chkfinite(A) chkstride1(A) m, n = size(A) lda = max(1,stride(A, 2)) diff --git a/stdlib/LinearAlgebra/src/lq.jl b/stdlib/LinearAlgebra/src/lq.jl index 33d794906c7e6..07d918c4374a5 100644 --- a/stdlib/LinearAlgebra/src/lq.jl +++ b/stdlib/LinearAlgebra/src/lq.jl @@ -27,8 +27,7 @@ L factor: 2×2 Matrix{Float64}: -8.60233 0.0 4.41741 -0.697486 -Q factor: -2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}} +Q factor: 2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}} julia> S.L * S.Q 2×2 Matrix{Float64}: @@ -97,8 +96,7 @@ L factor: 2×2 Matrix{Float64}: -8.60233 0.0 4.41741 -0.697486 -Q factor: -2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}} +Q factor: 2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}} julia> S.L 
* S.Q 2×2 Matrix{Float64}: @@ -154,7 +152,7 @@ function show(io::IO, mime::MIME{Symbol("text/plain")}, F::LQ) summary(io, F); println(io) println(io, "L factor:") show(io, mime, F.L) - println(io, "\nQ factor:") + print(io, "\nQ factor: ") show(io, mime, F.Q) end diff --git a/stdlib/LinearAlgebra/src/lu.jl b/stdlib/LinearAlgebra/src/lu.jl index a93803ca2ea45..5d69090f27e44 100644 --- a/stdlib/LinearAlgebra/src/lu.jl +++ b/stdlib/LinearAlgebra/src/lu.jl @@ -79,7 +79,7 @@ transpose(F::LU{<:Real}) = TransposeFactorization(F) # the following method is meant to catch calls to lu!(A::LAPACKArray) without a pivoting stategy lu!(A::StridedMatrix{<:BlasFloat}; check::Bool = true) = lu!(A, RowMaximum(); check=check) function lu!(A::StridedMatrix{T}, ::RowMaximum; check::Bool = true) where {T<:BlasFloat} - lpt = LAPACK.getrf!(A) + lpt = LAPACK.getrf!(A; check) check && checknonsingular(lpt[3]) return LU{T,typeof(lpt[1]),typeof(lpt[2])}(lpt[1], lpt[2], lpt[3]) end diff --git a/stdlib/LinearAlgebra/src/matmul.jl b/stdlib/LinearAlgebra/src/matmul.jl index 170aacee6682f..e375108f6a831 100644 --- a/stdlib/LinearAlgebra/src/matmul.jl +++ b/stdlib/LinearAlgebra/src/matmul.jl @@ -68,24 +68,24 @@ end @inline mul!(y::AbstractVector, A::AbstractVecOrMat, x::AbstractVector, alpha::Number, beta::Number) = - generic_matvecmul!(y, adj_or_trans_char(A), _parent(A), x, MulAddMul(alpha, beta)) + generic_matvecmul!(y, wrapper_char(A), _unwrap(A), x, MulAddMul(alpha, beta)) # BLAS cases # equal eltypes @inline generic_matvecmul!(y::StridedVector{T}, tA, A::StridedVecOrMat{T}, x::StridedVector{T}, _add::MulAddMul=MulAddMul()) where {T<:BlasFloat} = - gemv!(y, tA, _parent(A), x, _add.alpha, _add.beta) + gemv!(y, tA, A, x, _add.alpha, _add.beta) # Real (possibly transposed) matrix times complex vector. 
# Multiply the matrix with the real and imaginary parts separately @inline generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{T}, x::StridedVector{Complex{T}}, _add::MulAddMul=MulAddMul()) where {T<:BlasReal} = - gemv!(y, tA, _parent(A), x, _add.alpha, _add.beta) + gemv!(y, tA, A, x, _add.alpha, _add.beta) # Complex matrix times real vector. # Reinterpret the matrix as a real matrix and do real matvec computation. # works only in cooperation with BLAS when A is untransposed (tA == 'N') # but that check is included in gemv! anyway @inline generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{Complex{T}}, x::StridedVector{T}, _add::MulAddMul=MulAddMul()) where {T<:BlasReal} = - gemv!(y, tA, _parent(A), x, _add.alpha, _add.beta) + gemv!(y, tA, A, x, _add.alpha, _add.beta) # Vector-Matrix multiplication (*)(x::AdjointAbsVec, A::AbstractMatrix) = (A'*x')' @@ -267,10 +267,10 @@ julia> C @inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, B::AbstractVecOrMat, α::Number, β::Number) = generic_matmatmul!( C, - adj_or_trans_char(A), - adj_or_trans_char(B), - _parent(A), - _parent(B), + wrapper_char(A), + wrapper_char(B), + _unwrap(A), + _unwrap(B), MulAddMul(α, β) ) @@ -340,25 +340,45 @@ julia> lmul!(F.Q, B) """ lmul!(A, B) +# THE one big BLAS dispatch @inline function generic_matmatmul!(C::StridedMatrix{T}, tA, tB, A::StridedVecOrMat{T}, B::StridedVecOrMat{T}, - _add::MulAddMul=MulAddMul()) where {T<:BlasFloat} - if tA == 'T' && tB == 'N' && A === B - return syrk_wrapper!(C, 'T', A, _add) - elseif tA == 'N' && tB == 'T' && A === B - return syrk_wrapper!(C, 'N', A, _add) - elseif tA == 'C' && tB == 'N' && A === B - return herk_wrapper!(C, 'C', A, _add) - elseif tA == 'N' && tB == 'C' && A === B - return herk_wrapper!(C, 'N', A, _add) - else - return gemm_wrapper!(C, tA, tB, A, B, _add) + _add::MulAddMul=MulAddMul()) where {T<:BlasFloat} + if all(in(('N', 'T', 'C')), (tA, tB)) + if tA == 'T' && tB == 'N' && A === B + return 
syrk_wrapper!(C, 'T', A, _add) + elseif tA == 'N' && tB == 'T' && A === B + return syrk_wrapper!(C, 'N', A, _add) + elseif tA == 'C' && tB == 'N' && A === B + return herk_wrapper!(C, 'C', A, _add) + elseif tA == 'N' && tB == 'C' && A === B + return herk_wrapper!(C, 'N', A, _add) + else + return gemm_wrapper!(C, tA, tB, A, B, _add) + end + end + alpha, beta = promote(_add.alpha, _add.beta, zero(T)) + if alpha isa Union{Bool,T} && beta isa Union{Bool,T} + if (tA == 'S' || tA == 's') && tB == 'N' + return BLAS.symm!('L', tA == 'S' ? 'U' : 'L', alpha, A, B, beta, C) + elseif (tB == 'S' || tB == 's') && tA == 'N' + return BLAS.symm!('R', tB == 'S' ? 'U' : 'L', alpha, B, A, beta, C) + elseif (tA == 'H' || tA == 'h') && tB == 'N' + return BLAS.hemm!('L', tA == 'H' ? 'U' : 'L', alpha, A, B, beta, C) + elseif (tB == 'H' || tB == 'h') && tA == 'N' + return BLAS.hemm!('R', tB == 'H' ? 'U' : 'L', alpha, B, A, beta, C) + end end + return _generic_matmatmul!(C, 'N', 'N', wrap(A, tA), wrap(B, tB), _add) end # Complex matrix times (transposed) real matrix. Reinterpret the first matrix to real for efficiency. @inline function generic_matmatmul!(C::StridedVecOrMat{Complex{T}}, tA, tB, A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T}, _add::MulAddMul=MulAddMul()) where {T<:BlasReal} - gemm_wrapper!(C, tA, tB, A, B, _add) + if all(in(('N', 'T', 'C')), (tA, tB)) + gemm_wrapper!(C, tA, tB, A, B, _add) + else + _generic_matmatmul!(C, 'N', 'N', wrap(A, tA), wrap(B, tB), _add) + end end @@ -394,8 +414,19 @@ function gemv!(y::StridedVector{T}, tA::AbstractChar, A::StridedVecOrMat{T}, x:: alpha, beta = promote(α, β, zero(T)) if alpha isa Union{Bool,T} && beta isa Union{Bool,T} && stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) && - !iszero(stride(x, 1)) # We only check input's stride here. - return BLAS.gemv!(tA, alpha, A, x, beta, y) + !iszero(stride(x, 1)) && # We only check input's stride here. 
+ if tA in ('N', 'T', 'C') + return BLAS.gemv!(tA, alpha, A, x, beta, y) + elseif tA in ('S', 's') + return BLAS.symv!(tA == 'S' ? 'U' : 'L', alpha, A, x, beta, y) + elseif tA in ('H', 'h') + return BLAS.hemv!(tA == 'H' ? 'U' : 'L', alpha, A, x, beta, y) + end + end + if tA in ('S', 's', 'H', 'h') + # re-wrap again and use plain ('N') matvec mul algorithm, + # because _generic_matvecmul! can't handle the HermOrSym cases specifically + return _generic_matvecmul!(y, 'N', wrap(A, tA), x, MulAddMul(α, β)) else return _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β)) end @@ -418,7 +449,8 @@ function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMa BLAS.gemv!(tA, alpha, reinterpret(T, A), x, beta, reinterpret(T, y)) return y else - return _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β)) + Anew, ta = tA in ('S', 's', 'H', 'h') ? (wrap(A, tA), 'N') : (A, tA) + return _generic_matvecmul!(y, ta, Anew, x, MulAddMul(α, β)) end end @@ -434,12 +466,16 @@ function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMa alpha, beta = promote(α, β, zero(T)) @views if alpha isa Union{Bool,T} && beta isa Union{Bool,T} && stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) && - !iszero(stride(x, 1)) + !iszero(stride(x, 1)) && tA in ('N', 'T', 'C') xfl = reinterpret(reshape, T, x) # Use reshape here. yfl = reinterpret(reshape, T, y) BLAS.gemv!(tA, alpha, A, xfl[1, :], beta, yfl[1, :]) BLAS.gemv!(tA, alpha, A, xfl[2, :], beta, yfl[2, :]) return y + elseif tA in ('S', 's', 'H', 'h') + # re-wrap again and use plain ('N') matvec mul algorithm, + # because _generic_matvecmul! 
can't handle the HermOrSym cases specifically + return _generic_matvecmul!(y, 'N', wrap(A, tA), x, MulAddMul(α, β)) else return _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β)) end @@ -528,7 +564,11 @@ function gemm_wrapper(tA::AbstractChar, tB::AbstractChar, mA, nA = lapack_size(tA, A) mB, nB = lapack_size(tB, B) C = similar(B, T, mA, nB) - gemm_wrapper!(C, tA, tB, A, B) + if all(in(('N', 'T', 'C')), (tA, tB)) + gemm_wrapper!(C, tA, tB, A, B) + else + _generic_matmatmul!(C, 'N', 'N', wrap(A, tA), wrap(B, tB), _add) + end end function gemm_wrapper!(C::StridedVecOrMat{T}, tA::AbstractChar, tB::AbstractChar, @@ -607,7 +647,7 @@ function gemm_wrapper!(C::StridedVecOrMat{Complex{T}}, tA::AbstractChar, tB::Abs stride(A, 1) == stride(B, 1) == stride(C, 1) == 1 && stride(A, 2) >= size(A, 1) && stride(B, 2) >= size(B, 1) && - stride(C, 2) >= size(C, 1)) && tA == 'N' + stride(C, 2) >= size(C, 1) && tA == 'N') BLAS.gemm!(tA, tB, alpha, reinterpret(T, A), B, beta, reinterpret(T, C)) return C end @@ -645,13 +685,16 @@ end # NOTE: the generic version is also called as fallback for # strides != 1 cases -generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::AbstractVector, - _add::MulAddMul = MulAddMul()) = - _generic_matvecmul!(C, tA, A, B, _add) +@inline function generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::AbstractVector, + _add::MulAddMul = MulAddMul()) + Anew, ta = tA in ('S', 's', 'H', 'h') ? 
(wrap(A, tA), 'N') : (A, tA) + return _generic_matvecmul!(C, ta, Anew, B, _add) +end function _generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::AbstractVector, _add::MulAddMul = MulAddMul()) require_one_based_indexing(C, A, B) + @assert tA in ('N', 'T', 'C') mB = length(B) mA, nA = lapack_size(tA, A) if mB != nA @@ -725,8 +768,7 @@ end const tilebufsize = 10800 # Approximately 32k/3 -function generic_matmatmul!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix, - _add::MulAddMul=MulAddMul()) +function generic_matmatmul!(C::AbstractVecOrMat, tA, tB, A::AbstractVecOrMat, B::AbstractVecOrMat, _add::MulAddMul) mA, nA = lapack_size(tA, A) mB, nB = lapack_size(tB, B) mC, nC = size(C) @@ -740,15 +782,16 @@ function generic_matmatmul!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::Abs if mA == nA == mB == nB == mC == nC == 3 return matmul3x3!(C, tA, tB, A, B, _add) end + A, tA = tA in ('H', 'h', 'S', 's') ? (wrap(A, tA), 'N') : (A, tA) + B, tB = tB in ('H', 'h', 'S', 's') ? 
(wrap(B, tB), 'N') : (B, tB) _generic_matmatmul!(C, tA, tB, A, B, _add) end -generic_matmatmul!(C::AbstractVecOrMat, tA, tB, A::AbstractVecOrMat, B::AbstractVecOrMat, _add::MulAddMul) = - _generic_matmatmul!(C, tA, tB, A, B, _add) - function _generic_matmatmul!(C::AbstractVecOrMat{R}, tA, tB, A::AbstractVecOrMat{T}, B::AbstractVecOrMat{S}, _add::MulAddMul) where {T,S,R} + @assert tA in ('N', 'T', 'C') && tB in ('N', 'T', 'C') require_one_based_indexing(C, A, B) + mA, nA = lapack_size(tA, A) mB, nB = lapack_size(tB, B) if mB != nA @@ -931,7 +974,9 @@ function matmul2x2!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))")) end @inbounds begin - if tA == 'T' + if tA == 'N' + A11 = A[1,1]; A12 = A[1,2]; A21 = A[2,1]; A22 = A[2,2] + elseif tA == 'T' # TODO making these lazy could improve perf A11 = copy(transpose(A[1,1])); A12 = copy(transpose(A[2,1])) A21 = copy(transpose(A[1,2])); A22 = copy(transpose(A[2,2])) @@ -939,10 +984,23 @@ function matmul2x2!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat # TODO making these lazy could improve perf A11 = copy(A[1,1]'); A12 = copy(A[2,1]') A21 = copy(A[1,2]'); A22 = copy(A[2,2]') - else - A11 = A[1,1]; A12 = A[1,2]; A21 = A[2,1]; A22 = A[2,2] + elseif tA == 'S' + A11 = symmetric(A[1,1], :U); A12 = A[1,2] + A21 = copy(transpose(A[1,2])); A22 = symmetric(A[2,2], :U) + elseif tA == 's' + A11 = symmetric(A[1,1], :L); A12 = copy(transpose(A[2,1])) + A21 = A[2,1]; A22 = symmetric(A[2,2], :L) + elseif tA == 'H' + A11 = hermitian(A[1,1], :U); A12 = A[1,2] + A21 = copy(adjoint(A[1,2])); A22 = hermitian(A[2,2], :U) + else # if tA == 'h' + A11 = hermitian(A[1,1], :L); A12 = copy(adjoint(A[2,1])) + A21 = A[2,1]; A22 = hermitian(A[2,2], :L) end - if tB == 'T' + if tB == 'N' + B11 = B[1,1]; B12 = B[1,2]; + B21 = B[2,1]; B22 = B[2,2] + elseif tB == 'T' # TODO making these lazy could improve perf B11 = 
copy(transpose(B[1,1])); B12 = copy(transpose(B[2,1])) B21 = copy(transpose(B[1,2])); B22 = copy(transpose(B[2,2])) @@ -950,9 +1008,18 @@ function matmul2x2!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat # TODO making these lazy could improve perf B11 = copy(B[1,1]'); B12 = copy(B[2,1]') B21 = copy(B[1,2]'); B22 = copy(B[2,2]') - else - B11 = B[1,1]; B12 = B[1,2]; - B21 = B[2,1]; B22 = B[2,2] + elseif tB == 'S' + B11 = symmetric(B[1,1], :U); B12 = B[1,2] + B21 = copy(transpose(B[1,2])); B22 = symmetric(B[2,2], :U) + elseif tB == 's' + B11 = symmetric(B[1,1], :L); B12 = copy(transpose(B[2,1])) + B21 = B[2,1]; B22 = symmetric(B[2,2], :L) + elseif tB == 'H' + B11 = hermitian(B[1,1], :U); B12 = B[1,2] + B21 = copy(adjoint(B[1,2])); B22 = hermitian(B[2,2], :U) + else # if tB == 'h' + B11 = hermitian(B[1,1], :L); B12 = copy(adjoint(B[2,1])) + B21 = B[2,1]; B22 = hermitian(B[2,2], :L) end _modify!(_add, A11*B11 + A12*B21, C, (1,1)) _modify!(_add, A11*B12 + A12*B22, C, (1,2)) @@ -974,7 +1041,11 @@ function matmul3x3!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))")) end @inbounds begin - if tA == 'T' + if tA == 'N' + A11 = A[1,1]; A12 = A[1,2]; A13 = A[1,3] + A21 = A[2,1]; A22 = A[2,2]; A23 = A[2,3] + A31 = A[3,1]; A32 = A[3,2]; A33 = A[3,3] + elseif tA == 'T' # TODO making these lazy could improve perf A11 = copy(transpose(A[1,1])); A12 = copy(transpose(A[2,1])); A13 = copy(transpose(A[3,1])) A21 = copy(transpose(A[1,2])); A22 = copy(transpose(A[2,2])); A23 = copy(transpose(A[3,2])) @@ -984,13 +1055,29 @@ function matmul3x3!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat A11 = copy(A[1,1]'); A12 = copy(A[2,1]'); A13 = copy(A[3,1]') A21 = copy(A[1,2]'); A22 = copy(A[2,2]'); A23 = copy(A[3,2]') A31 = copy(A[1,3]'); A32 = copy(A[2,3]'); A33 = copy(A[3,3]') - else - A11 = A[1,1]; A12 = A[1,2]; A13 = A[1,3] - A21 = A[2,1]; A22 = A[2,2]; 
A23 = A[2,3] - A31 = A[3,1]; A32 = A[3,2]; A33 = A[3,3] + elseif tA == 'S' + A11 = symmetric(A[1,1], :U); A12 = A[1,2]; A13 = A[1,3] + A21 = copy(transpose(A[1,2])); A22 = symmetric(A[2,2], :U); A23 = A[2,3] + A31 = copy(transpose(A[1,3])); A32 = copy(transpose(A[2,3])); A33 = symmetric(A[3,3], :U) + elseif tA == 's' + A11 = symmetric(A[1,1], :L); A12 = copy(transpose(A[2,1])); A13 = copy(transpose(A[3,1])) + A21 = A[2,1]; A22 = symmetric(A[2,2], :L); A23 = copy(transpose(A[3,2])) + A31 = A[3,1]; A32 = A[3,2]; A33 = symmetric(A[3,3], :L) + elseif tA == 'H' + A11 = hermitian(A[1,1], :U); A12 = A[1,2]; A13 = A[1,3] + A21 = copy(adjoint(A[1,2])); A22 = hermitian(A[2,2], :U); A23 = A[2,3] + A31 = copy(adjoint(A[1,3])); A32 = copy(adjoint(A[2,3])); A33 = hermitian(A[3,3], :U) + else # if tA == 'h' + A11 = hermitian(A[1,1], :L); A12 = copy(adjoint(A[2,1])); A13 = copy(adjoint(A[3,1])) + A21 = A[2,1]; A22 = hermitian(A[2,2], :L); A23 = copy(adjoint(A[3,2])) + A31 = A[3,1]; A32 = A[3,2]; A33 = hermitian(A[3,3], :L) end - if tB == 'T' + if tB == 'N' + B11 = B[1,1]; B12 = B[1,2]; B13 = B[1,3] + B21 = B[2,1]; B22 = B[2,2]; B23 = B[2,3] + B31 = B[3,1]; B32 = B[3,2]; B33 = B[3,3] + elseif tB == 'T' # TODO making these lazy could improve perf B11 = copy(transpose(B[1,1])); B12 = copy(transpose(B[2,1])); B13 = copy(transpose(B[3,1])) B21 = copy(transpose(B[1,2])); B22 = copy(transpose(B[2,2])); B23 = copy(transpose(B[3,2])) @@ -1000,10 +1087,22 @@ function matmul3x3!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat B11 = copy(B[1,1]'); B12 = copy(B[2,1]'); B13 = copy(B[3,1]') B21 = copy(B[1,2]'); B22 = copy(B[2,2]'); B23 = copy(B[3,2]') B31 = copy(B[1,3]'); B32 = copy(B[2,3]'); B33 = copy(B[3,3]') - else - B11 = B[1,1]; B12 = B[1,2]; B13 = B[1,3] - B21 = B[2,1]; B22 = B[2,2]; B23 = B[2,3] - B31 = B[3,1]; B32 = B[3,2]; B33 = B[3,3] + elseif tB == 'S' + B11 = symmetric(B[1,1], :U); B12 = B[1,2]; B13 = B[1,3] + B21 = copy(transpose(B[1,2])); B22 = symmetric(B[2,2], :U); 
B23 = B[2,3] + B31 = copy(transpose(B[1,3])); B32 = copy(transpose(B[2,3])); B33 = symmetric(B[3,3], :U) + elseif tB == 's' + B11 = symmetric(B[1,1], :L); B12 = copy(transpose(B[2,1])); B13 = copy(transpose(B[3,1])) + B21 = B[2,1]; B22 = symmetric(B[2,2], :L); B23 = copy(transpose(B[3,2])) + B31 = B[3,1]; B32 = B[3,2]; B33 = symmetric(B[3,3], :L) + elseif tB == 'H' + B11 = hermitian(B[1,1], :U); B12 = B[1,2]; B13 = B[1,3] + B21 = copy(adjoint(B[1,2])); B22 = hermitian(B[2,2], :U); B23 = B[2,3] + B31 = copy(adjoint(B[1,3])); B32 = copy(adjoint(B[2,3])); B33 = hermitian(B[3,3], :U) + else # if tB == 'h' + B11 = hermitian(B[1,1], :L); B12 = copy(adjoint(B[2,1])); B13 = copy(adjoint(B[3,1])) + B21 = B[2,1]; B22 = hermitian(B[2,2], :L); B23 = copy(adjoint(B[3,2])) + B31 = B[3,1]; B32 = B[3,2]; B33 = hermitian(B[3,3], :L) end _modify!(_add, A11*B11 + A12*B21 + A13*B31, C, (1,1)) diff --git a/stdlib/LinearAlgebra/src/qr.jl b/stdlib/LinearAlgebra/src/qr.jl index 43d04ac5fa415..fe40fec78e801 100644 --- a/stdlib/LinearAlgebra/src/qr.jl +++ b/stdlib/LinearAlgebra/src/qr.jl @@ -314,8 +314,7 @@ julia> a = [1. 2.; 3. 4.] julia> qr!(a) LinearAlgebra.QRCompactWY{Float64, Matrix{Float64}, Matrix{Float64}} -Q factor: -2×2 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}} +Q factor: 2×2 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}} R factor: 2×2 Matrix{Float64}: -3.16228 -4.42719 @@ -379,7 +378,7 @@ Multiplication with respect to either full/square or non-full/square `Q` is allo and `F.Q*A` are supported. A `Q` matrix can be converted into a regular matrix with [`Matrix`](@ref). This operation returns the "thin" Q factor, i.e., if `A` is `m`×`n` with `m>=n`, then `Matrix(F.Q)` yields an `m`×`n` matrix with orthonormal columns. To retrieve the "full" Q factor, an -`m`×`m` orthogonal matrix, use `F.Q*I`. If `m<=n`, then `Matrix(F.Q)` yields an `m`×`m` +`m`×`m` orthogonal matrix, use `F.Q*I` or `collect(F.Q)`. 
If `m<=n`, then `Matrix(F.Q)` yields an `m`×`m` orthogonal matrix. The block size for QR decomposition can be specified by keyword argument @@ -399,8 +398,7 @@ julia> A = [3.0 -6.0; 4.0 -8.0; 0.0 1.0] julia> F = qr(A) LinearAlgebra.QRCompactWY{Float64, Matrix{Float64}, Matrix{Float64}} -Q factor: -3×3 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}} +Q factor: 3×3 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}} R factor: 2×2 Matrix{Float64}: -5.0 10.0 @@ -452,7 +450,7 @@ Array(F::QRPivoted) = Matrix(F) function show(io::IO, mime::MIME{Symbol("text/plain")}, F::Union{QR, QRCompactWY, QRPivoted}) summary(io, F); println(io) - println(io, "Q factor:") + print(io, "Q factor: ") show(io, mime, F.Q) println(io, "\nR factor:") show(io, mime, F.R) diff --git a/stdlib/LinearAlgebra/src/symmetric.jl b/stdlib/LinearAlgebra/src/symmetric.jl index f96ca812ea0ec..fa3464e93230b 100644 --- a/stdlib/LinearAlgebra/src/symmetric.jl +++ b/stdlib/LinearAlgebra/src/symmetric.jl @@ -5,7 +5,7 @@ struct Symmetric{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T} data::S uplo::Char - function Symmetric{T,S}(data, uplo) where {T,S<:AbstractMatrix{<:T}} + function Symmetric{T,S}(data, uplo::Char) where {T,S<:AbstractMatrix{<:T}} require_one_based_indexing(data) (uplo != 'U' && uplo != 'L') && throw_uplo() new{T,S}(data, uplo) @@ -98,7 +98,7 @@ struct Hermitian{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T} data::S uplo::Char - function Hermitian{T,S}(data, uplo) where {T,S<:AbstractMatrix{<:T}} + function Hermitian{T,S}(data, uplo::Char) where {T,S<:AbstractMatrix{<:T}} require_one_based_indexing(data) (uplo != 'U' && uplo != 'L') && throw_uplo() new{T,S}(data, uplo) @@ -185,6 +185,9 @@ function hermitian_type(::Type{T}) where {S<:AbstractMatrix, T<:AbstractMatrix{S end hermitian_type(::Type{T}) where {T<:Number} = T +_unwrap(A::Hermitian) = parent(A) +_unwrap(A::Symmetric) = parent(A) + for (S, H) in ((:Symmetric, :Hermitian), (:Hermitian, :Symmetric)) 
@eval begin $S(A::$S) = A @@ -221,6 +224,15 @@ const RealHermSymComplexSym{T<:Real,S} = Union{Hermitian{T,S}, Symmetric{T,S}, S size(A::HermOrSym, d) = size(A.data, d) size(A::HermOrSym) = size(A.data) +@inline function Base.isassigned(A::HermOrSym, i::Int, j::Int) + @boundscheck checkbounds(Bool, A, i, j) || return false + @inbounds if i == j || ((A.uplo == 'U') == (i < j)) + return isassigned(A.data, i, j) + else + return isassigned(A.data, j, i) + end +end + @inline function getindex(A::Symmetric, i::Integer, j::Integer) @boundscheck checkbounds(A, i, j) @inbounds if i == j @@ -512,90 +524,6 @@ for f in (:+, :-) end end -## Matvec -@inline function mul!(y::StridedVector{T}, A::Symmetric{T,<:StridedMatrix}, x::StridedVector{T}, - α::Number, β::Number) where {T<:BlasFloat} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.symv!(A.uplo, alpha, A.data, x, beta, y) - else - return generic_matvecmul!(y, 'N', A, x, MulAddMul(α, β)) - end -end -@inline function mul!(y::StridedVector{T}, A::Hermitian{T,<:StridedMatrix}, x::StridedVector{T}, - α::Number, β::Number) where {T<:BlasReal} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.symv!(A.uplo, alpha, A.data, x, beta, y) - else - return generic_matvecmul!(y, 'N', A, x, MulAddMul(α, β)) - end -end -@inline function mul!(y::StridedVector{T}, A::Hermitian{T,<:StridedMatrix}, x::StridedVector{T}, - α::Number, β::Number) where {T<:BlasComplex} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.hemv!(A.uplo, alpha, A.data, x, beta, y) - else - return generic_matvecmul!(y, 'N', A, x, MulAddMul(α, β)) - end -end -## Matmat -@inline function mul!(C::StridedMatrix{T}, A::Symmetric{T,<:StridedMatrix}, B::StridedMatrix{T}, - α::Number, β::Number) where {T<:BlasFloat} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - 
return BLAS.symm!('L', A.uplo, alpha, A.data, B, beta, C) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end -@inline function mul!(C::StridedMatrix{T}, A::StridedMatrix{T}, B::Symmetric{T,<:StridedMatrix}, - α::Number, β::Number) where {T<:BlasFloat} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.symm!('R', B.uplo, alpha, B.data, A, beta, C) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end -@inline function mul!(C::StridedMatrix{T}, A::Hermitian{T,<:StridedMatrix}, B::StridedMatrix{T}, - α::Number, β::Number) where {T<:BlasReal} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.symm!('L', A.uplo, alpha, A.data, B, beta, C) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end -@inline function mul!(C::StridedMatrix{T}, A::StridedMatrix{T}, B::Hermitian{T,<:StridedMatrix}, - α::Number, β::Number) where {T<:BlasReal} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.symm!('R', B.uplo, alpha, B.data, A, beta, C) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end -@inline function mul!(C::StridedMatrix{T}, A::Hermitian{T,<:StridedMatrix}, B::StridedMatrix{T}, - α::Number, β::Number) where {T<:BlasComplex} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.hemm!('L', A.uplo, alpha, A.data, B, beta, C) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end -@inline function mul!(C::StridedMatrix{T}, A::StridedMatrix{T}, B::Hermitian{T,<:StridedMatrix}, - α::Number, β::Number) where {T<:BlasComplex} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.hemm!('R', B.uplo, alpha, B.data, A, beta, C) - else - 
return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end - *(A::HermOrSym, B::HermOrSym) = A * copyto!(similar(parent(B)), B) function dot(x::AbstractVector, A::RealHermSymComplexHerm, y::AbstractVector) @@ -928,3 +856,10 @@ function _hermitianpart!(A::AbstractMatrix) end return A end + +## structured matrix printing ## +function Base.replace_in_print_matrix(A::HermOrSym,i::Integer,j::Integer,s::AbstractString) + ijminmax = minmax(i, j) + inds = A.uplo == 'U' ? ijminmax : reverse(ijminmax) + Base.replace_in_print_matrix(parent(A), inds..., s) +end diff --git a/stdlib/LinearAlgebra/src/symmetriceigen.jl b/stdlib/LinearAlgebra/src/symmetriceigen.jl index 17371b74bb343..279577c31d664 100644 --- a/stdlib/LinearAlgebra/src/symmetriceigen.jl +++ b/stdlib/LinearAlgebra/src/symmetriceigen.jl @@ -156,6 +156,11 @@ end eigmax(A::RealHermSymComplexHerm{<:Real}) = eigvals(A, size(A, 1):size(A, 1))[1] eigmin(A::RealHermSymComplexHerm{<:Real}) = eigvals(A, 1:1)[1] +function eigen(A::HermOrSym{TA}, B::HermOrSym{TB}; kws...) where {TA,TB} + S = promote_type(eigtype(TA), TB) + return eigen!(eigencopy_oftype(A, S), eigencopy_oftype(B, S); kws...) +end + function eigen!(A::HermOrSym{T,S}, B::HermOrSym{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasReal,S<:StridedMatrix} vals, vecs, _ = LAPACK.sygvd!(1, 'V', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data')) GeneralizedEigen(sorteig!(vals, vecs, sortby)...) @@ -164,26 +169,32 @@ function eigen!(A::Hermitian{T,S}, B::Hermitian{T,S}; sortby::Union{Function,Not vals, vecs, _ = LAPACK.sygvd!(1, 'V', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data')) GeneralizedEigen(sorteig!(vals, vecs, sortby)...) 
end -function eigen!(A::RealHermSymComplexHerm{T,<:StridedMatrix}, B::AbstractMatrix{T}; sortby::Union{Function,Nothing}=nothing) where {T<:Number} - return _choleigen!(A, B, sortby) -end -function eigen!(A::StridedMatrix{T}, B::Union{RealHermSymComplexHerm{T},Diagonal{T}}; sortby::Union{Function,Nothing}=nothing) where {T<:Number} - return _choleigen!(A, B, sortby) + +function eigen(A::AbstractMatrix, C::Cholesky; sortby::Union{Function,Nothing}=nothing) + if ishermitian(A) + eigen!(eigencopy_oftype(Hermitian(A), eigtype(eltype(A))), C; sortby) + else + eigen!(copy_similar(A, eigtype(eltype(A))), C; sortby) + end end -function _choleigen!(A, B, sortby) - U = cholesky(B).U - vals, w = eigen!(UtiAUi!(A, U)) - vecs = U \ w +function eigen!(A::AbstractMatrix, C::Cholesky; sortby::Union{Function,Nothing}=nothing) + # Cholesky decomposition based eigenvalues and eigenvectors + vals, w = eigen!(UtiAUi!(A, C.U)) + vecs = C.U \ w GeneralizedEigen(sorteig!(vals, vecs, sortby)...) end # Perform U' \ A / U in-place, where U::Union{UpperTriangular,Diagonal} -UtiAUi!(A::StridedMatrix, U) = _UtiAUi!(A, U) +UtiAUi!(A, U) = _UtiAUi!(A, U) UtiAUi!(A::Symmetric, U) = Symmetric(_UtiAUi!(copytri!(parent(A), A.uplo), U), sym_uplo(A.uplo)) UtiAUi!(A::Hermitian, U) = Hermitian(_UtiAUi!(copytri!(parent(A), A.uplo, true), U), sym_uplo(A.uplo)) - _UtiAUi!(A, U) = rdiv!(ldiv!(U', A), U) +function eigvals(A::HermOrSym{TA}, B::HermOrSym{TB}; kws...) where {TA,TB} + S = promote_type(eigtype(TA), TB) + return eigvals!(eigencopy_oftype(A, S), eigencopy_oftype(B, S); kws...) +end + function eigvals!(A::HermOrSym{T,S}, B::HermOrSym{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasReal,S<:StridedMatrix} vals = LAPACK.sygvd!(1, 'N', A.uplo, A.data, B.uplo == A.uplo ? 
B.data : copy(B.data'))[1] isnothing(sortby) || sort!(vals, by=sortby) @@ -195,3 +206,15 @@ function eigvals!(A::Hermitian{T,S}, B::Hermitian{T,S}; sortby::Union{Function,N return vals end eigvecs(A::HermOrSym) = eigvecs(eigen(A)) + +function eigvals(A::AbstractMatrix, C::Cholesky; sortby::Union{Function,Nothing}=nothing) + if ishermitian(A) + eigvals!(eigencopy_oftype(Hermitian(A), eigtype(eltype(A))), C; sortby) + else + eigvals!(copy_similar(A, eigtype(eltype(A))), C; sortby) + end +end +function eigvals!(A::AbstractMatrix{T}, C::Cholesky{T, <:AbstractMatrix}; sortby::Union{Function,Nothing}=nothing) where {T<:Number} + # Cholesky decomposition based eigenvalues + return eigvals!(UtiAUi!(A, C.U); sortby) +end diff --git a/stdlib/LinearAlgebra/src/triangular.jl b/stdlib/LinearAlgebra/src/triangular.jl index 1e4ba4119393d..295a46f1522a5 100644 --- a/stdlib/LinearAlgebra/src/triangular.jl +++ b/stdlib/LinearAlgebra/src/triangular.jl @@ -223,6 +223,15 @@ function full!(A::UnitUpperTriangular) B end +Base.isassigned(A::UnitLowerTriangular, i::Int, j::Int) = + i > j ? isassigned(A.data, i, j) : true +Base.isassigned(A::LowerTriangular, i::Int, j::Int) = + i >= j ? isassigned(A.data, i, j) : true +Base.isassigned(A::UnitUpperTriangular, i::Int, j::Int) = + i < j ? isassigned(A.data, i, j) : true +Base.isassigned(A::UpperTriangular, i::Int, j::Int) = + i <= j ? isassigned(A.data, i, j) : true + getindex(A::UnitLowerTriangular{T}, i::Integer, j::Integer) where {T} = i > j ? 
A.data[i,j] : ifelse(i == j, oneunit(T), zero(T)) getindex(A::LowerTriangular, i::Integer, j::Integer) = diff --git a/stdlib/LinearAlgebra/src/tridiag.jl b/stdlib/LinearAlgebra/src/tridiag.jl index 2739400bb393c..13f6a1bb70756 100644 --- a/stdlib/LinearAlgebra/src/tridiag.jl +++ b/stdlib/LinearAlgebra/src/tridiag.jl @@ -124,8 +124,9 @@ AbstractMatrix{T}(S::SymTridiagonal) where {T} = convert(AbstractVector{T}, S.ev)::AbstractVector{T}) function Matrix{T}(M::SymTridiagonal) where T n = size(M, 1) - Mf = zeros(T, n, n) + Mf = Matrix{T}(undef, n, n) n == 0 && return Mf + n > 2 && fill!(Mf, zero(T)) @inbounds for i = 1:n-1 Mf[i,i] = symmetric(M.dv[i], :U) Mf[i+1,i] = transpose(M.ev[i]) @@ -413,6 +414,19 @@ end det(A::SymTridiagonal; shift::Number=false) = det_usmani(A.ev, A.dv, A.ev, shift) logabsdet(A::SymTridiagonal; shift::Number=false) = logabsdet(ldlt(A; shift=shift)) +@inline function Base.isassigned(A::SymTridiagonal, i::Int, j::Int) + @boundscheck checkbounds(Bool, A, i, j) || return false + if i == j + return @inbounds isassigned(A.dv, i) + elseif i == j + 1 + return @inbounds isassigned(A.ev, j) + elseif i + 1 == j + return @inbounds isassigned(A.ev, i) + else + return true + end +end + @inline function getindex(A::SymTridiagonal{T}, i::Integer, j::Integer) where T @boundscheck checkbounds(A, i, j) if i == j @@ -543,9 +557,10 @@ function size(M::Tridiagonal, d::Integer) end function Matrix{T}(M::Tridiagonal) where {T} - A = zeros(T, size(M)) + A = Matrix{T}(undef, size(M)) n = length(M.d) n == 0 && return A + n > 2 && fill!(A, zero(T)) for i in 1:n-1 A[i,i] = M.d[i] A[i+1,i] = M.dl[i] @@ -604,6 +619,19 @@ function diag(M::Tridiagonal{T}, n::Integer=0) where T end end +@inline function Base.isassigned(A::Tridiagonal, i::Int, j::Int) + @boundscheck checkbounds(A, i, j) + if i == j + return @inbounds isassigned(A.d, i) + elseif i == j + 1 + return @inbounds isassigned(A.dl, j) + elseif i + 1 == j + return @inbounds isassigned(A.du, i) + else + return true + end 
+end + @inline function getindex(A::Tridiagonal{T}, i::Integer, j::Integer) where T @boundscheck checkbounds(A, i, j) if i == j diff --git a/stdlib/LinearAlgebra/test/abstractq.jl b/stdlib/LinearAlgebra/test/abstractq.jl index e3f48c7b2e3fd..83a26c6050484 100644 --- a/stdlib/LinearAlgebra/test/abstractq.jl +++ b/stdlib/LinearAlgebra/test/abstractq.jl @@ -20,8 +20,8 @@ n = 5 Base.size(Q::MyQ) = size(Q.Q) LinearAlgebra.lmul!(Q::MyQ, B::AbstractVecOrMat) = lmul!(Q.Q, B) LinearAlgebra.lmul!(adjQ::AdjointQ{<:Any,<:MyQ}, B::AbstractVecOrMat) = lmul!(parent(adjQ).Q', B) - LinearAlgebra.rmul!(A::AbstractMatrix, Q::MyQ) = rmul!(A, Q.Q) - LinearAlgebra.rmul!(A::AbstractMatrix, adjQ::AdjointQ{<:Any,<:MyQ}) = rmul!(A, parent(adjQ).Q') + LinearAlgebra.rmul!(A::AbstractVecOrMat, Q::MyQ) = rmul!(A, Q.Q) + LinearAlgebra.rmul!(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:MyQ}) = rmul!(A, parent(adjQ).Q') Base.convert(::Type{AbstractQ{T}}, Q::MyQ) where {T} = MyQ{T}(Q.Q) LinearAlgebra.det(Q::MyQ) = det(Q.Q) @@ -84,6 +84,17 @@ n = 5 @test Q * x ≈ Q.Q * x @test Q' * x ≈ Q.Q' * x end + A = rand(Float64, 5, 3) + F = qr(A) + Q = MyQ(F.Q) + Prect = Matrix(F.Q) + Psquare = collect(F.Q) + @test Q == Prect + @test Q == Psquare + @test Q == F.Q*I + @test Q ≈ Prect + @test Q ≈ Psquare + @test Q ≈ F.Q*I end end # module diff --git a/stdlib/LinearAlgebra/test/addmul.jl b/stdlib/LinearAlgebra/test/addmul.jl index 72fdf687bf5c3..3fff8289242f7 100644 --- a/stdlib/LinearAlgebra/test/addmul.jl +++ b/stdlib/LinearAlgebra/test/addmul.jl @@ -164,7 +164,8 @@ end Bc = Matrix(B) returned_mat = mul!(C, A, B, α, β) @test returned_mat === C - @test collect(returned_mat) ≈ α * Ac * Bc + β * Cc rtol=rtol + # This test is skipped because it is flakey, but should be fixed and put back (see #49966) + @test_skip collect(returned_mat) ≈ α * Ac * Bc + β * Cc rtol=rtol y = C[:, 1] x = B[:, 1] @@ -189,7 +190,8 @@ end returned_mat = mul!(C, Af, Bf, α, β) @test returned_mat === C - @test collect(returned_mat) ≈ α * Ac * 
Bc + β * Cc rtol=rtol + # This test is skipped because it is flakey, but should be fixed and put back (see #49966) + @test_skip collect(returned_mat) ≈ α * Ac * Bc + β * Cc rtol=rtol end end end @@ -201,7 +203,8 @@ end Bc = Matrix(B) returned_mat = mul!(C, A, B, α, zero(eltype(C))) @test returned_mat === C - @test collect(returned_mat) ≈ α * Ac * Bc rtol=rtol + # This test is skipped because it is flakey, but should be fixed and put back (see #49966) + @test_skip collect(returned_mat) ≈ α * Ac * Bc rtol=rtol end end diff --git a/stdlib/LinearAlgebra/test/adjtrans.jl b/stdlib/LinearAlgebra/test/adjtrans.jl index e40beb29787cf..2362ec7fb28f2 100644 --- a/stdlib/LinearAlgebra/test/adjtrans.jl +++ b/stdlib/LinearAlgebra/test/adjtrans.jl @@ -643,4 +643,32 @@ end end end +@testset "structured printing" begin + D = Diagonal(1:3) + @test sprint(Base.print_matrix, Adjoint(D)) == sprint(Base.print_matrix, D) + @test sprint(Base.print_matrix, Transpose(D)) == sprint(Base.print_matrix, D) + D = Diagonal((1:3)*im) + D2 = Diagonal((1:3)*(-im)) + @test sprint(Base.print_matrix, Transpose(D)) == sprint(Base.print_matrix, D) + @test sprint(Base.print_matrix, Adjoint(D)) == sprint(Base.print_matrix, D2) + + struct OneHotVecOrMat{N} <: AbstractArray{Bool,N} + inds::NTuple{N,Int} + sz::NTuple{N,Int} + end + Base.size(x::OneHotVecOrMat) = x.sz + function Base.getindex(x::OneHotVecOrMat{N}, inds::Vararg{Int,N}) where {N} + checkbounds(x, inds...) + inds == x.inds + end + Base.replace_in_print_matrix(o::OneHotVecOrMat{1}, i::Integer, j::Integer, s::AbstractString) = + o.inds == (i,) ? s : Base.replace_with_centered_mark(s) + Base.replace_in_print_matrix(o::OneHotVecOrMat{2}, i::Integer, j::Integer, s::AbstractString) = + o.inds == (i,j) ? 
s : Base.replace_with_centered_mark(s) + + o = OneHotVecOrMat((2,), (4,)) + @test sprint(Base.print_matrix, Transpose(o)) == sprint(Base.print_matrix, OneHotVecOrMat((1,2), (1,4))) + @test sprint(Base.print_matrix, Adjoint(o)) == sprint(Base.print_matrix, OneHotVecOrMat((1,2), (1,4))) +end + end # module TestAdjointTranspose diff --git a/stdlib/LinearAlgebra/test/bidiag.jl b/stdlib/LinearAlgebra/test/bidiag.jl index 89f2b21a6a973..d13009780b975 100644 --- a/stdlib/LinearAlgebra/test/bidiag.jl +++ b/stdlib/LinearAlgebra/test/bidiag.jl @@ -797,6 +797,14 @@ end @test iszero(BL[i,j]) end end + + M = ones(2,2) + for n in 0:1 + dv = fill(M, n) + ev = fill(M, 0) + B = Bidiagonal(dv, ev, :U) + @test B == Matrix{eltype(B)}(B) + end end @testset "copyto! with UniformScaling" begin diff --git a/stdlib/LinearAlgebra/test/dense.jl b/stdlib/LinearAlgebra/test/dense.jl index 1546f3247acf4..efeedf93ebd1f 100644 --- a/stdlib/LinearAlgebra/test/dense.jl +++ b/stdlib/LinearAlgebra/test/dense.jl @@ -1213,6 +1213,11 @@ end @test exp(log(A2)) ≈ A2 end +@testset "sqrt of empty Matrix of type $T" for T in [Int,Float32,Float64,ComplexF32,ComplexF64] + @test sqrt(Matrix{T}(undef, 0, 0)) == Matrix{T}(undef, 0, 0) + @test_throws DimensionMismatch sqrt(Matrix{T}(undef, 0, 3)) +end + struct TypeWithoutZero end Base.zero(::Type{TypeWithoutZero}) = TypeWithZero() struct TypeWithZero end diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl index 5f169d21ff6fb..2a8248d9ca716 100644 --- a/stdlib/LinearAlgebra/test/diagonal.jl +++ b/stdlib/LinearAlgebra/test/diagonal.jl @@ -755,6 +755,12 @@ end @test tr(D) == 10 @test det(D) == 4 + + M = [1 2; 3 4] + for n in 0:1 + D = Diagonal(fill(M, n)) + @test D == Matrix{eltype(D)}(D) + end end @testset "linear solve for block diagonal matrices" begin diff --git a/stdlib/LinearAlgebra/test/generic.jl b/stdlib/LinearAlgebra/test/generic.jl index 3ebaf38e84945..33eb50d58836a 100644 --- a/stdlib/LinearAlgebra/test/generic.jl 
+++ b/stdlib/LinearAlgebra/test/generic.jl @@ -558,7 +558,7 @@ end end @testset "peakflops" begin - @test LinearAlgebra.peakflops() > 0 + @test LinearAlgebra.peakflops(1024, eltype=Float32, ntrials=2) > 0 end @testset "NaN handling: Issue 28972" begin diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index 91e4e1b1b3df0..61e498211ca7b 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -158,7 +158,7 @@ let n = 10 hessstring = sprint((t, s) -> show(t, "text/plain", s), H) qstring = sprint((t, s) -> show(t, "text/plain", s), H.Q) hstring = sprint((t, s) -> show(t, "text/plain", s), H.H) - @test hessstring == "$(summary(H))\nQ factor:\n$qstring\nH factor:\n$hstring" + @test hessstring == "$(summary(H))\nQ factor: $qstring\nH factor:\n$hstring" #iterate q,h = H diff --git a/stdlib/LinearAlgebra/test/lq.jl b/stdlib/LinearAlgebra/test/lq.jl index 8b4af6a0a5f8d..6bdc4efa5d6dd 100644 --- a/stdlib/LinearAlgebra/test/lq.jl +++ b/stdlib/LinearAlgebra/test/lq.jl @@ -213,8 +213,7 @@ L factor: 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 -Q factor: -4×4 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}""" +Q factor: 4×4 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}""" end @testset "adjoint of LQ" begin diff --git a/stdlib/LinearAlgebra/test/matmul.jl b/stdlib/LinearAlgebra/test/matmul.jl index 2d99856a2667b..e6000a4b24e2d 100644 --- a/stdlib/LinearAlgebra/test/matmul.jl +++ b/stdlib/LinearAlgebra/test/matmul.jl @@ -4,10 +4,19 @@ module TestMatmul using Base: rtoldefault using Test, LinearAlgebra, Random -using LinearAlgebra: mul! 
+using LinearAlgebra: mul!, Symmetric, Hermitian ## Test Julia fallbacks to BLAS routines +mul_wrappers = [ + m -> m, + m -> Symmetric(m, :U), + m -> Symmetric(m, :L), + m -> Hermitian(m, :U), + m -> Hermitian(m, :L), + m -> adjoint(m), + m -> transpose(m)] + @testset "matrices with zero dimensions" begin for (dimsA, dimsB, dimsC) in ( ((0, 5), (5, 3), (0, 3)), @@ -42,6 +51,9 @@ end @test *(adjoint(Ai), adjoint(Bi)) == [-28.25-66im 9.75-58im; -26-89im 21-73im] @test_throws DimensionMismatch [1 2; 0 0; 0 0] * [1 2] end + for wrapper_a in mul_wrappers, wrapper_b in mul_wrappers + @test wrapper_a(AA) * wrapper_b(BB) == Array(wrapper_a(AA)) * Array(wrapper_b(BB)) + end @test_throws DimensionMismatch mul!(Matrix{Float64}(undef, 3, 3), AA, BB) end @testset "3x3 matmul" begin @@ -62,6 +74,9 @@ end @test *(adjoint(Ai), adjoint(Bi)) == [1+2im 20.75+9im -44.75+42im; 19.5+17.5im -54-36.5im 51-14.5im; 13+7.5im 11.25+31.5im -43.25-14.5im] @test_throws DimensionMismatch [1 2 3; 0 0 0; 0 0 0] * [1 2 3] end + for wrapper_a in mul_wrappers, wrapper_b in mul_wrappers + @test wrapper_a(AA) * wrapper_b(BB) == Array(wrapper_a(AA)) * Array(wrapper_b(BB)) + end @test_throws DimensionMismatch mul!(Matrix{Float64}(undef, 4, 4), AA, BB) end @@ -979,4 +994,17 @@ end end end +@testset "Issue #46865: mul!() with non-const alpha, beta" begin + f!(C,A,B,alphas,betas) = mul!(C, A, B, alphas[1], betas[1]) + alphas = [1.0] + betas = [0.5] + for d in [2,3,4] # test native small-matrix cases as well as BLAS + A = rand(d,d) + B = copy(A) + C = copy(A) + f!(C, A, B, alphas, betas) + @test_broken (@allocated f!(C, A, B, alphas, betas)) == 0 + end +end + end # module TestMatmul diff --git a/stdlib/LinearAlgebra/test/qr.jl b/stdlib/LinearAlgebra/test/qr.jl index 6e2e9a7b20603..184971da304f7 100644 --- a/stdlib/LinearAlgebra/test/qr.jl +++ b/stdlib/LinearAlgebra/test/qr.jl @@ -69,7 +69,7 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = Matrix(Q) qrstring = sprint((t, s) -> show(t, "text/plain", s), qra) 
rstring = sprint((t, s) -> show(t, "text/plain", s), r) qstring = sprint((t, s) -> show(t, "text/plain", s), q) - @test qrstring == "$(summary(qra))\nQ factor:\n$qstring\nR factor:\n$rstring" + @test qrstring == "$(summary(qra))\nQ factor: $qstring\nR factor:\n$rstring" # iterate q, r = qra @test q*r ≈ a @@ -155,7 +155,7 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = Matrix(Q) rstring = sprint((t, s) -> show(t, "text/plain", s), r) qstring = sprint((t, s) -> show(t, "text/plain", s), q) pstring = sprint((t, s) -> show(t, "text/plain", s), p) - @test qrstring == "$(summary(qrpa))\nQ factor:\n$qstring\nR factor:\n$rstring\npermutation:\n$pstring" + @test qrstring == "$(summary(qrpa))\nQ factor: $qstring\nR factor:\n$rstring\npermutation:\n$pstring" # iterate q, r, p = qrpa @test q*r[:,invperm(p)] ≈ a[:,1:n1] diff --git a/stdlib/LinearAlgebra/test/symmetric.jl b/stdlib/LinearAlgebra/test/symmetric.jl index 04621c4b49e86..224b7b31a50df 100644 --- a/stdlib/LinearAlgebra/test/symmetric.jl +++ b/stdlib/LinearAlgebra/test/symmetric.jl @@ -64,6 +64,9 @@ end @test_throws ArgumentError Symmetric(asym, :R) @test_throws ArgumentError Hermitian(asym, :R) + @test_throws MethodError Symmetric{eltya,typeof(asym)}(asym, :L) + @test_throws MethodError Hermitian{eltya,typeof(aherm)}(aherm, :L) + # mixed cases with Hermitian/Symmetric if eltya <: Real @test Symmetric(Hermitian(aherm, :U)) === Symmetric(aherm, :U) @@ -824,4 +827,61 @@ end end end +@testset "Structured display" begin + @testset "Diagonal" begin + d = 10:13 + D = Diagonal(d) + for uplo in (:L, :U), SymHerm in (Symmetric, Hermitian) + S = SymHerm(D, uplo) + @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, D) + end + + d = (10:13) .+ 2im + D = Diagonal(d) + DR = Diagonal(complex.(real.(d))) + for uplo in (:L, :U) + H = Hermitian(D, uplo) + @test sprint(Base.print_matrix, H) == sprint(Base.print_matrix, DR) + + S = Symmetric(D, uplo) + @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, D) + end + 
end + @testset "Bidiagonal" begin + dv, ev = 1:4, 1:3 + ST = SymTridiagonal(dv, ev) + D = Diagonal(dv) + for B_uplo in (:L, :U) + B = Bidiagonal(dv, ev, B_uplo) + for Sym_uplo in (:L, :U), SymHerm in (Symmetric, Hermitian) + SB = SymHerm(B, Sym_uplo) + teststr = sprint(Base.print_matrix, Sym_uplo == B_uplo ? ST : D) + @test sprint(Base.print_matrix, SB) == teststr + SB = SymHerm(Transpose(B), Sym_uplo) + teststr = sprint(Base.print_matrix, Sym_uplo == B_uplo ? D : ST) + @test sprint(Base.print_matrix, SB) == teststr + end + end + end + @testset "Tridiagonal" begin + superd, d, subd = 3:5, 10:13, 1:3 + for uplo in (:U, :L), SymHerm in (Symmetric, Hermitian) + S = SymHerm(Tridiagonal(subd, d, superd), uplo) + ST = SymTridiagonal(d, uplo == :U ? superd : subd) + @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, ST) + end + + superd, d, subd = collect((3:5)*im), collect(Complex{Int}, 10:13), collect((1:3)*im) + for uplo in (:U, :L) + S = Symmetric(Tridiagonal(subd, d, superd), uplo) + ST = SymTridiagonal(d, uplo == :U ? superd : subd) + @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, ST) + + H = Hermitian(Tridiagonal(subd, d, superd), uplo) + T = Tridiagonal(uplo == :L ? subd : conj(superd), d, uplo == :U ? superd : conj(subd)) + @test sprint(Base.print_matrix, H) == sprint(Base.print_matrix, T) + end + end +end + end # module TestSymmetric diff --git a/stdlib/LinearAlgebra/test/symmetriceigen.jl b/stdlib/LinearAlgebra/test/symmetriceigen.jl new file mode 100644 index 0000000000000..c28c17255c222 --- /dev/null +++ b/stdlib/LinearAlgebra/test/symmetriceigen.jl @@ -0,0 +1,78 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +module TestSymmetricEigen + +using Test, LinearAlgebra + +@testset "chol-eigen-eigvals" begin + ## Cholesky decomposition based + + # eigenvalue sorting + sf = x->(real(x),imag(x)) + + ## Real valued + A = Float64[1 1 0 0; 1 2 1 0; 0 1 3 1; 0 0 1 4] + H = (A+A')/2 + B = Float64[2 1 4 3; 0 3 1 3; 3 1 0 0; 0 1 3 1] + BH = (B+B')/2 + # PD matrix + BPD = B*B' + # eigen + C = cholesky(BPD) + e,v = eigen(A, C; sortby=sf) + @test A*v ≈ BPD*v*Diagonal(e) + # eigvals + @test eigvals(A, BPD; sortby=sf) ≈ eigvals(A, C; sortby=sf) + + ## Complex valued + A = [1.0+im 1.0+1.0im 0 0; 1.0+1.0im 2.0+3.0im 1.0+1.0im 0; 0 1.0+2.0im 3.0+4.0im 1.0+5.0im; 0 0 1.0+1.0im 4.0+4.0im] + AH = (A+A')/2 + B = [2.0+2.0im 1.0+1.0im 4.0+4.0im 3.0+3.0im; 0 3.0+2.0im 1.0+1.0im 3.0+4.0im; 3.0+3.0im 1.0+4.0im 0 0; 0 1.0+2.0im 3.0+1.0im 1.0+1.0im] + BH = (B+B')/2 + # PD matrix + BPD = B*B' + # eigen + C = cholesky(BPD) + e,v = eigen(A, C; sortby=sf) + @test A*v ≈ BPD*v*Diagonal(e) + # eigvals + @test eigvals(A, BPD; sortby=sf) ≈ eigvals(A, C; sortby=sf) +end + +@testset "issue #49533" begin + ## Real valued + A = Float64[1 1 0 0; 1 2 1 0; 0 1 3 1; 0 0 1 4] + B = Matrix(Diagonal(Float64[1:4;])) + # eigen + e0,v0 = eigen(A, B) + e1,v1 = eigen(A, Symmetric(B)) + e2,v2 = eigen(Symmetric(A), B) + e3,v3 = eigen(Symmetric(A), Symmetric(B)) + @test e0 ≈ e1 && v0 ≈ v1 + @test e0 ≈ e2 && v0 ≈ v2 + @test e0 ≈ e3 && v0 ≈ v3 + # eigvals + @test eigvals(A, B) ≈ eigvals(A, Symmetric(B)) + @test eigvals(A, B) ≈ eigvals(Symmetric(A), B) + @test eigvals(A, B) ≈ eigvals(Symmetric(A), Symmetric(B)) + + ## Complex valued + A = [1.0+im 1.0+1.0im 0 0; 1.0+1.0im 2.0+3.0im 1.0+1.0im 0; 0 1.0+2.0im 3.0+4.0im 1.0+5.0im; 0 0 1.0+1.0im 4.0+4.0im] + AH = A'A + B = [2.0+2.0im 1.0+1.0im 4.0+4.0im 3.0+3.0im; 0 3.0+2.0im 1.0+1.0im 3.0+4.0im; 3.0+3.0im 1.0+4.0im 0 0; 0 1.0+2.0im 3.0+1.0im 1.0+1.0im] + BH = B'B + # eigen + sf = x->(real(x),imag(x)) + e1,v1 = eigen(A, Hermitian(BH)) + @test 
A*v1 ≈ Hermitian(BH)*v1*Diagonal(e1) + e2,v2 = eigen(Hermitian(AH), B) + @test Hermitian(AH)*v2 ≈ B*v2*Diagonal(e2) + e3,v3 = eigen(Hermitian(AH), Hermitian(BH)) + @test Hermitian(AH)*v3 ≈ Hermitian(BH)*v3*Diagonal(e3) + # eigvals + @test eigvals(A, BH; sortby=sf) ≈ eigvals(A, Hermitian(BH); sortby=sf) + @test eigvals(AH, B; sortby=sf) ≈ eigvals(Hermitian(AH), B; sortby=sf) + @test eigvals(AH, BH; sortby=sf) ≈ eigvals(Hermitian(AH), Hermitian(BH); sortby=sf) +end + +end # module TestSymmetricEigen diff --git a/stdlib/LinearAlgebra/test/testgroups b/stdlib/LinearAlgebra/test/testgroups index e281203bf3fa3..0f2f4f4af8708 100644 --- a/stdlib/LinearAlgebra/test/testgroups +++ b/stdlib/LinearAlgebra/test/testgroups @@ -27,3 +27,4 @@ pinv factorization abstractq ldlt +symmetriceigen diff --git a/stdlib/LinearAlgebra/test/tridiag.jl b/stdlib/LinearAlgebra/test/tridiag.jl index e45fc9a65dba0..d4b2dd5e3f269 100644 --- a/stdlib/LinearAlgebra/test/tridiag.jl +++ b/stdlib/LinearAlgebra/test/tridiag.jl @@ -458,7 +458,7 @@ end end end -@testset "SymTridiagonal block matrix" begin +@testset "SymTridiagonal/Tridiagonal block matrix" begin M = [1 2; 2 4] n = 5 A = SymTridiagonal(fill(M, n), fill(M, n-1)) @@ -472,6 +472,27 @@ end @test_throws ArgumentError diag(A, 2) @test_throws ArgumentError diag(A, n+1) @test_throws ArgumentError diag(A, -n-1) + + A = Tridiagonal(fill(M, n-1), fill(M, n), fill(M, n-1)) + @test @inferred A[1,1] == M + @test @inferred A[1,2] == M + @test @inferred A[2,1] == M + @test @inferred diag(A, 1) == fill(M, n-1) + @test @inferred diag(A, 0) == fill(M, n) + @test @inferred diag(A, -1) == fill(M, n-1) + @test_throws MethodError diag(A, -2) + @test_throws MethodError diag(A, 2) + @test_throws ArgumentError diag(A, n+1) + @test_throws ArgumentError diag(A, -n-1) + + for n in 0:2 + dv, ev = fill(M, n), fill(M, max(n-1,0)) + A = SymTridiagonal(dv, ev) + @test A == Matrix{eltype(A)}(A) + + A = Tridiagonal(ev, dv, ev) + @test A == Matrix{eltype(A)}(A) + end end 
@testset "Issue 12068" begin diff --git a/stdlib/OpenBLAS_jll/Project.toml b/stdlib/OpenBLAS_jll/Project.toml index 6d953327003be..529c9945e65f1 100644 --- a/stdlib/OpenBLAS_jll/Project.toml +++ b/stdlib/OpenBLAS_jll/Project.toml @@ -3,6 +3,7 @@ uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" version = "0.3.23+0" [deps] +# See note in `src/OpenBLAS_jll.jl` about this dependency. CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" diff --git a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl index 4f1c57a7d06be..a0c11ab047142 100644 --- a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl +++ b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl @@ -2,7 +2,17 @@ ## dummy stub for https://github.com/JuliaBinaryWrappers/OpenBLAS_jll.jl baremodule OpenBLAS_jll -using Base, Libdl, CompilerSupportLibraries_jll, Base.BinaryPlatforms +using Base, Libdl, Base.BinaryPlatforms + +# We are explicitly NOT loading this at runtime, as it contains `libgomp` +# which conflicts with `libiomp5`, breaking things like MKL. In the future, +# we hope to transition to a JLL interface that provides a more granular +# interface than eagerly dlopen'ing all libraries provided in the JLL +# which will eliminate issues like this, where we avoid loading a JLL +# because we don't want to load a library that we don't even use yet. 
+# using CompilerSupportLibraries_jll +# Because of this however, we have to manually load the libraries we +# _do_ care about, namely libgfortran Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] @@ -25,10 +35,13 @@ end if Sys.iswindows() const libopenblas = "libopenblas$(libsuffix).dll" + const _libgfortran = string("libgfortran-", libgfortran_version(HostPlatform()).major, ".dll") elseif Sys.isapple() const libopenblas = "@rpath/libopenblas$(libsuffix).dylib" + const _libgfortran = string("@rpath/", "libgfortran.", libgfortran_version(HostPlatform()).major, ".dylib") else const libopenblas = "libopenblas$(libsuffix).so" + const _libgfortran = string("libgfortran.so.", libgfortran_version(HostPlatform()).major) end function __init__() @@ -50,6 +63,10 @@ function __init__() ENV["OPENBLAS_DEFAULT_NUM_THREADS"] = "1" end + # As mentioned above, we are sneaking this in here so that we don't have to + # depend on CSL_jll and load _all_ of its libraries. 
+ dlopen(_libgfortran) + global libopenblas_handle = dlopen(libopenblas) global libopenblas_path = dlpath(libopenblas_handle) global artifact_dir = dirname(Sys.BINDIR) diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version index 7b5006f2141ff..6551c7e24049f 100644 --- a/stdlib/Pkg.version +++ b/stdlib/Pkg.version @@ -1,4 +1,4 @@ PKG_BRANCH = master -PKG_SHA1 = daf02a458ae6daa402a5dd6683c40d6910325c4e +PKG_SHA1 = e8197dd0ed8132d4a7619f3657363c8415249c47 PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1 diff --git a/stdlib/Printf/src/Printf.jl b/stdlib/Printf/src/Printf.jl index 62a84d7d36984..cb336a8d9c18b 100644 --- a/stdlib/Printf/src/Printf.jl +++ b/stdlib/Printf/src/Printf.jl @@ -122,7 +122,6 @@ end # parse format string function Format(f::AbstractString) - isempty(f) && throw(InvalidFormatStringError("Format string must not be empty", f, 1, 1)) bytes = codeunits(f) len = length(bytes) pos = 1 @@ -975,7 +974,7 @@ Use shorter of decimal or scientific 1.23 1.23e+07 julia> @printf "Use dynamic width and precision %*.*f" 10 2 0.12345 Use dynamic width and precision 0.12 ``` -For a systematic specification of the format, see [here](https://www.cplusplus.com/reference/cstdio/printf/). +For a systematic specification of the format, see [here](https://en.cppreference.com/w/c/io/fprintf). See also [`@sprintf`](@ref) to get the result as a `String` instead of it being printed. 
# Caveats diff --git a/stdlib/Printf/test/runtests.jl b/stdlib/Printf/test/runtests.jl index 96d61b61d02e3..33970f78648e2 100644 --- a/stdlib/Printf/test/runtests.jl +++ b/stdlib/Printf/test/runtests.jl @@ -339,7 +339,6 @@ end @test Printf.@sprintf("1%%2%%3") == "1%2%3" @test Printf.@sprintf("GAP[%%]") == "GAP[%]" @test Printf.@sprintf("hey there") == "hey there" - @test_throws Printf.InvalidFormatStringError Printf.Format("") @test_throws Printf.InvalidFormatStringError Printf.Format("%+") @test_throws Printf.InvalidFormatStringError Printf.Format("%.") @test_throws Printf.InvalidFormatStringError Printf.Format("%.0") @@ -488,6 +487,10 @@ end @test @sprintf("%d", 3//1) == "3" @test @sprintf("%d", Inf) == "Inf" @test @sprintf(" %d", NaN) == " NaN" + + # 50011 + @test Printf.@sprintf("") == "" + @test Printf.format(Printf.Format("")) == "" end @testset "integers" begin diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index 4bce0c4fecd88..71bbfc70ee937 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -227,11 +227,15 @@ function print(io::IO, elseif Sys.iswindows() && in(groupby, [:thread, [:task, :thread], [:thread, :task]]) @warn "Profiling on windows is limited to the main thread. 
Other threads have not been sampled and will not show in the report" end - any_nosamples = false - println(io, "Overhead ╎ [+additional indent] Count File:Line; Function") - println(io, "=========================================================") + any_nosamples = true + if format === :tree + Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n") + Base.print(io, "=========================================================\n") + end if groupby == [:task, :thread] - for taskid in intersect(get_task_ids(data), tasks) + taskids = intersect(get_task_ids(data), tasks) + isempty(taskids) && (any_nosamples = true) + for taskid in taskids threadids = intersect(get_thread_ids(data, taskid), threads) if length(threadids) == 0 any_nosamples = true @@ -247,7 +251,9 @@ function print(io::IO, end end elseif groupby == [:thread, :task] - for threadid in intersect(get_thread_ids(data), threads) + threadids = intersect(get_thread_ids(data), threads) + isempty(threadids) && (any_nosamples = true) + for threadid in threadids taskids = intersect(get_task_ids(data, threadid), tasks) if length(taskids) == 0 any_nosamples = true @@ -264,7 +270,9 @@ function print(io::IO, end elseif groupby === :task threads = 1:typemax(Int) - for taskid in intersect(get_task_ids(data), tasks) + taskids = intersect(get_task_ids(data), tasks) + isempty(taskids) && (any_nosamples = true) + for taskid in taskids printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color()) nosamples = print(io, data, lidict, pf, format, threads, taskid, true) nosamples && (any_nosamples = true) @@ -272,7 +280,9 @@ function print(io::IO, end elseif groupby === :thread tasks = 1:typemax(UInt) - for threadid in intersect(get_thread_ids(data), threads) + threadids = intersect(get_thread_ids(data), threads) + isempty(threadids) && (any_nosamples = true) + for threadid in threadids printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color()) nosamples = print(io, data, lidict, 
pf, format, threadid, tasks, true) nosamples && (any_nosamples = true) @@ -387,6 +397,7 @@ function getdict!(dict::LineInfoDict, data::Vector{UInt}) n_unique_ips = length(unique_ips) n_unique_ips == 0 && return dict iplookups = similar(unique_ips, Vector{StackFrame}) + sort!(unique_ips) # help each thread to get a disjoint set of libraries, as much if possible @sync for indexes_part in Iterators.partition(eachindex(unique_ips), div(n_unique_ips, Threads.threadpoolsize(), RoundUp)) Threads.@spawn begin for i in indexes_part @@ -653,7 +664,7 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, m = Int[] lilist_idx = Dict{T, Int}() recursive = Set{T}() - first = true + leaf = 0 totalshots = 0 startframe = length(data) skip = false @@ -677,12 +688,16 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, skip = false totalshots += 1 empty!(recursive) - first = true + if leaf != 0 + m[leaf] += 1 + end + leaf = 0 startframe = i elseif !skip frames = lidict[ip] nframes = (frames isa Vector ? length(frames) : 1) - for j = 1:nframes + # the last lookup is the non-inlined root frame, the first is the inlined leaf frame + for j = nframes:-1:1 frame = (frames isa Vector ? frames[j] : frames) !C && frame.from_c && continue key = (T === UInt64 ? ip : frame) @@ -696,10 +711,7 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, push!(recursive, key) n[idx] += 1 end - if first - m[idx] += 1 - first = false - end + leaf = idx end end end @@ -710,30 +722,31 @@ end function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, cols::Int, fmt::ProfileFormat, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool) lilist, n, m, totalshots, nsleeping = parse_flat(fmt.combine ? 
StackFrame : UInt64, data, lidict, fmt.C, threads, tasks) + if false # optional: drop the "non-interpretable" ones + keep = map(frame -> frame != UNKNOWN && frame.line != 0, lilist) + lilist = lilist[keep] + n = n[keep] + m = m[keep] + end util_perc = (1 - (nsleeping / totalshots)) * 100 + filenamemap = Dict{Symbol,String}() if isempty(lilist) if is_subsection Base.print(io, "Total snapshots: ") printstyled(io, "$(totalshots)", color=Base.warn_color()) - Base.println(io, " (", round(Int, util_perc), "% utilization)") + Base.print(io, ". Utilization: ", round(Int, util_perc), "%\n") else warning_empty() end return true end - if false # optional: drop the "non-interpretable" ones - keep = map(frame -> frame != UNKNOWN && frame.line != 0, lilist) - lilist = lilist[keep] - n = n[keep] - m = m[keep] - end - filenamemap = Dict{Symbol,String}() - print_flat(io, lilist, n, m, cols, filenamemap, fmt) - Base.print(io, "Total snapshots: ", totalshots, " (", round(Int, util_perc), "% utilization") + is_subsection || print_flat(io, lilist, n, m, cols, filenamemap, fmt) + Base.print(io, "Total snapshots: ", totalshots, ". Utilization: ", round(Int, util_perc), "%") if is_subsection - println(io, ")") + println(io) + print_flat(io, lilist, n, m, cols, filenamemap, fmt) else - println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task)") + Base.print(io, " across all threads and tasks. 
Use the `groupby` kwarg to break down by thread and/or task.\n") end return false end @@ -1054,8 +1067,8 @@ function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat filenamemap = Dict{Symbol,String}() worklist = [(bt, 0, 0, "")] if !is_subsection - println(io, "Overhead ╎ [+additional indent] Count File:Line; Function") - println(io, "=========================================================") + Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n") + Base.print(io, "=========================================================\n") end while !isempty(worklist) (bt, level, noisefloor, str) = popfirst!(worklist) @@ -1101,24 +1114,23 @@ function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, Line root, nsleeping = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur, threads, tasks) end util_perc = (1 - (nsleeping / root.count)) * 100 - !is_subsection && print_tree(io, root, cols, fmt, is_subsection) + is_subsection || print_tree(io, root, cols, fmt, is_subsection) if isempty(root.down) if is_subsection Base.print(io, "Total snapshots: ") printstyled(io, "$(root.count)", color=Base.warn_color()) - Base.println(io, ". Utilization: ", round(Int, util_perc), "%") + Base.print(io, ". Utilization: ", round(Int, util_perc), "%\n") else warning_empty() end return true - else - Base.print(io, "Total snapshots: ", root.count, ". Utilization: ", round(Int, util_perc), "%") end + Base.print(io, "Total snapshots: ", root.count, ". Utilization: ", round(Int, util_perc), "%") if is_subsection - println(io) + Base.println(io) print_tree(io, root, cols, fmt, is_subsection) else - println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task") + Base.print(io, " across all threads and tasks. 
Use the `groupby` kwarg to break down by thread and/or task.\n") end return false end diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl index e09e3b2aa9e6b..20d26953eb22b 100644 --- a/stdlib/REPL/src/REPLCompletions.jl +++ b/stdlib/REPL/src/REPLCompletions.jl @@ -232,7 +232,10 @@ function complete_keyword(s::Union{String,SubString{String}}) Completion[KeywordCompletion(kw) for kw in sorted_keywords[r]] end -function complete_path(path::AbstractString, pos::Int; use_envpath=false, shell_escape=false) +function complete_path(path::AbstractString, pos::Int; + use_envpath=false, shell_escape=false, + string_escape=false) + @assert !(shell_escape && string_escape) if Base.Sys.isunix() && occursin(r"^~(?:/|$)", path) # if the path is just "~", don't consider the expanded username as a prefix if path == "~" @@ -259,9 +262,9 @@ function complete_path(path::AbstractString, pos::Int; use_envpath=false, shell_ matches = Set{String}() for file in files if startswith(file, prefix) - id = try isdir(joinpath(dir, file)) catch; false end - # joinpath is not used because windows needs to complete with double-backslash - push!(matches, id ? file * (@static Sys.iswindows() ? "\\\\" : "/") : file) + p = joinpath(dir, file) + is_dir = try isdir(p) catch; false end + push!(matches, is_dir ? joinpath(file, "") : file) end end @@ -307,8 +310,14 @@ function complete_path(path::AbstractString, pos::Int; use_envpath=false, shell_ end end - matchList = Completion[PathCompletion(shell_escape ? replace(s, r"\s" => s"\\\0") : s) for s in matches] - startpos = pos - lastindex(prefix) + 1 - count(isequal(' '), prefix) + function do_escape(s) + return shell_escape ? replace(s, r"(\s|\\)" => s"\\\0") : + string_escape ? 
escape_string(s, ('\"','$')) : + s + end + + matchList = Completion[PathCompletion(do_escape(s)) for s in matches] + startpos = pos - lastindex(do_escape(prefix)) + 1 # The pos - lastindex(prefix) + 1 is correct due to `lastindex(prefix)-lastindex(prefix)==0`, # hence we need to add one to get the first index. This is also correct when considering # pos, because pos is the `lastindex` a larger string which `endswith(path)==true`. @@ -409,7 +418,7 @@ const REPL_INTERPRETER_CACHE = REPLInterpreterCache() function get_code_cache() # XXX Avoid storing analysis results into the cache that persists across precompilation, # as [sys|pkg]image currently doesn't support serializing externally created `CodeInstance`. - # Otherwise, `CodeInstance`s created by `REPLInterpreter``, that are much less optimized + # Otherwise, `CodeInstance`s created by `REPLInterpreter`, that are much less optimized # that those produced by `NativeInterpreter`, will leak into the native code cache, # potentially causing runtime slowdown. # (see https://github.com/JuliaLang/julia/issues/48453). 
@@ -524,9 +533,9 @@ function CC.concrete_eval_eligible(interp::REPLInterpreter, @nospecialize(f), result = CC.MethodCallResult(result.rt, result.edgecycle, result.edgelimited, result.edge, neweffects) end -return @invoke CC.concrete_eval_eligible(interp::CC.AbstractInterpreter, f::Any, - result::CC.MethodCallResult, arginfo::CC.ArgInfo, - sv::CC.InferenceState) + return @invoke CC.concrete_eval_eligible(interp::CC.AbstractInterpreter, f::Any, + result::CC.MethodCallResult, arginfo::CC.ArgInfo, + sv::CC.InferenceState) end function resolve_toplevel_symbols!(mod::Module, src::Core.CodeInfo) @@ -565,13 +574,28 @@ function repl_eval_ex(@nospecialize(ex), context_module::Module) interp = REPLInterpreter(result) frame = CC.InferenceState(result, src, #=cache=#:no, interp)::CC.InferenceState - CC.typeinf(interp, frame) + # NOTE Use the fixed world here to make `REPLInterpreter` robust against + # potential invalidations of `Core.Compiler` methods. + Base.invoke_in_world(COMPLETION_WORLD[], CC.typeinf, interp, frame) result = frame.result.result result === Union{} && return nothing # for whatever reason, callers expect this as the Bottom and/or Top type instead return result end +# `COMPLETION_WORLD[]` will be initialized within `__init__` +# (to allow us to potentially remove REPL from the sysimage in the future). +# Note that inference from the `code_typed` call below will use the current world age +# rather than `typemax(UInt)`, since `Base.invoke_in_world` uses the current world age +# when the given world age is higher than the current one. +const COMPLETION_WORLD = Ref{UInt}(typemax(UInt)) + +# Generate code cache for `REPLInterpreter` now: +# This code cache will be available at the world of `COMPLETION_WORLD`, +# assuming no invalidation will happen before initializing REPL. +# Once REPL is loaded, `REPLInterpreter` will be resilient against future invalidations. 
+code_typed(CC.typeinf, (REPLInterpreter, CC.InferenceState)) + # Method completion on function call expression that look like :(max(1)) MAX_METHOD_COMPLETIONS::Int = 40 function _complete_methods(ex_org::Expr, context_module::Module, shift::Bool) @@ -752,7 +776,7 @@ end function close_path_completion(str, startpos, r, paths, pos) length(paths) == 1 || return false # Only close if there's a single choice... _path = str[startpos:prevind(str, first(r))] * (paths[1]::PathCompletion).path - path = expanduser(replace(_path, r"\\ " => " ")) + path = expanduser(unescape_string(replace(_path, "\\\$"=>"\$", "\\\""=>"\""))) # ...except if it's a directory... try isdir(path) @@ -1024,23 +1048,44 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif dotpos = something(findprev(isequal('.'), string, first(varrange)-1), 0) return complete_identifiers!(Completion[], ffunc, context_module, string, string[startpos:pos], pos, dotpos, startpos) - # otherwise... - elseif inc_tag in [:cmd, :string] + elseif inc_tag === :cmd m = match(r"[\t\n\r\"`><=*?|]| (?!\\)", reverse(partial)) startpos = nextind(partial, reverseind(partial, m.offset)) r = startpos:pos + # This expansion with "\\ "=>' ' replacement and shell_escape=true + # assumes the path isn't further quoted within the cmd backticks. 
expanded = complete_expanduser(replace(string[r], r"\\ " => " "), r) expanded[3] && return expanded # If user expansion available, return it - paths, r, success = complete_path(replace(string[r], r"\\ " => " "), pos) + paths, r, success = complete_path(replace(string[r], r"\\ " => " "), pos, + shell_escape=true) + + return sort!(paths, by=p->p.path), r, success + elseif inc_tag === :string + # Find first non-escaped quote + m = match(r"\"(?!\\)", reverse(partial)) + startpos = nextind(partial, reverseind(partial, m.offset)) + r = startpos:pos + + expanded = complete_expanduser(string[r], r) + expanded[3] && return expanded # If user expansion available, return it - if inc_tag === :string && close_path_completion(string, startpos, r, paths, pos) - paths[1] = PathCompletion((paths[1]::PathCompletion).path * "\"") + path_prefix = try + unescape_string(replace(string[r], "\\\$"=>"\$", "\\\""=>"\"")) + catch + nothing end + if !isnothing(path_prefix) + paths, r, success = complete_path(path_prefix, pos, string_escape=true) - #Latex symbols can be completed for strings - (success || inc_tag === :cmd) && return sort!(paths, by=p->p.path), r, success + if close_path_completion(string, startpos, r, paths, pos) + paths[1] = PathCompletion((paths[1]::PathCompletion).path * "\"") + end + + # Fallthrough allowed so that Latex symbols can be completed in strings + success && return sort!(paths, by=p->p.path), r, success + end end ok, ret = bslash_completions(string, pos) @@ -1175,6 +1220,7 @@ end function __init__() Base.Experimental.register_error_hint(UndefVarError_hint, UndefVarError) + COMPLETION_WORLD[] = Base.get_world_counter() nothing end diff --git a/stdlib/REPL/src/docview.jl b/stdlib/REPL/src/docview.jl index db28c84b07cb6..b9797dee910c2 100644 --- a/stdlib/REPL/src/docview.jl +++ b/stdlib/REPL/src/docview.jl @@ -743,7 +743,7 @@ function doc_completions(name, mod::Module=Main) idxs = findall(!isnothing, ms) # avoid messing up the order while inserting - for i in 
reverse(idxs) + for i in reverse!(idxs) c = only((ms[i]::AbstractMatch).captures) insert!(res, i, "$(c)\"\"") end diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl index b0d1ff4b5237a..b2199e10bef55 100644 --- a/stdlib/REPL/test/replcompletions.jl +++ b/stdlib/REPL/test/replcompletions.jl @@ -1177,7 +1177,7 @@ let current_dir, forbidden catch e e isa Base.IOError && occursin("ELOOP", e.msg) end - c, r = test_complete("\"$(joinpath(path, "selfsym"))") + c, r = test_complete("\""*escape_string(joinpath(path, "selfsym"))) @test c == ["selfsymlink"] end end @@ -1207,26 +1207,62 @@ end mktempdir() do path space_folder = randstring() * " α" dir = joinpath(path, space_folder) - dir_space = replace(space_folder, " " => "\\ ") - mkdir(dir) cd(path) do - open(joinpath(space_folder, "space .file"),"w") do f - s = Sys.iswindows() ? "rm $dir_space\\\\space" : "cd $dir_space/space" - c, r = test_scomplete(s) - @test r == lastindex(s)-4:lastindex(s) - @test "space\\ .file" in c + touch(joinpath(space_folder, "space .file")) + + dir_space = replace(space_folder, " " => "\\ ") + s = Sys.iswindows() ? "cd $dir_space\\\\space" : "cd $dir_space/space" + c, r = test_scomplete(s) + @test s[r] == "space" + @test "space\\ .file" in c + # Also use shell escape rules within cmd backticks + s = "`$s" + c, r = test_scomplete(s) + @test s[r] == "space" + @test "space\\ .file" in c + + # escape string according to Julia escaping rules + julia_esc(str) = escape_string(str, ('\"','$')) + + # For normal strings the string should be properly escaped according to + # the usual rules for Julia strings. 
+ s = "cd(\"" * julia_esc(joinpath(path, space_folder, "space")) + c, r = test_complete(s) + @test s[r] == "space" + @test "space .file\"" in c + + # '$' is the only character which can appear in a windows filename and + # which needs to be escaped in Julia strings (on unix we could do this + # test with all sorts of special chars) + touch(joinpath(space_folder, "needs_escape\$.file")) + escpath = julia_esc(joinpath(path, space_folder, "needs_escape\$")) + s = "cd(\"$escpath" + c, r = test_complete(s) + @test s[r] == "needs_escape\\\$" + @test "needs_escape\\\$.file\"" in c - s = Sys.iswindows() ? "cd(\"β $dir_space\\\\space" : "cd(\"β $dir_space/space" + if !Sys.iswindows() + touch(joinpath(space_folder, "needs_escape2\n\".file")) + escpath = julia_esc(joinpath(path, space_folder, "needs_escape2\n\"")) + s = "cd(\"$escpath" c, r = test_complete(s) - @test r == lastindex(s)-4:lastindex(s) - @test "space .file\"" in c + @test s[r] == "needs_escape2\\n\\\"" + @test "needs_escape2\\n\\\".file\"" in c + + touch(joinpath(space_folder, "needs_escape3\\.file")) + escpath = julia_esc(joinpath(path, space_folder, "needs_escape3\\")) + s = "cd(\"$escpath" + c, r = test_complete(s) + @test s[r] == "needs_escape3\\\\" + @test "needs_escape3\\\\.file\"" in c end + # Test for issue #10324 - s = "cd(\"$dir_space" + s = "cd(\"$space_folder" c, r = test_complete(s) - @test r == 5:15 - @test s[r] == dir_space + @test r == 5:14 + @test s[r] == space_folder #Test for #18479 for c in "'`@\$;&" @@ -1240,8 +1276,9 @@ mktempdir() do path @test c[1] == test_dir*(Sys.iswindows() ? "\\\\" : "/") @test res end - c, r, res = test_complete("\""*test_dir) - @test c[1] == test_dir*(Sys.iswindows() ? "\\\\" : "/") + escdir = julia_esc(test_dir) + c, r, res = test_complete("\""*escdir) + @test c[1] == escdir*(Sys.iswindows() ? 
"\\\\" : "/") @test res finally rm(joinpath(path, test_dir), recursive=true) @@ -1285,7 +1322,7 @@ if Sys.iswindows() @test r == length(s)-1:length(s) @test file in c - s = "cd(\"..\\" + s = "cd(\"..\\\\" c,r = test_complete(s) @test r == length(s)+1:length(s) @test temp_name * "\\\\" in c diff --git a/stdlib/Random/src/DSFMT.jl b/stdlib/Random/src/DSFMT.jl index f72a9dd5e9a0a..4c5cb8c522667 100644 --- a/stdlib/Random/src/DSFMT.jl +++ b/stdlib/Random/src/DSFMT.jl @@ -194,8 +194,11 @@ function dsfmt_jump(s::DSFMT_state, jp::GF2X) work = zeros(Int32, JN32) rwork = reinterpret(UInt64, work) dsfmt = Vector{UInt64}(undef, nval >> 1) - ccall(:memcpy, Ptr{Cvoid}, (Ptr{UInt64}, Ptr{Int32}, Csize_t), - dsfmt, val, (nval - 1) * sizeof(Int32)) + GC.@preserve dsfmt val begin + pdsfmt = Base.unsafe_convert(Ptr{Cvoid}, dsfmt) + pval = Base.unsafe_convert(Ptr{Cvoid}, val) + Base.Libc.memcpy(pdsfmt, pval, (nval - 1) * sizeof(Int32)) + end dsfmt[end] = UInt64(N*2) for i in 0:degree(jp) diff --git a/stdlib/Random/src/Random.jl b/stdlib/Random/src/Random.jl index 8da2dd6f3e9c7..78d4f15e2beac 100644 --- a/stdlib/Random/src/Random.jl +++ b/stdlib/Random/src/Random.jl @@ -16,7 +16,6 @@ using Base.GMP: Limb import SHA using Base: BitInteger, BitInteger_types, BitUnsigned, require_one_based_indexing - import Base: copymutable, copy, copy!, ==, hash, convert, rand, randn, show @@ -326,6 +325,8 @@ When only one argument is passed besides the optional `rng` and is a `Tuple`, it as a collection of values (`S`) and not as `dims`. +See also [`randn`](@ref) for normally distributed numbers, and [`rand!`](@ref) and [`randn!`](@ref) for the in-place equivalents. + !!! compat "Julia 1.1" Support for `S` as a tuple requires at least Julia 1.1. 
diff --git a/stdlib/Random/src/XoshiroSimd.jl b/stdlib/Random/src/XoshiroSimd.jl index 9fb03f9572688..1a16baa4bce28 100644 --- a/stdlib/Random/src/XoshiroSimd.jl +++ b/stdlib/Random/src/XoshiroSimd.jl @@ -5,6 +5,7 @@ module XoshiroSimd import ..Random: TaskLocalRNG, rand, rand!, Xoshiro, CloseOpen01, UnsafeView, SamplerType, SamplerTrivial using Base: BitInteger_types +using Base.Libc: memcpy using Core.Intrinsics: llvmcall # Vector-width. Influences random stream. @@ -180,7 +181,7 @@ end s3 = _rotl45(s3) ref = Ref(f(res, T)) # TODO: This may make the random-stream dependent on system endianness - ccall(:memcpy, Ptr{Cvoid}, (Ptr{UInt8}, Ptr{UInt64}, Csize_t), dst+i, ref, len-i) + GC.@preserve ref memcpy(dst+i, Base.unsafe_convert(Ptr{Cvoid}, ref), len-i) end if rng isa TaskLocalRNG task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3 @@ -222,7 +223,7 @@ end res = _plus(_rotl23(_plus(s0,s3)),s0) resLoc = _and(res, 0x0101010101010101) ref = Ref(resLoc) - ccall(:memcpy, Ptr{Cvoid}, (Ptr{UInt8}, Ptr{UInt64}, Csize_t), dst+i, ref, len-i) + GC.@preserve ref memcpy(dst+i, Base.unsafe_convert(Ptr{Cvoid}, ref), len-i) t = _shl17(s1) s2 = _xor(s2, s0) s3 = _xor(s3, s1) diff --git a/stdlib/Random/src/normal.jl b/stdlib/Random/src/normal.jl index 9d0f1595f052f..c2738653a0438 100644 --- a/stdlib/Random/src/normal.jl +++ b/stdlib/Random/src/normal.jl @@ -20,6 +20,8 @@ The `Base` module currently provides an implementation for the types [`Complex`](@ref) counterparts. When the type argument is complex, the values are drawn from the circularly symmetric complex normal distribution of variance 1 (corresponding to real and imaginary part having independent normal distribution with mean zero and variance `1/2`). +See also [`randn!`](@ref) to act in-place. 
+ # Examples ```jldoctest julia> using Random diff --git a/stdlib/Serialization/src/Serialization.jl b/stdlib/Serialization/src/Serialization.jl index dd901d6910abf..7c1043f33bdfe 100644 --- a/stdlib/Serialization/src/Serialization.jl +++ b/stdlib/Serialization/src/Serialization.jl @@ -80,7 +80,7 @@ const TAGS = Any[ const NTAGS = length(TAGS) @assert NTAGS == 255 -const ser_version = 23 # do not make changes without bumping the version #! +const ser_version = 24 # do not make changes without bumping the version #! format_version(::AbstractSerializer) = ser_version format_version(s::Serializer) = s.version @@ -418,6 +418,7 @@ function serialize(s::AbstractSerializer, meth::Method) serialize(s, meth.nargs) serialize(s, meth.isva) serialize(s, meth.is_for_opaque_closure) + serialize(s, meth.nospecializeinfer) serialize(s, meth.constprop) serialize(s, meth.purity) if isdefined(meth, :source) @@ -1026,10 +1027,14 @@ function deserialize(s::AbstractSerializer, ::Type{Method}) nargs = deserialize(s)::Int32 isva = deserialize(s)::Bool is_for_opaque_closure = false + nospecializeinfer = false constprop = purity = 0x00 template_or_is_opaque = deserialize(s) if isa(template_or_is_opaque, Bool) is_for_opaque_closure = template_or_is_opaque + if format_version(s) >= 24 + nospecializeinfer = deserialize(s)::Bool + end if format_version(s) >= 14 constprop = deserialize(s)::UInt8 end @@ -1054,6 +1059,7 @@ function deserialize(s::AbstractSerializer, ::Type{Method}) meth.nargs = nargs meth.isva = isva meth.is_for_opaque_closure = is_for_opaque_closure + meth.nospecializeinfer = nospecializeinfer meth.constprop = constprop meth.purity = purity if template !== nothing @@ -1195,6 +1201,9 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo}) if format_version(s) >= 20 ci.has_fcall = deserialize(s) end + if format_version(s) >= 24 + ci.nospecializeinfer = deserialize(s)::Bool + end if format_version(s) >= 21 ci.inlining = deserialize(s)::UInt8 end diff --git 
a/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl index 6b87d417fc2a8..a347a91721bad 100644 --- a/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl +++ b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl @@ -82,30 +82,32 @@ else end function __init__() - global libamd_handle = dlopen(libamd) - global libamd_path = dlpath(libamd_handle) - global libbtf_handle = dlopen(libbtf) - global libbtf_path = dlpath(libbtf_handle) - global libcamd_handle = dlopen(libcamd) - global libcamd_path = dlpath(libcamd_handle) - global libccolamd_handle = dlopen(libccolamd) - global libccolamd_path = dlpath(libccolamd_handle) - global libcholmod_handle = dlopen(libcholmod) - global libcholmod_path = dlpath(libcholmod_handle) - global libcolamd_handle = dlopen(libcolamd) - global libcolamd_path = dlpath(libcolamd_handle) - global libklu_handle = dlopen(libklu) - global libklu_path = dlpath(libklu_handle) - global libldl_handle = dlopen(libldl) - global libldl_path = dlpath(libldl_handle) - global librbio_handle = dlopen(librbio) - global librbio_path = dlpath(librbio_handle) - global libspqr_handle = dlopen(libspqr) - global libspqr_path = dlpath(libspqr_handle) - global libsuitesparseconfig_handle = dlopen(libsuitesparseconfig) - global libsuitesparseconfig_path = dlpath(libsuitesparseconfig_handle) - global libumfpack_handle = dlopen(libumfpack) - global libumfpack_path = dlpath(libumfpack_handle) + if Base.USE_GPL_LIBS + global libamd_handle = dlopen(libamd) + global libamd_path = dlpath(libamd_handle) + global libbtf_handle = dlopen(libbtf) + global libbtf_path = dlpath(libbtf_handle) + global libcamd_handle = dlopen(libcamd) + global libcamd_path = dlpath(libcamd_handle) + global libccolamd_handle = dlopen(libccolamd) + global libccolamd_path = dlpath(libccolamd_handle) + global libcholmod_handle = dlopen(libcholmod) + global libcholmod_path = dlpath(libcholmod_handle) + global libcolamd_handle = dlopen(libcolamd) + global libcolamd_path = 
dlpath(libcolamd_handle) + global libklu_handle = dlopen(libklu) + global libklu_path = dlpath(libklu_handle) + global libldl_handle = dlopen(libldl) + global libldl_path = dlpath(libldl_handle) + global librbio_handle = dlopen(librbio) + global librbio_path = dlpath(librbio_handle) + global libspqr_handle = dlopen(libspqr) + global libspqr_path = dlpath(libspqr_handle) + global libsuitesparseconfig_handle = dlopen(libsuitesparseconfig) + global libsuitesparseconfig_path = dlpath(libsuitesparseconfig_handle) + global libumfpack_handle = dlopen(libumfpack) + global libumfpack_path = dlpath(libumfpack_handle) + end global artifact_dir = dirname(Sys.BINDIR) end diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl index 392b736c09837..622c696b383a0 100644 --- a/stdlib/Test/src/Test.jl +++ b/stdlib/Test/src/Test.jl @@ -27,7 +27,7 @@ export TestLogger, LogRecord using Random using Random: AbstractRNG, default_rng using InteractiveUtils: gen_call_with_extracted_types -using Base: typesplit +using Base: typesplit, remove_linenums! using Serialization: Serialization const DISPLAY_FAILED = ( @@ -500,19 +500,20 @@ macro test(ex, kws...) # Build the test expression test_expr!("@test", ex, kws...) - orig_ex = Expr(:inert, ex) result = get_test_result(ex, __source__) - return quote + ex = Expr(:inert, ex) + result = quote if $(length(skip) > 0 && esc(skip[1])) - record(get_testset(), Broken(:skipped, $orig_ex)) + record(get_testset(), Broken(:skipped, $ex)) else let _do = $(length(broken) > 0 && esc(broken[1])) ? do_broken_test : do_test - _do($result, $orig_ex) + _do($result, $ex) end end end + return result end """ @@ -540,10 +541,10 @@ Test Broken """ macro test_broken(ex, kws...) test_expr!("@test_broken", ex, kws...) 
- orig_ex = Expr(:inert, ex) result = get_test_result(ex, __source__) # code to call do_test with execution result and original expr - :(do_broken_test($result, $orig_ex)) + ex = Expr(:inert, ex) + return :(do_broken_test($result, $ex)) end """ @@ -570,9 +571,9 @@ Test Broken """ macro test_skip(ex, kws...) test_expr!("@test_skip", ex, kws...) - orig_ex = Expr(:inert, ex) - testres = :(Broken(:skipped, $orig_ex)) - :(record(get_testset(), $testres)) + ex = Expr(:inert, ex) + testres = :(Broken(:skipped, $ex)) + return :(record(get_testset(), $testres)) end # An internal function, called by the code generated by the @test @@ -660,7 +661,8 @@ function get_test_result(ex, source) $negate, )) else - testret = :(Returned($(esc(orig_ex)), nothing, $(QuoteNode(source)))) + ex = Expr(:block, source, esc(orig_ex)) + testret = :(Returned($ex, nothing, $(QuoteNode(source)))) end result = quote try @@ -670,7 +672,6 @@ function get_test_result(ex, source) Threw(_e, Base.current_exceptions(), $(QuoteNode(source))) end end - Base.remove_linenums!(result) result end @@ -759,9 +760,10 @@ In the final example, instead of matching a single string it could alternatively """ macro test_throws(extype, ex) orig_ex = Expr(:inert, ex) + ex = Expr(:block, __source__, esc(ex)) result = quote try - Returned($(esc(ex)), nothing, $(QuoteNode(__source__))) + Returned($ex, nothing, $(QuoteNode(__source__))) catch _e if $(esc(extype)) != InterruptException && _e isa InterruptException rethrow() @@ -769,8 +771,7 @@ macro test_throws(extype, ex) Threw(_e, nothing, $(QuoteNode(__source__))) end end - Base.remove_linenums!(result) - :(do_test_throws($result, $orig_ex, $(esc(extype)))) + return :(do_test_throws($result, $orig_ex, $(esc(extype)))) end const MACROEXPAND_LIKE = Symbol.(("@macroexpand", "@macroexpand1", "macroexpand")) @@ -1350,11 +1351,11 @@ function _check_testset(testsettype, testsetname) end """ - @testset [CustomTestSet] [option=val ...] ["description"] begin ... 
end - @testset [CustomTestSet] [option=val ...] ["description \$v"] for v in (...) ... end - @testset [CustomTestSet] [option=val ...] ["description \$v, \$w"] for v in (...), w in (...) ... end - @testset [CustomTestSet] [option=val ...] ["description"] foo() - @testset let v = (...) ... end + @testset [CustomTestSet] [options...] ["description"] begin test_ex end + @testset [CustomTestSet] [options...] ["description \$v"] for v in itr test_ex end + @testset [CustomTestSet] [options...] ["description \$v, \$w"] for v in itrv, w in itrw test_ex end + @testset [CustomTestSet] [options...] ["description"] test_func() + @testset let v = v, w = w; test_ex; end # With begin/end or function call @@ -1379,7 +1380,7 @@ accepts three boolean options: This can also be set globally via the env var `JULIA_TEST_FAILFAST`. !!! compat "Julia 1.8" - `@testset foo()` requires at least Julia 1.8. + `@testset test_func()` requires at least Julia 1.8. !!! compat "Julia 1.9" `failfast` requires at least Julia 1.9. @@ -1435,6 +1436,9 @@ parent test set (with the context object appended to any failing tests.) !!! compat "Julia 1.9" `@testset let` requires at least Julia 1.9. +!!! compat "Julia 1.10" + Multiple `let` assignments are supported since Julia 1.10. + ## Examples ```jldoctest julia> @testset let logi = log(im) @@ -1445,6 +1449,17 @@ Test Failed at none:3 Expression: !(iszero(real(logi))) Context: logi = 0.0 + 1.5707963267948966im +ERROR: There was an error during testing + +julia> @testset let logi = log(im), op = !iszero + @test imag(logi) == π/2 + @test op(real(logi)) + end +Test Failed at none:3 + Expression: op(real(logi)) + Context: logi = 0.0 + 1.5707963267948966im + op = !iszero + ERROR: There was an error during testing ``` """ @@ -1476,7 +1491,7 @@ trigger_test_failure_break(@nospecialize(err)) = """ Generate the code for an `@testset` with a `let` argument. 
""" -function testset_context(args, tests, source) +function testset_context(args, ex, source) desc, testsettype, options = parse_testset_args(args[1:end-1]) if desc !== nothing || testsettype !== nothing # Reserve this syntax if we ever want to allow this, but for now, @@ -1484,22 +1499,38 @@ function testset_context(args, tests, source) error("@testset with a `let` argument cannot be customized") end - assgn = tests.args[1] - if !isa(assgn, Expr) || assgn.head !== :(=) - error("`@testset let` must have exactly one assignment") + let_ex = ex.args[1] + + if Meta.isexpr(let_ex, :(=)) + contexts = Any[let_ex.args[1]] + elseif Meta.isexpr(let_ex, :block) + contexts = Any[] + for assign_ex in let_ex.args + if Meta.isexpr(assign_ex, :(=)) + push!(contexts, assign_ex.args[1]) + else + error("Malformed `let` expression is given") + end + end + else + error("Malformed `let` expression is given") end - assignee = assgn.args[1] + reverse!(contexts) + + test_ex = ex.args[2] - tests.args[2] = quote - $push_testset($(ContextTestSet)($(QuoteNode(assignee)), $assignee; $options...)) + ex.args[2] = quote + $(map(contexts) do context + :($push_testset($(ContextTestSet)($(QuoteNode(context)), $context; $options...))) + end...) try - $(tests.args[2]) + $(test_ex) finally - $pop_testset() + $(map(_->:($pop_testset()), contexts)...) end end - return esc(tests) + return esc(ex) end """ @@ -1828,10 +1859,9 @@ function _inferred(ex, mod, allow = :(Union{})) ex = Expr(:call, GlobalRef(Test, :_materialize_broadcasted), farg, ex.args[2:end]...) 
end - Base.remove_linenums!(let ex = ex; + result = let ex = ex quote - let - allow = $(esc(allow)) + let allow = $(esc(allow)) allow isa Type || throw(ArgumentError("@inferred requires a type as second argument")) $(if any(a->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex.args) # Has keywords @@ -1855,7 +1885,8 @@ function _inferred(ex, mod, allow = :(Union{})) result end end - end) + end + return remove_linenums!(result) end function is_in_mods(m::Module, recursive::Bool, mods) diff --git a/stdlib/Unicode/src/Unicode.jl b/stdlib/Unicode/src/Unicode.jl index 58b9ab41b790a..e0ae78bd911a7 100644 --- a/stdlib/Unicode/src/Unicode.jl +++ b/stdlib/Unicode/src/Unicode.jl @@ -229,7 +229,7 @@ to perform custom normalizations, such as [`Unicode.julia_chartransform`](@ref). For example, the string `"noël"` can be constructed in two canonically equivalent ways in Unicode, depending on whether `"ë"` is formed from a single codepoint U+00EB or -from the ASCII character `'o'` followed by the U+0308 combining-diaeresis character. +from the ASCII character `'e'` followed by the U+0308 combining-diaeresis character. ```jldoctest julia> s1 = "no\u00EBl" diff --git a/sysimage.mk b/sysimage.mk index 7ed61d471a153..993ee9a990058 100644 --- a/sysimage.mk +++ b/sysimage.mk @@ -59,14 +59,14 @@ RELBUILDROOT := $(call rel_path,$(JULIAHOME)/base,$(BUILDROOT)/base)/ # <-- make $(build_private_libdir)/corecompiler.ji: $(COMPILER_SRCS) @$(call PRINT_JULIA, cd $(JULIAHOME)/base && \ - $(call spawn,$(JULIA_EXECUTABLE)) -C "$(JULIA_CPU_TARGET)" --output-ji $(call cygpath_w,$@).tmp \ + $(call spawn,$(JULIA_EXECUTABLE)) -C "$(JULIA_CPU_TARGET)" $(HEAPLIM) --output-ji $(call cygpath_w,$@).tmp \ --startup-file=no --warn-overwrite=yes -g$(BOOTSTRAP_DEBUG_LEVEL) -O0 compiler/compiler.jl) @mv $@.tmp $@ $(build_private_libdir)/sys.ji: $(build_private_libdir)/corecompiler.ji $(JULIAHOME)/VERSION $(BASE_SRCS) $(STDLIB_SRCS) @$(call PRINT_JULIA, cd $(JULIAHOME)/base && \ if ! 
JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) WINEPATH="$(call cygpath_w,$(build_bindir));$$WINEPATH" \ - $(call spawn, $(JULIA_EXECUTABLE)) -g1 -O0 -C "$(JULIA_CPU_TARGET)" --output-ji $(call cygpath_w,$@).tmp $(JULIA_SYSIMG_BUILD_FLAGS) \ + $(call spawn, $(JULIA_EXECUTABLE)) -g1 -O0 -C "$(JULIA_CPU_TARGET)" $(HEAPLIM) --output-ji $(call cygpath_w,$@).tmp $(JULIA_SYSIMG_BUILD_FLAGS) \ --startup-file=no --warn-overwrite=yes --sysimage $(call cygpath_w,$<) sysimg.jl $(RELBUILDROOT); then \ echo '*** This error might be fixed by running `make clean`. If the error persists$(COMMA) try `make cleanall`. ***'; \ false; \ @@ -76,9 +76,13 @@ $(build_private_libdir)/sys.ji: $(build_private_libdir)/corecompiler.ji $(JULIAH define sysimg_builder $$(build_private_libdir)/sys$1-o.a $$(build_private_libdir)/sys$1-bc.a : $$(build_private_libdir)/sys$1-%.a : $$(build_private_libdir)/sys.ji $$(JULIAHOME)/contrib/generate_precompile.jl @$$(call PRINT_JULIA, cd $$(JULIAHOME)/base && \ - if ! JULIA_BINDIR=$$(call cygpath_w,$(build_bindir)) WINEPATH="$$(call cygpath_w,$$(build_bindir));$$$$WINEPATH" \ - JULIA_NUM_THREADS=1 \ - $$(call spawn, $3) $2 -C "$$(JULIA_CPU_TARGET)" --output-$$* $$(call cygpath_w,$$@).tmp $$(JULIA_SYSIMG_BUILD_FLAGS) \ + if ! JULIA_BINDIR=$$(call cygpath_w,$(build_bindir)) \ + WINEPATH="$$(call cygpath_w,$$(build_bindir));$$$$WINEPATH" \ + JULIA_LOAD_PATH='@stdlib' \ + JULIA_PROJECT= \ + JULIA_DEPOT_PATH=':' \ + JULIA_NUM_THREADS=1 \ + $$(call spawn, $3) $2 -C "$$(JULIA_CPU_TARGET)" $$(HEAPLIM) --output-$$* $$(call cygpath_w,$$@).tmp $$(JULIA_SYSIMG_BUILD_FLAGS) \ --startup-file=no --warn-overwrite=yes --sysimage $$(call cygpath_w,$$<) $$(call cygpath_w,$$(JULIAHOME)/contrib/generate_precompile.jl) $(JULIA_PRECOMPILE); then \ echo '*** This error is usually fixed by running `make clean`. If the error persists$$(COMMA) try `make cleanall`. 
***'; \ false; \ diff --git a/test/Makefile b/test/Makefile index 24e137a5b1492..88dbe5b2b4ed6 100644 --- a/test/Makefile +++ b/test/Makefile @@ -6,6 +6,11 @@ VERSDIR := v$(shell cut -d. -f1-2 < $(JULIAHOME)/VERSION) STDLIBDIR := $(build_datarootdir)/julia/stdlib/$(VERSDIR) # TODO: this Makefile ignores BUILDDIR, except for computing JULIA_EXECUTABLE +export JULIA_DEPOT_PATH := $(build_prefix)/share/julia +export JULIA_LOAD_PATH := @stdlib +unexport JULIA_PROJECT := +unexport JULIA_BINDIR := + TESTGROUPS = unicode strings compiler TESTS = all default stdlib $(TESTGROUPS) \ $(patsubst $(STDLIBDIR)/%/,%,$(dir $(wildcard $(STDLIBDIR)/*/.))) \ diff --git a/test/abstractarray.jl b/test/abstractarray.jl index c5ff97deb6777..912e0d5883d12 100644 --- a/test/abstractarray.jl +++ b/test/abstractarray.jl @@ -236,6 +236,19 @@ end end end +@testset "AbstractArray fallbacks for CartesianIndices" begin + @test ndims(CartesianIndices{3}) == 3 + @test eltype(CartesianIndices{3}) == CartesianIndex{3} + for t in ((1:2, 1:2), (3:4,), ()) + C2 = CartesianIndices(t) + @test ndims(C2) == length(t) + @test ndims(typeof(C2)) == length(t) + @test IndexStyle(C2) == IndexCartesian() + @test eltype(C2) == CartesianIndex{length(t)} + @test Base.IteratorSize(C2) isa Base.HasShape{length(t)} + end +end + @testset "LinearIndices" begin @testset "constructors" begin for oinds in [ @@ -682,6 +695,14 @@ function test_cat(::Type{TestAbstractArray}) @test Base.typed_hcat(Float64, B, B) == TSlow(b2hcat) @test Base.typed_hcat(Float64, B, B, B) == TSlow(b3hcat) + @testset "issue #49676, bad error message on v[1 +1]" begin + # This is here because all these expressions are handled by Base.typed_hcat + v = [1 2 3] + @test_throws ArgumentError v[1 +1] + @test_throws ArgumentError v[1 1] + @test_throws ArgumentError v[[1 2] [2 3]] + end + @test vcat(B1, B2) == TSlow(vcat([1:24...], [1:25...])) @test hcat(C1, C2) == TSlow([1 2 1 2 3; 3 4 4 5 6]) @test hcat(C1, C2, C1) == TSlow([1 2 1 2 3 1 2; 3 4 4 5 6 3 4]) 
diff --git a/test/atexit.jl b/test/atexit.jl index bf46edae6eaad..64b56e32466df 100644 --- a/test/atexit.jl +++ b/test/atexit.jl @@ -4,8 +4,9 @@ using Test @testset "atexit.jl" begin function _atexit_tests_gen_cmd_eval(expr::String) + # We run the atexit tests with 2 threads, for the parallelism tests at the end. cmd_eval = ``` - $(Base.julia_cmd()) -e $(expr) + $(Base.julia_cmd()) -t2 -e $(expr) ``` return cmd_eval end @@ -13,8 +14,9 @@ using Test script, io = mktemp(temp_dir) println(io, expr) close(io) + # We run the atexit tests with 2 threads, for the parallelism tests at the end. cmd_script = ``` - $(Base.julia_cmd()) $(script) + $(Base.julia_cmd()) -t2 $(script) ``` return cmd_script end @@ -172,5 +174,90 @@ using Test @test p_script.exitcode == expected_exit_code end end + @testset "test calling atexit() in parallel with running atexit hooks." begin + # These tests cover 3 parallelism cases, as described by the following comments. + julia_expr_list = Dict( + # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + # 1. registering a hook from inside a hook + """ + atexit() do + atexit() do + exit(11) + end + end + # This will attempt to exit 0, but the execution of the atexit hook will + # register another hook, which will exit 11. + exit(0) + """ => 11, + # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + # 2. registering a hook from another thread while hooks are running + """ + c = Channel() + # This hook must execute _last_. (Execution is LIFO.) + atexit() do + put!(c, nothing) + put!(c, nothing) + end + atexit() do + # This will run in a concurrent task, testing that we can register atexit + # hooks from another task while running atexit hooks. + Threads.@spawn begin + take!(c) # block on c + atexit() do + exit(11) + end + take!(c) # keep the _atexit() loop alive until we've added another item. + end + end + exit(0) + """ => 11, + # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + # 3. 
attempting to register a hook after all hooks have finished (disallowed) + """ + const atexit_has_finished = Threads.Atomic{Bool}(false) + atexit() do + Threads.@spawn begin + # Block until the atexit hooks have all finished. We use a manual "spin + # lock" because task switch is disallowed inside the finalizer, below. + while !atexit_has_finished[] end + try + # By the time this runs, all the atexit hooks will be done. + # So this will throw. + atexit() do + exit(11) + end + catch + # Meaning we _actually_ exit 22. + exit(22) + end + end + end + # Finalizers run after the atexit hooks, so this blocks exit until the spawned + # task above gets a chance to run. + x = [] + finalizer(x) do x + # Allow the spawned task to finish + atexit_has_finished[] = true + # Then spin forever to prevent exit. + while atexit_has_finished[] end + end + exit(0) + """ => 22, + # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + ) + for julia_expr in keys(julia_expr_list) + cmd_eval = _atexit_tests_gen_cmd_eval(julia_expr) + cmd_script = _atexit_tests_gen_cmd_script(atexit_temp_dir, julia_expr) + expected_exit_code = julia_expr_list[julia_expr] + @test_throws(ProcessFailedException, run(cmd_eval)) + @test_throws(ProcessFailedException, run(cmd_script)) + p_eval = run(cmd_eval; wait = false) + p_script = run(cmd_script; wait = false) + wait(p_eval) + wait(p_script) + @test p_eval.exitcode == expected_exit_code + @test p_script.exitcode == expected_exit_code + end + end rm(atexit_temp_dir; force = true, recursive = true) end diff --git a/test/backtrace.jl b/test/backtrace.jl index 38019880da35d..50a50100488c4 100644 --- a/test/backtrace.jl +++ b/test/backtrace.jl @@ -195,6 +195,13 @@ let bt, found = false end # Syntax error locations appear in backtraces +let trace = try + eval(Expr(:error, 1)) + catch + stacktrace(catch_backtrace()) + end + @test trace[1].func === Symbol("top-level scope") +end let trace = try include_string(@__MODULE__, """ @@ -221,7 +228,7 @@ let trace = try 
end @test trace[1].func === Symbol("top-level scope") @test trace[1].file === :a_filename - @test trace[1].line == 2 + @test trace[1].line == 3 end # issue #45171 diff --git a/test/boundscheck_exec.jl b/test/boundscheck_exec.jl index 403014c94ed0d..f2eb2ea630893 100644 --- a/test/boundscheck_exec.jl +++ b/test/boundscheck_exec.jl @@ -282,7 +282,6 @@ begin # Pass inbounds meta to getindex on CartesianIndices (#42115) end end - # Test that --check-bounds=off doesn't permit const prop of indices into # function that are not dynamically reachable (the same test for @inbounds # is in the compiler tests). @@ -294,4 +293,9 @@ function f_boundscheck_elim(n) end @test Tuple{} <: code_typed(f_boundscheck_elim, Tuple{Int})[1][2] +# https://github.com/JuliaArrays/StaticArrays.jl/issues/1155 +@test Base.return_types() do + typeintersect(Int, Integer) +end |> only === Type{Int} + end diff --git a/test/ccall.jl b/test/ccall.jl index 0266dabd6332b..7e166ddbd9041 100644 --- a/test/ccall.jl +++ b/test/ccall.jl @@ -1757,37 +1757,11 @@ end )::Cstring))...) 
@test call == Base.remove_linenums!( quote - local arg1root = $(GlobalRef(Base, :cconvert))($(Expr(:escape, :Cstring)), $(Expr(:escape, :str))) - local arg1 = $(GlobalRef(Base, :unsafe_convert))($(Expr(:escape, :Cstring)), arg1root) - local arg2root = $(GlobalRef(Base, :cconvert))($(Expr(:escape, :Cint)), $(Expr(:escape, :num1))) - local arg2 = $(GlobalRef(Base, :unsafe_convert))($(Expr(:escape, :Cint)), arg2root) - local arg3root = $(GlobalRef(Base, :cconvert))($(Expr(:escape, :Cint)), $(Expr(:escape, :num2))) - local arg3 = $(GlobalRef(Base, :unsafe_convert))($(Expr(:escape, :Cint)), arg3root) - $(Expr(:foreigncall, - :($(Expr(:escape, :((:func, libstring))))), - :($(Expr(:escape, :Cstring))), - :($(Expr(:escape, :(($(Expr(:core, :svec)))(Cstring, Cint, Cint))))), - 0, - :(:ccall), - :arg1, :arg2, :arg3, :arg1root, :arg2root, :arg3root)) + ccall($(Expr(:escape, :((:func, libstring)))), $(Expr(:cconv, :ccall, 0)), $(Expr(:escape, :Cstring)), ($(Expr(:escape, :Cstring)), $(Expr(:escape, :Cint)), $(Expr(:escape, :Cint))), $(Expr(:escape, :str)), $(Expr(:escape, :num1)), $(Expr(:escape, :num2))) end) - # pointer interpolation - call = ccall_macro_lower(:ccall, ccall_macro_parse(:( $(Expr(:$, :fptr))("bar"::Cstring)::Cvoid ))...) 
- @test Base.remove_linenums!(call) == Base.remove_linenums!( - quote - func = $(Expr(:escape, :fptr)) - begin - if !(func isa Ptr{Cvoid}) - name = :fptr - throw(ArgumentError("interpolated function `$(name)` was not a Ptr{Cvoid}, but $(typeof(func))")) - end - end - local arg1root = $(GlobalRef(Base, :cconvert))($(Expr(:escape, :Cstring)), $(Expr(:escape, "bar"))) - local arg1 = $(GlobalRef(Base, :unsafe_convert))($(Expr(:escape, :Cstring)), arg1root) - $(Expr(:foreigncall, :func, :($(Expr(:escape, :Cvoid))), :($(Expr(:escape, :(($(Expr(:core, :svec)))(Cstring))))), 0, :(:ccall), :arg1, :arg1root)) - end) - + local fptr = :x + @test_throws ArgumentError("interpolated function `fptr` was not a Ptr{Cvoid}, but Symbol") @ccall $fptr()::Cvoid end @testset "check error paths" begin diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index 1d04926ef23af..917031b57fe5f 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -134,10 +134,11 @@ end let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` # tests for handling of ENV errors - let v = writereadpipeline("println(\"REPL: \", @which(less), @isdefined(InteractiveUtils))", - setenv(`$exename -i -E 'empty!(LOAD_PATH); @isdefined InteractiveUtils'`, + let v = writereadpipeline( + "println(\"REPL: \", @which(less), @isdefined(InteractiveUtils))", + setenv(`$exename -i -E '@assert isempty(LOAD_PATH); push!(LOAD_PATH, "@stdlib"); @isdefined InteractiveUtils'`, "JULIA_LOAD_PATH" => "", - "JULIA_DEPOT_PATH" => "", + "JULIA_DEPOT_PATH" => ";:", "HOME" => homedir())) @test v == ("false\nREPL: InteractiveUtilstrue\n", true) end @@ -299,37 +300,43 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` @test errors_not_signals(`$exename -C invalidtarget`) @test errors_not_signals(`$exename --cpu-target=invalidtarget`) - # -t, --threads - code = "print(Threads.threadpoolsize())" - cpu_threads = ccall(:jl_effective_threads, Int32, ()) - @test string(cpu_threads) == - read(`$exename --threads auto 
-e $code`, String) == - read(`$exename --threads=auto -e $code`, String) == - read(`$exename -tauto -e $code`, String) == - read(`$exename -t auto -e $code`, String) - for nt in (nothing, "1") - withenv("JULIA_NUM_THREADS" => nt) do - @test read(`$exename --threads=2 -e $code`, String) == - read(`$exename -t 2 -e $code`, String) == "2" + if Sys.iswindows() + # -t, --threads + code = "print(Threads.threadpoolsize())" + cpu_threads = ccall(:jl_effective_threads, Int32, ()) + @test string(cpu_threads) == + read(`$exename --threads auto -e $code`, String) == + read(`$exename --threads=auto -e $code`, String) == + read(`$exename -tauto -e $code`, String) == + read(`$exename -t auto -e $code`, String) + for nt in (nothing, "1") + withenv("JULIA_NUM_THREADS" => nt) do + @test read(`$exename --threads=2 -e $code`, String) == + read(`$exename -t 2 -e $code`, String) == "2" + end end - end - # We want to test oversubscription, but on manycore machines, this can - # actually exhaust limited PID spaces - cpu_threads = max(2*cpu_threads, min(50, 10*cpu_threads)) - if Sys.WORD_SIZE == 32 - cpu_threads = min(cpu_threads, 50) - end - @test read(`$exename -t $cpu_threads -e $code`, String) == string(cpu_threads) - withenv("JULIA_NUM_THREADS" => string(cpu_threads)) do - @test read(`$exename -e $code`, String) == string(cpu_threads) - end - @test errors_not_signals(`$exename -t 0`) - @test errors_not_signals(`$exename -t -1`) + # We want to test oversubscription, but on manycore machines, this can + # actually exhaust limited PID spaces + cpu_threads = max(2*cpu_threads, min(50, 10*cpu_threads)) + if Sys.WORD_SIZE == 32 + cpu_threads = min(cpu_threads, 50) + end + @test read(`$exename -t $cpu_threads -e $code`, String) == string(cpu_threads) + withenv("JULIA_NUM_THREADS" => string(cpu_threads)) do + @test read(`$exename -e $code`, String) == string(cpu_threads) + end + @test errors_not_signals(`$exename -t 0`) + @test errors_not_signals(`$exename -t -1`) - # Combining --threads and 
--procs: --threads does propagate - withenv("JULIA_NUM_THREADS" => nothing) do - code = "print(sum(remotecall_fetch(Threads.threadpoolsize, x) for x in procs()))" - @test read(`$exename -p2 -t2 -e $code`, String) == "6" + # Combining --threads and --procs: --threads does propagate + withenv("JULIA_NUM_THREADS" => nothing) do + code = "print(sum(remotecall_fetch(Threads.threadpoolsize, x) for x in procs()))" + @test read(`$exename -p2 -t2 -e $code`, String) == "6" + end + else + @test_skip "Command line tests with -t are flakey on non-Windows OS" + # Known issue: https://github.com/JuliaLang/julia/issues/49154 + # These tests should be fixed and reenabled on all operating systems. end # Combining --threads and invalid -C should yield a decent error @@ -355,12 +362,19 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` withenv("JULIA_NUM_GC_THREADS" => nt) do @test read(`$exename --gcthreads=2 -e $code`, String) == "2" end + withenv("JULIA_NUM_GC_THREADS" => nt) do + @test read(`$exename --gcthreads=2,1 -e $code`, String) == "3" + end end withenv("JULIA_NUM_GC_THREADS" => 2) do @test read(`$exename -e $code`, String) == "2" end + withenv("JULIA_NUM_GC_THREADS" => "2,1") do + @test read(`$exename -e $code`, String) == "3" + end + # --machine-file # this does not check that machine file works, # only that the filename gets correctly passed to the option struct @@ -512,29 +526,34 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` # -g @test readchomp(`$exename -E "Base.JLOptions().debug_level" -g`) == "2" - let code = writereadpipeline("code_llvm(stdout, +, (Int64, Int64), raw=true, dump_module=true)", `$exename -g0`) - @test code[2] - code = code[1] - @test occursin("llvm.module.flags", code) - @test !occursin("llvm.dbg.cu", code) - @test !occursin("int.jl", code) - @test !occursin("Int64", code) - end - let code = writereadpipeline("code_llvm(stdout, +, (Int64, Int64), raw=true, dump_module=true)", `$exename -g1`) - @test code[2] - 
code = code[1] - @test occursin("llvm.module.flags", code) - @test occursin("llvm.dbg.cu", code) - @test occursin("int.jl", code) - @test !occursin("Int64", code) - end - let code = writereadpipeline("code_llvm(stdout, +, (Int64, Int64), raw=true, dump_module=true)", `$exename -g2`) - @test code[2] - code = code[1] - @test occursin("llvm.module.flags", code) - @test occursin("llvm.dbg.cu", code) - @test occursin("int.jl", code) - @test occursin("\"Int64\"", code) + # --print-before/--print-after with pass names is broken on Windows due to no-gnu-unique issues + if !Sys.iswindows() + withenv("JULIA_LLVM_ARGS" => "--print-before=FinalLowerGC") do + let code = readchomperrors(`$exename -g0 -E "@eval Int64(1)+Int64(1)"`) + @test code[1] + code = code[3] + @test occursin("llvm.module.flags", code) + @test !occursin("llvm.dbg.cu", code) + @test !occursin("int.jl", code) + @test !occursin("\"Int64\"", code) + end + let code = readchomperrors(`$exename -g1 -E "@eval Int64(1)+Int64(1)"`) + @test code[1] + code = code[3] + @test occursin("llvm.module.flags", code) + @test occursin("llvm.dbg.cu", code) + @test occursin("int.jl", code) + @test !occursin("\"Int64\"", code) + end + let code = readchomperrors(`$exename -g2 -E "@eval Int64(1)+Int64(1)"`) + @test code[1] + code = code[3] + @test occursin("llvm.module.flags", code) + @test occursin("llvm.dbg.cu", code) + @test occursin("int.jl", code) + @test occursin("\"Int64\"", code) + end + end end # --check-bounds @@ -917,7 +936,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` close(in) close(err.in) txt = readline(err) - @test startswith(txt, "ERROR: syntax: incomplete") + @test startswith(txt, r"ERROR: (syntax: incomplete|ParseError:)") end # Issue #29855 diff --git a/test/compiler/AbstractInterpreter.jl b/test/compiler/AbstractInterpreter.jl index 0e94d42fa8866..9db0a8903593d 100644 --- a/test/compiler/AbstractInterpreter.jl +++ b/test/compiler/AbstractInterpreter.jl @@ -125,7 +125,7 @@ end using Core: 
SlotNumber, Argument using Core.Compiler: slot_id, tmerge_fast_path import .CC: - AbstractLattice, BaseInferenceLattice, IPOResultLattice, InferenceLattice, OptimizerLattice, + AbstractLattice, BaseInferenceLattice, IPOResultLattice, InferenceLattice, widenlattice, is_valid_lattice_norec, typeinf_lattice, ipo_lattice, optimizer_lattice, widenconst, tmeet, tmerge, ⊑, abstract_eval_special_value, widenreturn @@ -146,7 +146,7 @@ const AnyTaintLattice{L} = Union{TaintLattice{L},InterTaintLattice{L}} CC.typeinf_lattice(::TaintInterpreter) = InferenceLattice(TaintLattice(BaseInferenceLattice.instance)) CC.ipo_lattice(::TaintInterpreter) = InferenceLattice(InterTaintLattice(IPOResultLattice.instance)) -CC.optimizer_lattice(::TaintInterpreter) = InterTaintLattice(OptimizerLattice()) +CC.optimizer_lattice(::TaintInterpreter) = InterTaintLattice(SimpleInferenceLattice.instance) struct Taint typ @@ -246,13 +246,13 @@ end # External lattice without `Conditional` import .CC: - AbstractLattice, ConstsLattice, PartialsLattice, InferenceLattice, OptimizerLattice, + AbstractLattice, ConstsLattice, PartialsLattice, InferenceLattice, typeinf_lattice, ipo_lattice, optimizer_lattice @newinterp NonconditionalInterpreter CC.typeinf_lattice(::NonconditionalInterpreter) = InferenceLattice(PartialsLattice(ConstsLattice())) CC.ipo_lattice(::NonconditionalInterpreter) = InferenceLattice(PartialsLattice(ConstsLattice())) -CC.optimizer_lattice(::NonconditionalInterpreter) = OptimizerLattice(PartialsLattice(ConstsLattice())) +CC.optimizer_lattice(::NonconditionalInterpreter) = PartialsLattice(ConstsLattice()) @test Base.return_types((Any,); interp=NonconditionalInterpreter()) do x c = isa(x, Int) || isa(x, Float64) @@ -279,9 +279,9 @@ CC.getsplit_impl(info::NoinlineCallInfo, idx::Int) = CC.getsplit(info.info, idx) CC.getresult_impl(info::NoinlineCallInfo, idx::Int) = CC.getresult(info.info, idx) function CC.abstract_call(interp::NoinlineInterpreter, - arginfo::CC.ArgInfo, si::CC.StmtInfo, 
sv::CC.InferenceState, max_methods::Union{Int,Nothing}) + arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int) ret = @invoke CC.abstract_call(interp::CC.AbstractInterpreter, - arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Union{Int,Nothing}) + arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int) if sv.mod in noinline_modules(interp) return CC.CallMeta(ret.rt, ret.effects, NoinlineCallInfo(ret.info)) end @@ -348,3 +348,8 @@ let NoinlineModule = Module() @test count(iscall((src, inlined_usually)), src.code) == 0 end end + +# Make sure that Core.Compiler has enough NamedTuple infrastructure +# to properly give error messages for basic kwargs... +Core.eval(Core.Compiler, quote f(;a=1) = a end) +@test_throws MethodError Core.Compiler.f(;b=2) diff --git a/test/compiler/EscapeAnalysis/EAUtils.jl b/test/compiler/EscapeAnalysis/EAUtils.jl index 6894733e0fa45..bb3273b3e707a 100644 --- a/test/compiler/EscapeAnalysis/EAUtils.jl +++ b/test/compiler/EscapeAnalysis/EAUtils.jl @@ -64,7 +64,7 @@ import Core: import .CC: InferenceResult, OptimizationState, IRCode, copy as cccopy, @timeit, convert_to_ircode, slot2reg, compact!, ssa_inlining_pass!, sroa_pass!, - adce_pass!, type_lift_pass!, JLOptions, verify_ir, verify_linetable + adce_pass!, JLOptions, verify_ir, verify_linetable import .EA: analyze_escapes, ArgEscapeCache, EscapeInfo, EscapeState, is_ipo_profitable # when working outside of Core.Compiler, @@ -224,7 +224,6 @@ function run_passes_with_ea(interp::EscapeAnalyzer, ci::CodeInfo, sv::Optimizati end @timeit "SROA" ir = sroa_pass!(ir) @timeit "ADCE" ir = adce_pass!(ir) - @timeit "type lift" ir = type_lift_pass!(ir) @timeit "compact 3" ir = compact!(ir) if JLOptions().debug_level == 2 @timeit "verify 3" (verify_ir(ir); verify_linetable(ir.linetable)) diff --git a/test/compiler/EscapeAnalysis/local.jl b/test/compiler/EscapeAnalysis/local.jl index dd324c3619dc7..27e4fdeee28c6 100644 --- 
a/test/compiler/EscapeAnalysis/local.jl +++ b/test/compiler/EscapeAnalysis/local.jl @@ -63,8 +63,7 @@ include(normpath(@__DIR__, "setup.jl")) return @isdefined(s) end i = findfirst(isT(Base.RefValue{String}), result.ir.stmts.type) # find allocation statement - @test !isnothing(i) - @test has_no_escape(result.state[SSAValue(i)]) + @test isnothing(i) || has_no_escape(result.state[SSAValue(i)]) end let # ϕ-node result = code_escapes((Bool,Any,Any)) do cond, a, b diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl index 8a3949212ea16..e93ecd232498f 100644 --- a/test/compiler/codegen.jl +++ b/test/compiler/codegen.jl @@ -17,8 +17,8 @@ end # The tests below assume a certain format and safepoint_on_entry=true breaks that. function get_llvm(@nospecialize(f), @nospecialize(t), raw=true, dump_module=false, optimize=true) - params = Base.CodegenParams(safepoint_on_entry=false) - d = InteractiveUtils._dump_function(f, t, false, false, !raw, dump_module, :att, optimize, :none, false, params) + params = Base.CodegenParams(safepoint_on_entry=false, gcstack_arg = false) + d = InteractiveUtils._dump_function(f, t, false, false, raw, dump_module, :att, optimize, :none, false, params) sprint(print, d) end diff --git a/test/compiler/contextual.jl b/test/compiler/contextual.jl index 0e8fe27591a5e..bbcf7b0dfb959 100644 --- a/test/compiler/contextual.jl +++ b/test/compiler/contextual.jl @@ -201,3 +201,22 @@ finally @show err end end + +# Test that writing a bad cassette-style pass gives the expected error (#49715) +function generator49715(world, source, self, f, tt) + tt = tt.parameters[1] + sig = Tuple{f, tt.parameters...} + mi = Base._which(sig; world) + + error("oh no") + + stub = Core.GeneratedFunctionStub(identity, Core.svec(:methodinstance, :ctx, :x, :f), Core.svec()) + stub(world, source, :(nothing)) +end + +@eval function doit49715(f, tt) + $(Expr(:meta, :generated, generator49715)) + $(Expr(:meta, :generated_only)) +end + +@test_throws "oh no" doit49715(sin, 
Tuple{Int}) diff --git a/test/compiler/effects.jl b/test/compiler/effects.jl index f809192d8d1ed..a4b21da523a8e 100644 --- a/test/compiler/effects.jl +++ b/test/compiler/effects.jl @@ -760,21 +760,27 @@ end # arrayref # -------- -let effects = Base.infer_effects(Base.arrayref, (Vector{Any},Int)) - @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects) - @test Core.Compiler.is_effect_free(effects) - @test !Core.Compiler.is_nothrow(effects) - @test Core.Compiler.is_terminates(effects) +for tt = Any[(Bool,Vector{Any},Int), + (Bool,Matrix{Any},Int,Int)] + @testset let effects = Base.infer_effects(Base.arrayref, tt) + @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects) + @test Core.Compiler.is_effect_free(effects) + @test !Core.Compiler.is_nothrow(effects) + @test Core.Compiler.is_terminates(effects) + end end # arrayset # -------- -let effects = Base.infer_effects(Base.arrayset, (Vector{Any},Any,Int)) - @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects) - @test Core.Compiler.is_effect_free_if_inaccessiblememonly(effects) - @test !Core.Compiler.is_nothrow(effects) - @test Core.Compiler.is_terminates(effects) +for tt = Any[(Bool,Vector{Any},Any,Int), + (Bool,Matrix{Any},Any,Int,Int)] + @testset let effects = Base.infer_effects(Base.arrayset, tt) + @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects) + @test Core.Compiler.is_effect_free_if_inaccessiblememonly(effects) + @test !Core.Compiler.is_nothrow(effects) + @test Core.Compiler.is_terminates(effects) + end end # nothrow for arrayset @test Base.infer_effects((Vector{Int},Int,Int)) do a, v, i @@ -982,3 +988,13 @@ isassigned_effects(s) = isassigned(Ref(s)) @test fully_eliminated(; retval=true) do isassigned_effects(:foo) end + +# Effects of Base.hasfield (#50198) +hf50198(s) = hasfield(typeof((;x=1, y=2)), s) +f50198() = (hf50198(Ref(:x)[]); nothing) +@test fully_eliminated(f50198) + +# Effects properly applied to flags by irinterp (#50311) +f50311(x, s) = Symbol(s) 
+g50311(x) = Val{f50311((1.0, x), "foo")}() +@test fully_eliminated(g50311, Tuple{Float64}) diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index 5987e10401bc8..ded9438037733 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -362,7 +362,7 @@ code_llvm(devnull, invoke_g10878, ()) # issue #10930 -@test isa(code_typed(promote,(Any,Any,Vararg{Any})), Array) +@test isa(Base.return_types(promote, (Any,Any,Vararg{Any})), Vector) find_tvar10930(sig::Type{T}) where {T<:Tuple} = 1 function find_tvar10930(arg) if isa(arg, Type) && arg<:Tuple @@ -644,7 +644,6 @@ for (codetype, all_ssa) in Any[ local i for i = 1:length(code.ssavaluetypes) typ = code.ssavaluetypes[i] - typ isa Core.Compiler.MaybeUndef && (typ = typ.typ) @test isa(typ, Type) || isa(typ, Const) || isa(typ, Conditional) || typ end test_inferred_static(codetype, all_ssa) @@ -699,7 +698,7 @@ f_infer_abstract_fieldtype() = fieldtype(HasAbstractlyTypedField, :x) let fieldtype_tfunc(@nospecialize args...) = Core.Compiler.fieldtype_tfunc(Core.Compiler.fallback_lattice, args...), fieldtype_nothrow(@nospecialize(s0), @nospecialize(name)) = Core.Compiler.fieldtype_nothrow( - Core.Compiler.OptimizerLattice(), s0, name) + Core.Compiler.SimpleInferenceLattice.instance, s0, name) @test fieldtype_tfunc(Union{}, :x) == Union{} @test fieldtype_tfunc(Union{Type{Int32}, Int32}, Const(:x)) == Union{} @test fieldtype_tfunc(Union{Type{Base.RefValue{T}}, Type{Int32}} where {T<:Array}, Const(:x)) == Type{<:Array} @@ -729,19 +728,6 @@ let fieldtype_tfunc(@nospecialize args...) 
= @test TypeVar <: fieldtype_tfunc(Any, Any) end -import Core.Compiler: MaybeUndef, builtin_nothrow -let 𝕃ₒ = Core.Compiler.OptimizerLattice() - @test !builtin_nothrow(𝕃ₒ, setfield!, Any[Base.RefValue{String}, Core.Const(:x), MaybeUndef(String)], Any) - @test !builtin_nothrow(𝕃ₒ, setfield!, Any[Base.RefValue{String}, Core.Const(:x), MaybeUndef(String), Core.Const(:not_atomic)], Any) - @test !builtin_nothrow(𝕃ₒ, isdefined, Any[Any,MaybeUndef(Symbol)], Bool) - @test !builtin_nothrow(𝕃ₒ, fieldtype, Any[MaybeUndef(Any),Symbol], Any) - @test !builtin_nothrow(𝕃ₒ, isa, Any[Type,MaybeUndef(Type)], Any) - @test !builtin_nothrow(𝕃ₒ, <:, Any[MaybeUndef(Any),MaybeUndef(Any)], Any) - @test !builtin_nothrow(𝕃ₒ, Core.ifelse, Any[MaybeUndef(Bool),Any,Any], Any) - @test !builtin_nothrow(𝕃ₒ, typeassert, Any[MaybeUndef(Any),Type{Symbol}], Any) - @test !builtin_nothrow(𝕃ₒ, Core.get_binding_type, Any[Module,MaybeUndef(Symbol)], Any) -end - # issue #11480 @noinline f11480(x,y) = x let A = Ref @@ -1167,25 +1153,18 @@ let typeargs = Tuple{Type{Int},Type{Int},Type{Int},Type{Int},Type{Int},Type{Int} @test only(Base.return_types(promote_type, typeargs)) === Type{Int} end -function count_specializations(method::Method) - specs = method.specializations - specs isa Core.MethodInstance && return 1 - n = count(!isnothing, specs::Core.SimpleVector) - return n -end - # demonstrate that inference can complete without waiting for MAX_TYPE_DEPTH copy_dims_out(out) = () copy_dims_out(out, dim::Int, tail...) = copy_dims_out((out..., dim), tail...) copy_dims_out(out, dim::Colon, tail...) = copy_dims_out((out..., dim), tail...) @test Base.return_types(copy_dims_out, (Tuple{}, Vararg{Union{Int,Colon}})) == Any[Tuple{}, Tuple{}, Tuple{}] -@test all(m -> 4 < count_specializations(m) < 15, methods(copy_dims_out)) # currently about 5 +@test all(m -> 4 < length(Base.specializations(m)) < 15, methods(copy_dims_out)) # currently about 5 copy_dims_pair(out) = () copy_dims_pair(out, dim::Int, tail...) 
= copy_dims_pair(out => dim, tail...) copy_dims_pair(out, dim::Colon, tail...) = copy_dims_pair(out => dim, tail...) @test Base.return_types(copy_dims_pair, (Tuple{}, Vararg{Union{Int,Colon}})) == Any[Tuple{}, Tuple{}, Tuple{}] -@test all(m -> 3 < count_specializations(m) < 15, methods(copy_dims_pair)) # currently about 5 +@test all(m -> 3 < length(Base.specializations(m)) < 15, methods(copy_dims_pair)) # currently about 5 # splatting an ::Any should still allow inference to use types of parameters preceding it f22364(::Int, ::Any...) = 0 @@ -1740,7 +1719,7 @@ let setfield!_tfunc(@nospecialize xs...) = @test setfield!_tfunc(ABCDconst, Const(4), Any) === Union{} end let setfield!_nothrow(@nospecialize xs...) = - Core.Compiler.setfield!_nothrow(Core.Compiler.OptimizerLattice(), xs...) + Core.Compiler.setfield!_nothrow(Core.Compiler.SimpleInferenceLattice.instance, xs...) @test setfield!_nothrow(Base.RefValue{Int}, Const(:x), Int) @test setfield!_nothrow(Base.RefValue{Int}, Const(1), Int) @test setfield!_nothrow(Base.RefValue{Any}, Const(:x), Int) @@ -2172,15 +2151,15 @@ mutable struct AliasableConstField{S,T} end import Core.Compiler: - InferenceLattice, OptimizerLattice, MustAliasesLattice, InterMustAliasesLattice, - BaseInferenceLattice, IPOResultLattice, typeinf_lattice, ipo_lattice, optimizer_lattice + InferenceLattice, MustAliasesLattice, InterMustAliasesLattice, + BaseInferenceLattice, SimpleInferenceLattice, IPOResultLattice, typeinf_lattice, ipo_lattice, optimizer_lattice include("newinterp.jl") @newinterp MustAliasInterpreter let CC = Core.Compiler CC.typeinf_lattice(::MustAliasInterpreter) = InferenceLattice(MustAliasesLattice(BaseInferenceLattice.instance)) CC.ipo_lattice(::MustAliasInterpreter) = InferenceLattice(InterMustAliasesLattice(IPOResultLattice.instance)) - CC.optimizer_lattice(::MustAliasInterpreter) = OptimizerLattice() + CC.optimizer_lattice(::MustAliasInterpreter) = SimpleInferenceLattice.instance end # lattice @@ -2403,6 +2382,20 @@ 
from_interconditional_check22(::Union{Int,String}, y) = isa(y, Int) return 0 end |> only === Int +# prioritize constraints on slot objects +# https://github.com/aviatesk/JET.jl/issues/509 +struct JET509 + list::Union{Tuple{},Vector{Int}} +end +jet509_hasitems(list) = length(list) >= 1 +@test Base.return_types((JET509,); interp=MustAliasInterpreter()) do ilist::JET509 + list = ilist.list + if jet509_hasitems(list) + return list + end + error("list is empty") +end |> only == Vector{Int} + # === constraint # -------------- @@ -3390,7 +3383,7 @@ const DenseIdx = Union{IntRange,Integer} # Non uniformity in expressions with PartialTypeVar @test Core.Compiler.:⊑(Core.Compiler.PartialTypeVar(TypeVar(:N), true, true), TypeVar) let N = TypeVar(:N) - 𝕃 = Core.Compiler.OptimizerLattice() + 𝕃 = Core.Compiler.SimpleInferenceLattice.instance argtypes = Any[Core.Compiler.Const(NTuple), Core.Compiler.PartialTypeVar(N, true, true), Core.Compiler.Const(Any)] @@ -3506,9 +3499,16 @@ end Tuple{Int, Char, Int}, Tuple{Int, Int, Char}, Tuple{Int, Int, Int}} # Test that these don't throw @test Core.Compiler.typesubtract(Tuple{Vararg{Int}}, Tuple{Vararg{Char}}, 0) == Tuple{Vararg{Int}} +@test Core.Compiler.typesubtract(Tuple{Vararg{Int}}, Tuple{Vararg{Int}}, 0) == Union{} +@test Core.Compiler.typesubtract(Tuple{String,Int}, Tuple{String,Vararg{Int}}, 0) == Union{} +@test Core.Compiler.typesubtract(Tuple{String,Vararg{Int}}, Tuple{String,Int}, 0) == Tuple{String,Vararg{Int}} @test Core.Compiler.typesubtract(NTuple{3, Real}, NTuple{3, Char}, 0) == NTuple{3, Real} @test Core.Compiler.typesubtract(NTuple{3, Union{Real, Char}}, NTuple{2, Char}, 0) == NTuple{3, Union{Real, Char}} +@test Core.Compiler.compatible_vatuple(Tuple{String,Vararg{Int}}, Tuple{String,Vararg{Int}}) +@test !Core.Compiler.compatible_vatuple(Tuple{String,Int}, Tuple{String,Vararg{Int}}) +@test !Core.Compiler.compatible_vatuple(Tuple{String,Vararg{Int}}, Tuple{String,Int}) + @test Base.return_types(Issue35566.f) == 
[Val{:expected}] # constant prop through keyword arguments @@ -3797,7 +3797,7 @@ end end end end - @test occursin("thunk from $(@__MODULE__) starting at $(@__FILE__):$((@__LINE__) - 5)", string(timingmod.children)) + @test occursin("thunk from $(@__MODULE__) starting at $(@__FILE__):$((@__LINE__) - 6)", string(timingmod.children)) # END LINE NUMBER SENSITIVITY # Recursive function @@ -4160,6 +4160,102 @@ Base.getproperty(x::Interface41024Extended, sym::Symbol) = x.x end |> only === Int +function call_func_itr(func, itr) + local r = 0 + r += func(itr[1]) + r += func(itr[2]) + r += func(itr[3]) + r += func(itr[4]) + r += func(itr[5]) + r +end + +global inline_checker = c -> c # untyped global, a call of this func will prevent inlining +# if `f` is inlined, `GlobalRef(m, :inline_checker)` should appear within the body of `invokef` +function is_inline_checker(@nospecialize stmt) + isa(stmt, GlobalRef) && stmt.name === :inline_checker +end + +function func_nospecialized(@nospecialize a) + c = isa(a, Function) + inline_checker(c) # dynamic dispatch, preventing inlining +end + +@inline function func_nospecialized_inline(@nospecialize a) + c = isa(a, Function) + inline_checker(c) # dynamic dispatch, preventing inlining (but forced by the annotation) +end + +Base.@nospecializeinfer function func_nospecializeinfer(@nospecialize a) + c = isa(a, Function) + inline_checker(c) # dynamic dispatch, preventing inlining +end + +Base.@nospecializeinfer @inline function func_nospecializeinfer_inline(@nospecialize a) + c = isa(a, Function) + inline_checker(c) # dynamic dispatch, preventing inlining (but forced by the annotation) +end + +Base.@nospecializeinfer Base.@constprop :aggressive function func_nospecializeinfer_constprop(c::Bool, @nospecialize a) + if c + return inline_checker(a) # dynamic dispatch, preventing inlining/constprop (but forced by the annotation) + end + return false +end +Base.@nospecializeinfer func_nospecializeinfer_constprop(@nospecialize a) = 
func_nospecializeinfer_constprop(false, a) + +itr_dispatchonly = Any[sin, muladd, "foo", nothing, missing] # untyped container can cause excessive runtime dispatch +itr_withinfernce = tuple(sin, muladd, "foo", nothing, missing) # typed container can cause excessive inference + +@testset "compilation annotations" begin + @testset "@nospecialize" begin + # `@nospecialize` should suppress runtime dispatches of `nospecialize` + @test call_func_itr(func_nospecialized, itr_dispatchonly) == 2 + @test length(Base.specializations(only(methods((func_nospecialized))))) == 1 + # `@nospecialize` should allow inference to happen + @test call_func_itr(func_nospecialized, itr_withinfernce) == 2 + @test length(Base.specializations(only(methods((func_nospecialized))))) == 6 + @test count(is_inline_checker, @get_code call_func_itr(func_nospecialized, itr_dispatchonly)) == 0 + + # `@nospecialize` should allow inlinining + @test call_func_itr(func_nospecialized_inline, itr_dispatchonly) == 2 + @test length(Base.specializations(only(methods((func_nospecialized_inline))))) == 1 + @test call_func_itr(func_nospecialized_inline, itr_withinfernce) == 2 + @test length(Base.specializations(only(methods((func_nospecialized_inline))))) == 6 + @test count(is_inline_checker, @get_code call_func_itr(func_nospecialized_inline, itr_dispatchonly)) == 5 + end + + @testset "@nospecializeinfer" begin + # `@nospecialize` should suppress runtime dispatches of `nospecialize` + @test call_func_itr(func_nospecializeinfer, itr_dispatchonly) == 2 + @test length(Base.specializations(only(methods((func_nospecializeinfer))))) == 1 + # `@nospecializeinfer` suppresses inference also + @test call_func_itr(func_nospecializeinfer, itr_withinfernce) == 2 + @test length(Base.specializations(only(methods((func_nospecializeinfer))))) == 1 + @test !any(is_inline_checker, @get_code call_func_itr(func_nospecializeinfer, itr_dispatchonly)) + + # `@nospecializeinfer` should allow inlinining + @test 
call_func_itr(func_nospecializeinfer_inline, itr_dispatchonly) == 2 + @test length(Base.specializations(only(methods((func_nospecializeinfer_inline))))) == 1 + @test call_func_itr(func_nospecializeinfer_inline, itr_withinfernce) == 2 + @test length(Base.specializations(only(methods((func_nospecializeinfer_inline))))) == 1 + @test any(is_inline_checker, @get_code call_func_itr(func_nospecializeinfer_inline, itr_dispatchonly)) + + # `@nospecializeinfer` should allow constprop + @test Base.return_types((Any,)) do x + Val(func_nospecializeinfer_constprop(x)) + end |> only == Val{false} + @test call_func_itr(func_nospecializeinfer_constprop, itr_dispatchonly) == 0 + for m = methods(func_nospecializeinfer_constprop) + @test length(Base.specializations(m)) == 1 + end + @test call_func_itr(func_nospecializeinfer_constprop, itr_withinfernce) == 0 + for m = methods(func_nospecializeinfer_constprop) + @test length(Base.specializations(m)) == 1 + end + end +end + @testset "fieldtype for unions" begin # e.g. 
issue #40177 f40177(::Type{T}) where {T} = fieldtype(T, 1) for T in [ @@ -4904,3 +5000,17 @@ let src = code_typed1((Bool,Base.RefValue{String}, Base.RefValue{Any},Int,)) do end @test count(@nospecialize(x)->isa(x, Core.PhiNode), src.code) == 0 end + +struct Issue49785{S, T<:S} end +let 𝕃 = Core.Compiler.SimpleInferenceLattice.instance + argtypes = Any[Core.Compiler.Const(Issue49785), + Union{Type{String},Type{Int}}, + Union{Type{String},Type{Int}}] + rt = Type{Issue49785{<:Any, Int}} + # the following should not throw + @test !Core.Compiler.apply_type_nothrow(𝕃, argtypes, rt) + @test code_typed() do + S = Union{Type{String},Type{Int}}[Int][1] + map(T -> Issue49785{S,T}, (a = S,)) + end isa Vector +end diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index 7920212537608..be821a88f00cc 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -705,6 +705,27 @@ mktempdir() do dir end end +# callsite inlining with cached frames +issue49823_events = @NamedTuple{evid::Int8, base_time::Float64}[ + (evid = 1, base_time = 0.0), (evid = -1, base_time = 0.0)] +issue49823_fl1(t, events) = @inline findlast(x -> x.evid ∈ (1, 4) && x.base_time <= t, events) +issue49823_fl3(t, events) = @inline findlast(x -> any(==(x.evid), (1,4)) && x.base_time <= t, events) +issue49823_fl5(t, events) = begin + f = let t=t + x -> x.evid ∈ (1, 4) && x.base_time <= t + end + @inline findlast(f, events) +end +let src = @code_typed1 issue49823_fl1(0.0, issue49823_events) + @test count(isinvoke(:findlast), src.code) == 0 # successful inlining +end +let src = @code_typed1 issue49823_fl3(0.0, issue49823_events) + @test count(isinvoke(:findlast), src.code) == 0 # successful inlining +end +let src = @code_typed1 issue49823_fl5(0.0, issue49823_events) + @test count(isinvoke(:findlast), src.code) == 0 # successful inlining +end + # Issue #42264 - crash on certain union splits let f(x) = (x...,) # Test splatting with a Union of non-{Tuple, SimpleVector} types that require creating 
new `iterate` calls diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index c704a8cf1c434..a1738b52161bf 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -537,7 +537,7 @@ end # comparison lifting # ================== -let # lifting `===` +let # lifting `===` through PhiNode src = code_typed1((Bool,Int,)) do c, x y = c ? x : nothing y === nothing # => ϕ(false, true) @@ -557,7 +557,15 @@ let # lifting `===` end end -let # lifting `isa` +let # lifting `===` through Core.ifelse + src = code_typed1((Bool,Int,)) do c, x + y = Core.ifelse(c, x, nothing) + y === nothing # => Core.ifelse(c, false, true) + end + @test count(iscall((src, ===)), src.code) == 0 +end + +let # lifting `isa` through PhiNode src = code_typed1((Bool,Int,)) do c, x y = c ? x : nothing isa(y, Int) # => ϕ(true, false) @@ -580,7 +588,16 @@ let # lifting `isa` end end -let # lifting `isdefined` +let # lifting `isa` through Core.ifelse + src = code_typed1((Bool,Int,)) do c, x + y = Core.ifelse(c, x, nothing) + isa(y, Int) # => Core.ifelse(c, true, false) + end + @test count(iscall((src, isa)), src.code) == 0 +end + + +let # lifting `isdefined` through PhiNode src = code_typed1((Bool,Some{Int},)) do c, x y = c ? 
x : nothing isdefined(y, 1) # => ϕ(true, false) @@ -603,6 +620,14 @@ let # lifting `isdefined` end end +let # lifting `isdefined` through Core.ifelse + src = code_typed1((Bool,Some{Int},)) do c, x + y = Core.ifelse(c, x, nothing) + isdefined(y, 1) # => Core.ifelse(c, true, false) + end + @test count(iscall((src, isdefined)), src.code) == 0 +end + mutable struct Foo30594; x::Float64; end Base.copy(x::Foo30594) = Foo30594(x.x) function add!(p::Foo30594, off::Foo30594) @@ -717,6 +742,57 @@ let m = Meta.@lower 1 + 1 @test Core.Compiler.verify_ir(ir) === nothing end +# A lifted Core.ifelse with an eliminated branch (#50276) +let m = Meta.@lower 1 + 1 + @assert Meta.isexpr(m, :thunk) + src = m.args[1]::CodeInfo + src.code = Any[ + # block 1 + #= %1: =# Core.Argument(2), + # block 2 + #= %2: =# Expr(:call, Core.ifelse, SSAValue(1), true, missing), + #= %3: =# GotoIfNot(SSAValue(2), 11), + # block 3 + #= %4: =# PiNode(SSAValue(2), Bool), # <-- This PiNode is the trigger of the bug, since it + # means that only one branch of the Core.ifelse + # is lifted. + #= %5: =# GotoIfNot(false, 8), + # block 2 + #= %6: =# nothing, + #= %7: =# GotoNode(8), + # block 4 + #= %8: =# PhiNode(Int32[5, 7], Any[SSAValue(4), SSAValue(6)]), + # ^-- N.B. This PhiNode also needs to have a Union{ ... } type in order + # for lifting to be performed (it is skipped for e.g. 
`Bool`) + # + #= %9: =# Expr(:call, isa, SSAValue(8), Missing), + #= %10: =# ReturnNode(SSAValue(9)), + # block 5 + #= %11: =# ReturnNode(false), + ] + src.ssavaluetypes = Any[ + Any, + Union{Missing, Bool}, + Any, + Bool, + Any, + Missing, + Any, + Union{Nothing, Bool}, + Bool, + Any, + Any + ] + nstmts = length(src.code) + src.codelocs = fill(one(Int32), nstmts) + src.ssaflags = fill(one(Int32), nstmts) + src.slotflags = fill(zero(UInt8), 3) + ir = Core.Compiler.inflate_ir(src) + @test Core.Compiler.verify_ir(ir) === nothing + ir = @test_nowarn Core.Compiler.sroa_pass!(ir) + @test Core.Compiler.verify_ir(ir) === nothing +end + # Issue #31546 - missing widenconst in SROA function f_31546(x) (a, b) = x == "r" ? (false, false) : @@ -1245,3 +1321,37 @@ end return strct.b end @test fully_eliminated(one_const_field_partial; retval=5) + +# Test that SROA updates the type of intermediate phi nodes (#50285) +struct Immut50285 + x::Any +end + +function immut50285(b, x, y) + if b + z = Immut50285(x) + else + z = Immut50285(y) + end + z.x::Union{Float64, Int} +end + +let src = code_typed1(immut50285, Tuple{Bool, Int, Float64}) + @test count(isnew, src.code) == 0 + @test count(iscall((src, typeassert)), src.code) == 0 +end + +function mut50285(b, x, y) + z = Ref{Any}() + if b + z[] = x + else + z[] = y + end + z[]::Union{Float64, Int} +end + +let src = code_typed1(mut50285, Tuple{Bool, Int, Float64}) + @test count(isnew, src.code) == 0 + @test count(iscall((src, typeassert)), src.code) == 0 +end diff --git a/test/compiler/irutils.jl b/test/compiler/irutils.jl index 95ac0d555ef88..00de9b2472de4 100644 --- a/test/compiler/irutils.jl +++ b/test/compiler/irutils.jl @@ -1,10 +1,17 @@ -import Core: CodeInfo, ReturnNode, MethodInstance -import Core.Compiler: IRCode, IncrementalCompact, VarState, argextype, singleton_type -import Base.Meta: isexpr +using Core: CodeInfo, ReturnNode, MethodInstance +using Core.Compiler: IRCode, IncrementalCompact, singleton_type, VarState +using 
Base.Meta: isexpr +using InteractiveUtils: gen_call_with_extracted_types_and_kwargs -argextype(@nospecialize args...) = argextype(args..., VarState[]) +argextype(@nospecialize args...) = Core.Compiler.argextype(args..., VarState[]) code_typed1(args...; kwargs...) = first(only(code_typed(args...; kwargs...)))::CodeInfo +macro code_typed1(ex0...) + return gen_call_with_extracted_types_and_kwargs(__module__, :code_typed1, ex0) +end get_code(args...; kwargs...) = code_typed1(args...; kwargs...).code +macro get_code(ex0...) + return gen_call_with_extracted_types_and_kwargs(__module__, :get_code, ex0) +end # check if `x` is a statement with a given `head` isnew(@nospecialize x) = isexpr(x, :new) @@ -45,3 +52,6 @@ function fully_eliminated(@nospecialize args...; retval=(@__FILE__), kwargs...) return length(code) == 1 && isreturn(code[1]) end end +macro fully_eliminated(ex0...) + return gen_call_with_extracted_types_and_kwargs(__module__, :fully_eliminated, ex0) +end diff --git a/test/core.jl b/test/core.jl index f71baa843d25f..f0439afeed23c 100644 --- a/test/core.jl +++ b/test/core.jl @@ -1897,7 +1897,7 @@ function f4528(A, B) end end @test f4528(false, Int32(12)) === nothing -@test_throws ErrorException f4528(true, Int32(12)) +@test_throws ArgumentError f4528(true, Int32(12)) # issue #4518 f4518(x, y::Union{Int32,Int64}) = 0 @@ -8010,3 +8010,33 @@ end # donotdelete should not taint consistency of the containing function f_donotdete(x) = (Core.Compiler.donotdelete(x); 1) @test Core.Compiler.is_consistent(Base.infer_effects(f_donotdete, (Tuple{Float64},))) + +# Test conditional UndefRefError (#50250) +struct Foo50250 + a::Int + x + Foo50250(a) = new() + Foo50250(a, x) = new(x) +end + +struct Bar50250 + a::Int + x + Bar50250(a) = new(a) + Bar50250(a, x) = new(a, x) +end + +foo50250(b, y) = (b ? Foo50250(y, y) : Foo50250(y)).x +bar50250(b, y) = (b ? 
Bar50250(y, y) : Bar50250(y)).x + +@test_throws UndefRefError foo50250(true, 1) +@test_throws UndefRefError foo50250(false, 1) +@test bar50250(true, 1) === 1 +@test_throws UndefRefError bar50250(false, 1) + +# Test that Type{typeof(Union{})} doesn't get codegen'ed as a constant (#50293) +baz50293(x::Union{Type, Core.Const}) = Base.issingletontype(x) +bar50293(@nospecialize(u)) = (Base.issingletontype(u.a), baz50293(u.a)) +let u = Union{Type{Union{}}, Type{Any}}, ab = bar50293(u) + @test ab[1] == ab[2] == false +end diff --git a/test/deprecation_exec.jl b/test/deprecation_exec.jl index 5b465e05f0a12..61ffcc2a59ac6 100644 --- a/test/deprecation_exec.jl +++ b/test/deprecation_exec.jl @@ -8,8 +8,6 @@ using Test using Logging -using Base: remove_linenums! - module DeprecationTests # to test @deprecate f() = true diff --git a/test/docs.jl b/test/docs.jl index 6707278c53847..7f6ece4e76ab4 100644 --- a/test/docs.jl +++ b/test/docs.jl @@ -642,7 +642,7 @@ macro m1_11993() end macro m2_11993() - Symbol("@m1_11993") + esc(Symbol("@m1_11993")) end @doc "This should document @m1... since its the result of expansion" @m2_11993 diff --git a/test/enums.jl b/test/enums.jl index c7e3e3bf2abdb..6eb9360e08a23 100644 --- a/test/enums.jl +++ b/test/enums.jl @@ -179,6 +179,15 @@ end @enum HashEnum2 Enum2_a=1 @test hash(Enum1_a) != hash(Enum2_a) +# PR #49777: Check that `Base.hash` can be specialized by the user without +# overwriting a method definition. +@enum HashEnum3 Enum3_a=1 +@test which(hash, (HashEnum3, UInt)).sig != Tuple{typeof(hash), HashEnum3, UInt64} + +# Check that generic `hash` on custom enum subtypes works. 
+struct HashEnum4 <: Enum{Int} end +@test hash(HashEnum4(), zero(UInt)) == invoke(hash, Tuple{Any, UInt}, HashEnum4(), zero(UInt)) + @test (Vector{Fruit}(undef, 3) .= apple) == [apple, apple, apple] # long, discongruous diff --git a/test/errorshow.jl b/test/errorshow.jl index 94722b803865f..28ae3fd32365a 100644 --- a/test/errorshow.jl +++ b/test/errorshow.jl @@ -531,7 +531,7 @@ end ex = :(@nest2b 42) @test _macroexpand1(ex) != macroexpand(M,ex) @test _macroexpand1(_macroexpand1(ex)) == macroexpand(M, ex) - @test (@macroexpand1 @nest2b 42) == _macroexpand1(ex) + @test (@macroexpand1 @nest2b 42) == _macroexpand1(:(@nest2b 42)) end foo_9965(x::Float64; w=false) = x @@ -578,7 +578,7 @@ let end end -@testset "show for manually thrown MethodError" begin +@testset "show for MethodError with world age issue" begin global f21006 f21006() = nothing @@ -620,6 +620,32 @@ end end end +# Issue #50200 +using Base.Experimental: @opaque +@testset "show for MethodError with world age issue (kwarg)" begin + test_no_error(f) = @test f() === nothing + function test_worldage_error(f) + ex = try; f(); error("Should not have been reached") catch ex; ex; end + @test occursin("The applicable method may be too new", sprint(Base.showerror, ex)) + @test !occursin("!Matched::", sprint(Base.showerror, ex)) + end + + global callback50200 + + # First the no-kwargs version + callback50200 = (args...)->nothing + f = @opaque ()->callback50200() + test_no_error(f) + callback50200 = (args...)->nothing + test_worldage_error(f) + + callback50200 = (args...; kwargs...)->nothing + f = @opaque ()->callback50200(;a=1) + test_no_error(f) + callback50200 = (args...; kwargs...)->nothing + test_worldage_error(f) +end + # Custom hints struct HasNoOne end function recommend_oneunit(io, ex, arg_types, kwargs) @@ -957,43 +983,73 @@ end f_internal_wrap(g, a; kw...) = error(); @inline f_internal_wrap(a; kw...) 
= f_internal_wrap(identity, a; kw...); -bt = try - f_internal_wrap(1) -catch - catch_backtrace() +let bt + @test try + f_internal_wrap(1) + false + catch + bt = catch_backtrace() + true + end + @test !occursin("#f_internal_wrap#", sprint(Base.show_backtrace, bt)) end -@test !occursin("#f_internal_wrap#", sprint(Base.show_backtrace, bt)) g_collapse_pos(x, y=1.0, z=2.0) = error() -bt = try - g_collapse_pos(1.0) -catch - catch_backtrace() +let bt + @test try + g_collapse_pos(1.0) + false + catch + bt = catch_backtrace() + true + end + bt_str = sprint(Base.show_backtrace, bt) + @test occursin("g_collapse_pos(x::Float64, y::Float64, z::Float64)", bt_str) + @test !occursin("g_collapse_pos(x::Float64)", bt_str) end -bt_str = sprint(Base.show_backtrace, bt) -@test occursin("g_collapse_pos(x::Float64, y::Float64, z::Float64)", bt_str) -@test !occursin("g_collapse_pos(x::Float64)", bt_str) g_collapse_kw(x; y=2.0) = error() -bt = try - g_collapse_kw(1.0) -catch - catch_backtrace() +let bt + @test try + g_collapse_kw(1.0) + false + catch + bt = catch_backtrace() + true + end + bt_str = sprint(Base.show_backtrace, bt) + @test occursin("g_collapse_kw(x::Float64; y::Float64)", bt_str) + @test !occursin("g_collapse_kw(x::Float64)", bt_str) end -bt_str = sprint(Base.show_backtrace, bt) -@test occursin("g_collapse_kw(x::Float64; y::Float64)", bt_str) -@test !occursin("g_collapse_kw(x::Float64)", bt_str) g_collapse_pos_kw(x, y=1.0; z=2.0) = error() -bt = try - g_collapse_pos_kw(1.0) -catch - catch_backtrace() -end -bt_str = sprint(Base.show_backtrace, bt) -@test occursin("g_collapse_pos_kw(x::Float64, y::Float64; z::Float64)", bt_str) -@test !occursin("g_collapse_pos_kw(x::Float64, y::Float64)", bt_str) -@test !occursin("g_collapse_pos_kw(x::Float64)", bt_str) +let bt + @test try + g_collapse_pos_kw(1.0) + false + catch + bt = catch_backtrace() + true + end + bt_str = sprint(Base.show_backtrace, bt) + @test occursin("g_collapse_pos_kw(x::Float64, y::Float64; z::Float64)", bt_str) + 
@test !occursin("g_collapse_pos_kw(x::Float64, y::Float64)", bt_str) + @test !occursin("g_collapse_pos_kw(x::Float64)", bt_str) +end + +simplify_kwargs_type(pos; kws...) = (pos, sum(kws)) +let bt + res = try + simplify_kwargs_type(0; kw1=1.0, kw2="2.0") + false + catch + bt = catch_backtrace() + true + end + @test res + bt_str = sprint(Base.show_backtrace, bt) + @test occursin("simplify_kwargs_type(pos::$Int; kws::@Kwargs{kw1::Float64, kw2::String})", bt_str) +end # Test Base.print_with_compare in convert MethodErrors struct TypeCompareError{A,B} <: Exception end diff --git a/test/fastmath.jl b/test/fastmath.jl index 8755e727db092..21f3ebc1e603f 100644 --- a/test/fastmath.jl +++ b/test/fastmath.jl @@ -284,3 +284,12 @@ end end end end + +@testset "+= with indexing (#47241)" begin + i = 0 + x = zeros(2) + @fastmath x[i += 1] += 1 + @fastmath x[end] += 1 + @test x == [1, 1] + @test i == 1 +end diff --git a/test/gc.jl b/test/gc.jl index ecf71fe51f6ad..e085c1d8658e5 100644 --- a/test/gc.jl +++ b/test/gc.jl @@ -5,10 +5,12 @@ using Test function run_gctest(file) let cmd = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no $file` @testset for test_nthreads in (1, 2, 4) - new_env = copy(ENV) - new_env["JULIA_NUM_THREADS"] = string(test_nthreads) - new_env["JULIA_NUM_GC_THREADS"] = string(test_nthreads) - @test success(run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr))) + @testset for concurrent_sweep in (0, 1) + new_env = copy(ENV) + new_env["JULIA_NUM_THREADS"] = string(test_nthreads) + new_env["JULIA_NUM_GC_THREADS"] = "$(test_nthreads),$(concurrent_sweep)" + @test success(run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr))) + end end end end diff --git a/test/goto.jl b/test/goto.jl index 011ec32a851bd..e069058f38d52 100644 --- a/test/goto.jl +++ b/test/goto.jl @@ -87,7 +87,7 @@ end @test goto_test5_3() -@test Expr(:error, "goto from a try/finally block is not permitted") == +@test Expr(:error, "goto from a try/finally 
block is not permitted around $(@__FILE__):$(3 + @__LINE__)") == Meta.lower(@__MODULE__, quote function goto_test6() try diff --git a/test/hashing.jl b/test/hashing.jl index 943109924f280..1c7c37d00f93b 100644 --- a/test/hashing.jl +++ b/test/hashing.jl @@ -8,7 +8,8 @@ types = Any[ Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Float32, Float64, Rational{Int8}, Rational{UInt8}, Rational{Int16}, Rational{UInt16}, - Rational{Int32}, Rational{UInt32}, Rational{Int64}, Rational{UInt64} + Rational{Int32}, Rational{UInt32}, Rational{Int64}, Rational{UInt64}, + BigFloat, BigInt, Rational{BigInt} ] vals = vcat( typemin(Int64), @@ -51,8 +52,7 @@ let collides = 0 collides += eq end end - # each pair of types has one collision for these values - @test collides <= (length(types) - 1)^2 + @test collides <= 516 end @test hash(0.0) != hash(-0.0) @@ -302,3 +302,11 @@ let t1 = Tuple{AbstractVector,AbstractVector{<:Integer},UnitRange{<:Integer}}, @test hash(t1) == hash(t2) @test length(Set{Type}([t1, t2])) == 1 end + +struct AUnionParam{T<:Union{Nothing,Float32,Float64}} end +@test AUnionParam.body.hash == 0 +@test Type{AUnionParam}.hash != 0 +@test Type{AUnionParam{<:Union{Float32,Float64}}}.hash == 0 +@test Type{AUnionParam{<:Union{Nothing,Float32,Float64}}} === Type{AUnionParam} +@test Type{AUnionParam.body}.hash == 0 +@test Type{Base.Broadcast.Broadcasted}.hash != 0 diff --git a/test/intrinsics.jl b/test/intrinsics.jl index aa2a9649857c4..3c49afe2c4d7e 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -107,16 +107,28 @@ end const ReplaceType = ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T +@testset "elsize(::Type{<:Ptr})" begin + @test Base.elsize(Ptr{Any}) == sizeof(Int) + @test Base.elsize(Ptr{NTuple{3,Int8}}) == 3 + @test Base.elsize(Ptr{Cvoid}) == 0 + @test Base.elsize(Ptr{Base.RefValue{Any}}) == sizeof(Int) + @test Base.elsize(Ptr{Int}) == sizeof(Int) + @test_throws MethodError Base.elsize(Ptr) + @test_throws ErrorException 
Base.elsize(Ptr{Ref{Int}}) + @test_throws ErrorException Base.elsize(Ptr{Ref}) + @test_throws ErrorException Base.elsize(Ptr{Complex}) +end + # issue #29929 let p = Ptr{Nothing}(0) @test unsafe_store!(p, nothing) === C_NULL @test unsafe_load(p) === nothing - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing - @test Core.Intrinsics.atomic_pointerset(p, nothing, :sequentially_consistent) === p - @test Core.Intrinsics.atomic_pointerswap(p, nothing, :sequentially_consistent) === nothing - @test Core.Intrinsics.atomic_pointermodify(p, (i, j) -> j, nothing, :sequentially_consistent) === Pair(nothing, nothing) - @test Core.Intrinsics.atomic_pointerreplace(p, nothing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, true)) - @test Core.Intrinsics.atomic_pointerreplace(p, missing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, false)) + @test unsafe_load(p, :sequentially_consistent) === nothing + @test unsafe_store!(p, nothing, :sequentially_consistent) === p + @test unsafe_swap!(p, nothing, :sequentially_consistent) === nothing + @test unsafe_modify!(p, (i, j) -> j, nothing, :sequentially_consistent) === Pair(nothing, nothing) + @test unsafe_replace!(p, nothing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, true)) + @test unsafe_replace!(p, missing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, false)) end struct GhostStruct end @@ -214,54 +226,90 @@ swap(i, j) = j for TT in (Int8, Int16, Int32, Int64, Int128, Int256, Int512, Complex{Int32}, Complex{Int512}, Any) r = Ref{TT}(10) GC.@preserve r begin - (function (::Type{TT}) where TT + (@noinline function (::Type{TT}) where TT p = Base.unsafe_convert(Ptr{TT}, r) T(x) = convert(TT, x) S = UInt32 if TT !== Any @test_throws TypeError Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent) - 
@test_throws TypeError Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent) - @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointerswap(p, S(2), :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, T(10), S(3), :sequentially_consistent, :sequentially_consistent) end @test Core.Intrinsics.pointerref(p, 1, 1) === T(10) === r[] if sizeof(r) > 8 - @test_throws ErrorException("atomic_pointerref: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) - @test_throws ErrorException("atomic_pointerset: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent) - @test_throws ErrorException("atomic_pointerswap: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerswap(p, T(100), :sequentially_consistent) - @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) - @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent) - @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerreplace(p, T(100), T(2), :sequentially_consistent, :sequentially_consistent) - @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) + @test_throws ErrorException("atomic_pointerref: invalid pointer for atomic operation") unsafe_load(p, :sequentially_consistent) + @test_throws ErrorException("atomic_pointerset: invalid pointer for atomic operation") unsafe_store!(p, T(1), 
:sequentially_consistent) + @test_throws ErrorException("atomic_pointerswap: invalid pointer for atomic operation") unsafe_swap!(p, T(100), :sequentially_consistent) + @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") unsafe_modify!(p, add, T(1), :sequentially_consistent) + @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") unsafe_modify!(p, swap, S(1), :sequentially_consistent) + @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") unsafe_replace!(p, T(100), T(2), :sequentially_consistent, :sequentially_consistent) + @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") unsafe_replace!(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) @test Core.Intrinsics.pointerref(p, 1, 1) === T(10) === r[] else - TT !== Any && @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(10) - @test Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent) === p - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(1) - @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(1), true)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(100) - @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(100), false)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(100) - @test Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(100), T(101)) - @test Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(101), T(102)) - @test Core.Intrinsics.atomic_pointerref(p, 
:sequentially_consistent) === T(102) - @test Core.Intrinsics.atomic_pointerswap(p, T(103), :sequentially_consistent) === T(102) - @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(103), false)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(103) + if TT !== Any + @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, Returns(S(5)), T(10), :sequentially_consistent) + end + @test unsafe_load(p, :sequentially_consistent) === T(10) + @test unsafe_store!(p, T(1), :sequentially_consistent) === p + @test unsafe_load(p, :sequentially_consistent) === T(1) + @test unsafe_replace!(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(1), true)) + @test unsafe_load(p, :sequentially_consistent) === T(100) + @test unsafe_replace!(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(100), false)) + @test unsafe_load(p, :sequentially_consistent) === T(100) + @test unsafe_modify!(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(100), T(101)) + @test unsafe_modify!(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(101), T(102)) + @test unsafe_load(p, :sequentially_consistent) === T(102) + @test unsafe_swap!(p, T(103), :sequentially_consistent) === T(102) + @test unsafe_replace!(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(103), false)) + @test unsafe_load(p, :sequentially_consistent) === T(103) + @test unsafe_modify!(p, Returns(T(105)), nothing, :sequentially_consistent) === Pair{TT,TT}(T(103), T(105)) + @test unsafe_load(p, :sequentially_consistent) === T(105) end if TT === Any - @test Core.Intrinsics.atomic_pointermodify(p, swap, S(103), :sequentially_consistent) === Pair{TT,TT}(T(103), S(103)) - @test 
Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === S(103) - @test Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent) === p - @test Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent) === S(1) - @test Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), false)) - @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), true)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(2) + @test unsafe_modify!(p, swap, S(105), :sequentially_consistent) === Pair{TT,TT}(T(105), S(105)) + @test unsafe_load(p, :sequentially_consistent) === S(105) + @test unsafe_store!(p, S(1), :sequentially_consistent) === p + @test unsafe_swap!(p, S(100), :sequentially_consistent) === S(1) + @test unsafe_replace!(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), false)) + @test unsafe_replace!(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), true)) + @test unsafe_load(p, :sequentially_consistent) === T(2) end end)(TT,) end end +for TT in (Ptr{Nothing}, Ptr) + r = Ref(nothing) + GC.@preserve r begin + p = Ref{TT}(Base.unsafe_convert(Ptr{Nothing}, r)) + (@noinline function (p::Ref) + p = p[] + S = UInt32 + @test_throws TypeError Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, nothing, S(2), :sequentially_consistent, :sequentially_consistent) + @test Core.Intrinsics.pointerref(p, 1, 1) === nothing === r[] + @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, 
Returns(S(1)), nothing, :sequentially_consistent) + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointerset(p, nothing, :sequentially_consistent) === p + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointerreplace(p, nothing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, true)) + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointerreplace(p, S(1), nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, false)) + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointermodify(p, Returns(nothing), nothing, :sequentially_consistent) === Pair{Nothing,Nothing}(nothing, nothing) + @test Core.Intrinsics.atomic_pointermodify(p, Returns(nothing), S(1), :sequentially_consistent) === Pair{Nothing,Nothing}(nothing, nothing) + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointerswap(p, nothing, :sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointerreplace(p, S(100), nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, false)) + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing + end)(p,) + end +end + + mutable struct IntWrap <: Signed x::Int end @@ -278,38 +326,38 @@ Base.show(io::IO, a::IntWrap) = print(io, "IntWrap(", a.x, ")") @test_throws TypeError Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent) @test_throws TypeError Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent) @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) - r2 = 
Core.Intrinsics.pointerref(p, 1, 1) + r2 = unsafe_load(p, 1) @test r2 isa IntWrap && r2.x === 10 === r[].x && r2 !== r[] @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent) - r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 10 === r[].x && r2 !== r[] - @test Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent) === p - r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) + @test unsafe_store!(p, T(1), :sequentially_consistent) === p + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 1 === r[].x && r2 !== r[] - r2, succ = Core.Intrinsics.atomic_pointerreplace(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) + r2, succ = unsafe_replace!(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 1 && r[].x === 100 && r2 !== r[] @test succ - r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 100 === r[].x && r2 !== r[] - r2, succ = Core.Intrinsics.atomic_pointerreplace(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) + r2, succ = unsafe_replace!(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 100 === r[].x && r2 !== r[] @test !succ - r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 100 === r[].x && r2 !== r[] - r2, r3 = Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) + r2, r3 = unsafe_modify!(p, add, T(1), :sequentially_consistent) @test r2 isa IntWrap && r2.x === 100 !== r[].x && r2 !== r[] @test r3 isa IntWrap && r3.x === 101 === r[].x && r3 !== r[] - r2, r3 = Core.Intrinsics.atomic_pointermodify(p, add, 
T(1), :sequentially_consistent) + r2, r3 = unsafe_modify!(p, add, T(1), :sequentially_consistent) @test r2 isa IntWrap && r2.x === 101 !== r[].x && r2 !== r[] @test r3 isa IntWrap && r3.x === 102 === r[].x && r3 !== r[] - r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 102 === r[].x && r2 !== r[] - r2 = Core.Intrinsics.atomic_pointerswap(p, T(103), :sequentially_consistent) + r2 = unsafe_swap!(p, T(103), :sequentially_consistent) @test r2 isa IntWrap && r2.x === 102 !== r[].x && r[].x == 103 && r2 !== r[] - r2, succ = Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) + r2, succ = unsafe_replace!(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 103 === r[].x && r2 !== r[] @test !succ - r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 103 === r[].x && r2 !== r[] end end)() diff --git a/test/llvmcall2.jl b/test/llvmcall2.jl index 8926b962a35c6..07b27fc407433 100644 --- a/test/llvmcall2.jl +++ b/test/llvmcall2.jl @@ -60,3 +60,16 @@ let err = ErrorException("llvmcall only supports intrinsic calls") @test_throws err (@eval ccall("llvm.floor.f64", llvmcall, Float64, (Float64, Float64...,), 0.0)) === 0.0 @test_throws err (@eval ccall("llvm.floor", llvmcall, Float64, (Float64, Float64...,), 0.0)) === 0.0 end + +@testset "JLJIT API" begin + function JLJITGetJuliaOJIT() + ccall(:JLJITGetJuliaOJIT, Ptr{Cvoid}, ()) + end + function JLJITGetTripleString(JIT) + ccall(:JLJITGetTripleString, Cstring, (Ptr{Cvoid},), JIT) + end + jit = JLJITGetJuliaOJIT() + str = JLJITGetTripleString(jit) + jl_str = unsafe_string(str) + @test length(jl_str) > 4 +end diff --git a/test/llvmpasses/Makefile b/test/llvmpasses/Makefile index ec0333178c225..7318d1b67da02 100644 --- 
a/test/llvmpasses/Makefile +++ b/test/llvmpasses/Makefile @@ -9,6 +9,7 @@ TESTS_jl := $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/*.jl)) TESTS := $(TESTS_ll) $(TESTS_jl) . $(TESTS): + $(MAKE) -C $(JULIAHOME)/deps install-llvm-tools PATH=$(build_bindir):$(build_depsbindir):$$PATH \ LD_LIBRARY_PATH=${build_libdir}:$$LD_LIBRARY_PATH \ $(build_depsbindir)/lit/lit.py -v "$(addprefix $(SRCDIR)/,$@)" diff --git a/test/llvmpasses/alloc-opt-gcframe-addrspaces.jl b/test/llvmpasses/alloc-opt-gcframe-addrspaces.jl deleted file mode 100644 index 093c062deca64..0000000000000 --- a/test/llvmpasses/alloc-opt-gcframe-addrspaces.jl +++ /dev/null @@ -1,40 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# RUN: julia --startup-file=no %s | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -AllocOpt -S - | FileCheck %s -# RUN: julia --startup-file=no %s | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S - | FileCheck %s - -isz = sizeof(UInt) == 8 ? 
"i64" : "i32" - -println(""" -target triple = "amdgcn-amd-amdhsa" -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" - -@tag = external addrspace(10) global {} - -declare {}*** @julia.ptls_states() -declare {}*** @julia.get_pgcstack() -declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, $isz, {} addrspace(10)*) -declare {}* @julia.pointer_from_objref({} addrspace(11)*) -""") - -# Test that non-0 addrspace allocas are properly emitted and handled - -# CHECK-LABEL: @non_zero_addrspace -# CHECK: %1 = alloca i32, align 8, addrspace(5) -# CHECK: %2 = bitcast i32 addrspace(5)* %1 to i8 addrspace(5)* -# CHECK: %3 = bitcast i8 addrspace(5)* %2 to {} addrspace(5)* -# CHECK: %var1 = addrspacecast {} addrspace(5)* %3 to {} addrspace(10)* -# CHECK: call void @llvm.lifetime.start.p5i8(i64 4, i8 addrspace(5)* %2) -# CHECK: ret void -println(""" -define void @non_zero_addrspace() { - %pgcstack = call {}*** @julia.get_pgcstack() - %ptls = call {}*** @julia.ptls_states() - %ptls_i8 = bitcast {}*** %ptls to i8* - %var1 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 4, {} addrspace(10)* @tag) - %var2 = addrspacecast {} addrspace(10)* %var1 to {} addrspace(11)* - %var3 = call {}* @julia.pointer_from_objref({} addrspace(11)* %var2) - ret void -} -""") -# CHECK-LABEL: }{{$}} diff --git a/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll b/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll new file mode 100644 index 0000000000000..b33f2cdac7dd4 --- /dev/null +++ b/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll @@ -0,0 +1,42 @@ +; This file is a part of Julia. 
License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE + +target triple = "amdgcn-amd-amdhsa" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" + +@tag = external addrspace(10) global {} + +declare {}*** @julia.ptls_states() +declare {}*** @julia.get_pgcstack() +declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, i64, {} addrspace(10)*) +declare {}* @julia.pointer_from_objref({} addrspace(11)*) + +; Test that non-0 addrspace allocas are properly emitted and handled + +; CHECK-LABEL: @non_zero_addrspace +; CHECK: %1 = alloca i32, align 8, addrspace(5) + +; TYPED: %2 = bitcast i32 addrspace(5)* %1 to i8 addrspace(5)* +; TYPED: %3 = bitcast i8 addrspace(5)* %2 to {} addrspace(5)* +; TYPED: %var1 = addrspacecast {} addrspace(5)* %3 to {} addrspace(10)* +; TYPED: call void @llvm.lifetime.start.p5i8(i64 4, i8 addrspace(5)* %2) + +; OPAQUE: %var1 = addrspacecast ptr addrspace(5) %1 to ptr addrspace(10) +; OPAQUE: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %1) + +; CHECK: ret void +define void @non_zero_addrspace() { + %pgcstack = call {}*** @julia.get_pgcstack() + %ptls = call {}*** @julia.ptls_states() + %ptls_i8 = bitcast {}*** %ptls to i8* + %var1 = call {} addrspace(10)* 
@julia.gc_alloc_obj(i8* %ptls_i8, i64 4, {} addrspace(10)* @tag) + %var2 = addrspacecast {} addrspace(10)* %var1 to {} addrspace(11)* + %var3 = call {}* @julia.pointer_from_objref({} addrspace(11)* %var2) + ret void +} +; CHECK-LABEL: }{{$}} diff --git a/test/llvmpasses/alloc-opt-gcframe.jl b/test/llvmpasses/alloc-opt-gcframe.ll similarity index 50% rename from test/llvmpasses/alloc-opt-gcframe.jl rename to test/llvmpasses/alloc-opt-gcframe.ll index e7ddf12d79bc7..a04d6566cec0a 100644 --- a/test/llvmpasses/alloc-opt-gcframe.jl +++ b/test/llvmpasses/alloc-opt-gcframe.ll @@ -1,49 +1,54 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license +; This file is a part of Julia. License is MIT: https://julialang.org/license -# RUN: julia --startup-file=no %s | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -AllocOpt -LateLowerGCFrame -FinalLowerGC -S - | FileCheck %s -# RUN: julia --startup-file=no %s | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame),FinalLowerGC' -S - | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -AllocOpt -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED -isz = sizeof(UInt) == 8 ? 
"i64" : "i32" +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -AllocOpt -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE -println(""" target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @tag = external addrspace(10) global {} -""") - -# CHECK-LABEL: @return_obj -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK: %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 -# CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 -# CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -# CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** -# CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -# CHECK-NEXT: %v = call noalias nonnull dereferenceable({{[0-9]+}}) {} addrspace(10)* @ijl_gc_pool_alloc(i8* [[ptls_i8]], i32 [[SIZE_T:[0-9]+]], i32 16) -# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 -println(""" + +; CHECK-LABEL: @return_obj +; CHECK-NOT: @julia.gc_alloc_obj + +; TYPED: %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 +; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 +; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 +; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** +; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* +; TYPED-NEXT: %v = call noalias nonnull dereferenceable({{[0-9]+}}) {} addrspace(10)* @ijl_gc_pool_alloc(i8* [[ptls_i8]], i32 [[SIZE_T:[0-9]+]], i32 16) +; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 + +; OPAQUE: %current_task = 
getelementptr inbounds ptr, ptr %gcstack, i64 -12 +; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 +; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; OPAQUE-NEXT: %v = call noalias nonnull dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_pool_alloc(ptr [[ptls_load]], i32 [[SIZE_T:[0-9]+]], i32 16) +; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4 + define {} addrspace(10)* @return_obj() { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) ret {} addrspace(10)* %v } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @return_load -# CHECK: alloca i64 -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK-NOT: @jl_gc_pool_alloc -# CHECK: call void @llvm.lifetime.start{{.*}}(i64 8, i8* -# CHECK-NOT: @tag -# CHECK-NOT: @llvm.lifetime.end -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @return_load +; CHECK: alloca i64 +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK-NOT: @jl_gc_pool_alloc +; TYPED: call void @llvm.lifetime.start{{.*}}(i64 8, i8* +; OPAQUE: call void @llvm.lifetime.start{{.*}}(i64 8, ptr +; CHECK-NOT: @tag +; CHECK-NOT: @llvm.lifetime.end define i64 @return_load(i64 %i) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* %v64a11 = addrspacecast 
i64 addrspace(10)* %v64 to i64 addrspace(11)* store i64 %i, i64 addrspace(11)* %v64a11, align 16, !tbaa !4 @@ -51,43 +56,46 @@ define i64 @return_load(i64 %i) { %l = load i64, i64 addrspace(11)* %v64a11, align 16, !tbaa !4 ret i64 %l } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @ccall_obj -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK: @ijl_gc_pool_alloc -# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @ccall_obj +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK: @ijl_gc_pool_alloc +; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 +; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4 define void @ccall_obj(i8* %fptr) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %f = bitcast i8* %fptr to void ({} addrspace(10)*)* call void %f({} addrspace(10)* %v) ret void } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @ccall_ptr -# CHECK: alloca i64 -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK-NOT: @jl_gc_pool_alloc -# CHECK: call void @llvm.lifetime.start{{.*}}(i64 8, i8* -# CHECK: %f = bitcast i8* %fptr to void (i8*)* -# Currently the GC frame lowering pass strips away all operand bundles -# CHECK-NEXT: call void %f(i8* -# CHECK-NEXT: ret void -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @ccall_ptr +; CHECK: alloca i64 +; TYPED: call {}*** 
@julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK-NOT: @jl_gc_pool_alloc +; TYPED: call void @llvm.lifetime.start{{.*}}(i64 8, i8* +; TYPED: %f = bitcast i8* %fptr to void (i8*)* + +; OPAQUE: call void @llvm.lifetime.start{{.*}}(i64 8, ptr +; OPAQUE: %f = bitcast ptr %fptr to ptr +; Currently the GC frame lowering pass strips away all operand bundles +; TYPED-NEXT: call void %f(i8* +; OPAQUE-NEXT: call void %f(ptr +; CHECK-NEXT: ret void define void @ccall_ptr(i8* %fptr) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)* %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va) %ptr = bitcast {}* %ptrj to i8* @@ -95,20 +103,20 @@ define void @ccall_ptr(i8* %fptr) { call void %f(i8* %ptr) [ "jl_roots"({} addrspace(10)* %v), "unknown_bundle"(i8* %ptr) ] ret void } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @ccall_unknown_bundle -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK: @ijl_gc_pool_alloc -# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @ccall_unknown_bundle +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK: @ijl_gc_pool_alloc +; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 +; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4 define void @ccall_unknown_bundle(i8* 
%fptr) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)* %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va) %ptr = bitcast {}* %ptrj to i8* @@ -116,26 +124,32 @@ define void @ccall_unknown_bundle(i8* %fptr) { call void %f(i8* %ptr) [ "jl_not_jl_roots"({} addrspace(10)* %v) ] ret void } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @lifetime_branches -# CHECK: alloca i64 -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK: L1: -# CHECK-NEXT: call void @llvm.lifetime.start{{.*}}(i64 8, -# CHECK: %f = bitcast i8* %fptr to void (i8*)* -# CHECK-NEXT: call void %f(i8* -# CHECK-NEXT: br i1 %b2, label %L2, label %L3 - -# CHECK: L2: -# CHECK-NEXT: %f2 = bitcast i8* %fptr to void ({}*)* -# CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8, -# CHECK-NEXT: call void %f2({}* null) - -# CHECK: L3: -# CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8, -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @lifetime_branches +; CHECK: alloca i64 +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK: L1: +; CHECK-NEXT: call void @llvm.lifetime.start{{.*}}(i64 8, + +; TYPED: %f = bitcast i8* %fptr to void (i8*)* +; TYPED-NEXT: call void %f(i8* + +; OPAQUE: %f = bitcast ptr %fptr to ptr +; OPAQUE-NEXT: call void %f(ptr + +; CHECK-NEXT: br i1 %b2, label %L2, label %L3 + +; CHECK: L2: +; TYPED-NEXT: %f2 = bitcast i8* %fptr to void ({}*)* +; OPAQUE-NEXT: %f2 = bitcast ptr %fptr to ptr +; CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8, +; TYPED-NEXT: call void %f2({}* null) +; OPAQUE-NEXT: call void %f2(ptr null) + +; CHECK: L3: +; 
CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8, define void @lifetime_branches(i8* %fptr, i1 %b, i1 %b2) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** @@ -143,7 +157,7 @@ define void @lifetime_branches(i8* %fptr, i1 %b, i1 %b2) { br i1 %b, label %L1, label %L3 L1: - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)* %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va) %ptr = bitcast {}* %ptrj to i8* @@ -159,62 +173,60 @@ L2: L3: ret void } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @object_field -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK-NOT: @jl_gc_pool_alloc -# CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, align 8, !tbaa !4 -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @object_field +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK-NOT: @jl_gc_pool_alloc +; CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, align 8, !tbaa !4 define void @object_field({} addrspace(10)* %field) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)* %vab = bitcast {} addrspace(11)* %va to {} addrspace(10)* addrspace(11)* store {} addrspace(10)* %field, {} addrspace(10)* addrspace(11)* %vab, align 8 ret void } -""") -# 
CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @memcpy_opt -# CHECK: alloca [16 x i8], align 16 -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK-NOT: @jl_gc_pool_alloc -# CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @memcpy_opt +; CHECK: alloca [16 x i8], align 16 +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK-NOT: @jl_gc_pool_alloc +; TYPED: call void @llvm.memcpy.p0i8.p0i8.i64 +; OPAQUE: call void @llvm.memcpy.p0.p0.i64 define void @memcpy_opt(i8* %v22) { top: %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 16, {} addrspace(10)* @tag) + %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 16, {} addrspace(10)* @tag) %v20 = bitcast {} addrspace(10)* %v19 to i8 addrspace(10)* %v21 = addrspacecast i8 addrspace(10)* %v20 to i8 addrspace(11)* call void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* %v21, i8* %v22, i64 16, i32 8, i1 false) ret void } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @preserve_opt -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK-NOT: @jl_gc_pool_alloc -# CHECK-NOT: @llvm.lifetime.end -# CHECK: @external_function -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @preserve_opt +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK-NOT: @jl_gc_pool_alloc +; CHECK-NOT: @llvm.lifetime.end +; CHECK: @external_function define void @preserve_opt(i8* %v22) { top: %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v19 = call noalias {} addrspace(10)* 
@julia.gc_alloc_obj({}** %current_task, $isz 16, {} addrspace(10)* @tag) + %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 16, {} addrspace(10)* @tag) %v20 = bitcast {} addrspace(10)* %v19 to i8 addrspace(10)* %v21 = addrspacecast i8 addrspace(10)* %v20 to i8 addrspace(11)* %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v19) @@ -223,21 +235,20 @@ top: call void @external_function() ret void } -""") -# CHECK-LABEL: }{{$}} +; CHECK-LABEL: }{{$}} -# CHECK-LABEL: @preserve_branches -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK: L1: -# CHECK-NEXT: @external_function() -# CHECK-NEXT: br i1 %b2, label %L2, label %L3 +; CHECK-LABEL: @preserve_branches +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK: L1: +; CHECK-NEXT: @external_function() +; CHECK-NEXT: br i1 %b2, label %L2, label %L3 -# CHECK: L2: -# CHECK: @external_function() -# CHECK-NEXT: br label %L3 +; CHECK: L2: +; CHECK: @external_function() +; CHECK-NEXT: br label %L3 -# CHECK: L3: -println(""" +; CHECK: L3: define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** @@ -245,7 +256,7 @@ define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) { br i1 %b, label %L1, label %L3 L1: - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %tok = call token (...) 
@llvm.julia.gc_preserve_begin({} addrspace(10)* %v) call void @external_function() br i1 %b2, label %L2, label %L3 @@ -257,15 +268,16 @@ L2: L3: ret void } -""") -# CHECK-LABEL: }{{$}} +; CHECK-LABEL: }{{$}} + +; TYPED: declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, +; TYPED: declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, -# CHECK: declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, -# CHECK: declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, -println(""" +; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_pool_alloc(ptr, +; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_big_alloc(ptr, declare void @external_function() declare {}*** @julia.get_pgcstack() -declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, $isz, {} addrspace(10)*) +declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) declare {}* @julia.pointer_from_objref({} addrspace(11)*) declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) declare token @llvm.julia.gc_preserve_begin(...) @@ -278,4 +290,3 @@ declare void @llvm.julia.gc_preserve_end(token) !4 = !{!5, !5, i64 0} !5 = !{!"jtbaa_mutab", !6, i64 0} !6 = !{!"jtbaa_value", !2, i64 0} -""") diff --git a/test/llvmpasses/alloc-opt-pass.ll b/test/llvmpasses/alloc-opt-pass.ll index 4ce152669246f..b7e0647263caa 100644 --- a/test/llvmpasses/alloc-opt-pass.ll +++ b/test/llvmpasses/alloc-opt-pass.ll @@ -1,14 +1,18 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE @tag = external addrspace(10) global {} ; Test that the gc_preserve intrinsics are deleted directly. ; CHECK-LABEL: @preserve_branches -; CHECK: call {}*** @julia.ptls_states() +; TYPED: call {}*** @julia.ptls_states() +; OPAQUE: call ptr @julia.ptls_states() ; CHECK: L1: ; CHECK-NOT: @llvm.julia.gc_preserve_begin ; CHECK-NEXT: @external_function() @@ -41,9 +45,11 @@ L3: ; CHECK-LABEL: }{{$}} ; CHECK-LABEL: @preserve_branches2 -; CHECK: call {}*** @julia.ptls_states() +; TYPED: call {}*** @julia.ptls_states() +; OPAQUE: call ptr @julia.ptls_states() ; CHECK: L1: -; CHECK-NEXT: @llvm.julia.gc_preserve_begin{{.*}}{} addrspace(10)* %v2 +; TYPED-NEXT: @llvm.julia.gc_preserve_begin{{.*}}{} addrspace(10)* %v2 +; OPAQUE-NEXT: @llvm.julia.gc_preserve_begin{{.*}}ptr addrspace(10) %v2 ; CHECK-NEXT: @external_function() ; CHECK-NEXT: br i1 %b2, label %L2, label %L3 @@ -97,16 +103,21 @@ declare {}*** @julia.get_pgcstack() declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, i64, {} addrspace(10)*) declare {}* @julia.pointer_from_objref({} addrspace(11)*) declare void 
@llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) declare token @llvm.julia.gc_preserve_begin(...) declare void @llvm.julia.gc_preserve_end(token) ; CHECK-LABEL: @memref_collision -; CHECK: call {}*** @julia.ptls_states() -; CHECK-NOT: store {} +; TYPED: call {}*** @julia.ptls_states() +; OPAQUE: call ptr @julia.ptls_states() +; TYPED-NOT: store {} +; OPAQUE-NOT: store ptr ; CHECK: store i -; CHECK-NOT: store {} +; TYPED-NOT: store {} +; OPAQUE-NOT: store ptr ; CHECK: L1: -; CHECK: load {} +; TYPED: load {} +; OPAQUE: load ptr ; CHECK: L2: ; CHECK: load i define void @memref_collision(i64 %x) { @@ -129,3 +140,25 @@ L2: ret void } ; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @lifetime_no_preserve_end +; CHECK: alloca +; CHECK-NOT: call token(...) @llvm.julia.gc_preserve_begin +; CHECK: call void @llvm.lifetime.start +; CHECK-NOT: call void @llvm.lifetime.end +define void @lifetime_no_preserve_end({}* noalias nocapture noundef nonnull sret({}) %0) { + %pgcstack = call {}*** @julia.get_pgcstack() + %ptls = call {}*** @julia.ptls_states() + %ptls_i8 = bitcast {}*** %ptls to i8* + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag) + %token = call token (...) 
@llvm.julia.gc_preserve_begin({} addrspace(10)* %v) + %v_derived = addrspacecast {} addrspace(10)* %v to {} addrspace(11)* + %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %v_derived) + %ptr_raw = bitcast {}* %ptr to i8* + call void @external_function() ; safepoint + %ret_raw = bitcast {}* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %ret_raw, i8 * align 8 %ptr_raw, i64 0, i1 false) + %ret_raw2 = bitcast {}* %0 to i8* + ret void +} +; CHECK-LABEL: }{{$}} diff --git a/test/llvmpasses/alloc-opt-unsized.ll b/test/llvmpasses/alloc-opt-unsized.ll index 8a21091ce558c..d3868548a00d7 100644 --- a/test/llvmpasses/alloc-opt-unsized.ll +++ b/test/llvmpasses/alloc-opt-unsized.ll @@ -1,6 +1,10 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext --passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext --passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=OPAQUE source_filename = "text" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" @@ -27,11 +31,16 @@ entry: ret void } -; CHECK: %[[i0:.+]] = alloca {} addrspace(10)*, i64 1000, align 16 -; CHECK: %[[i1:.+]] = bitcast {} addrspace(10)** %[[i0]] to i8* -; CHECK: %i18 = bitcast i8* %[[i1]] to {}* -; CHECK: %_malloccache.i = bitcast {}* %i18 to {} addrspace(10)** -; CHECK: %i23 = getelementptr inbounds {} addrspace(10)*, {} 
addrspace(10)** %_malloccache.i, i64 %iv.i -; CHECK: store {} addrspace(10)* %arg, {} addrspace(10)** %i23, align 8 -; CHECK: %i24 = bitcast {} addrspace(10)** %_malloccache.i to {}* -; CHECK: %l = load {} addrspace(10)*, {} addrspace(10)** %i23, align 8 +; TYPED: %[[i0:.+]] = alloca {} addrspace(10)*, i64 1000, align 16 +; TYPED: %[[i1:.+]] = bitcast {} addrspace(10)** %[[i0]] to i8* +; TYPED: %i18 = bitcast i8* %[[i1]] to {}* +; TYPED: %_malloccache.i = bitcast {}* %i18 to {} addrspace(10)** +; TYPED: %i23 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %_malloccache.i, i64 %iv.i +; TYPED: store {} addrspace(10)* %arg, {} addrspace(10)** %i23, align 8 +; TYPED: %i24 = bitcast {} addrspace(10)** %_malloccache.i to {}* +; TYPED: %l = load {} addrspace(10)*, {} addrspace(10)** %i23, align 8 + +; OPAQUE: %[[i0:.+]] = alloca ptr addrspace(10), i64 1000, align 16 +; OPAQUE: %i23 = getelementptr inbounds ptr addrspace(10), ptr %i18, i64 %iv.i +; OPAQUE: store ptr addrspace(10) %arg, ptr %i23, align 8 +; OPAQUE: %l = load ptr addrspace(10), ptr %i23, align 8 diff --git a/test/llvmpasses/cpu-features.ll b/test/llvmpasses/cpu-features.ll index 1a04db5749b39..eea3d1b288204 100644 --- a/test/llvmpasses/cpu-features.ll +++ b/test/llvmpasses/cpu-features.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -CPUFeatures -simplifycfg -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -CPUFeatures -simplifycfg -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -CPUFeatures -simplifycfg -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s declare i1 @julia.cpu.have_fma.f64() declare double @with_fma(double %0, double %1, double %2) diff --git a/test/llvmpasses/fastmath.jl b/test/llvmpasses/fastmath.jl index 76b048c19a2a0..7338d1c3ccc5a 100644 --- a/test/llvmpasses/fastmath.jl +++ b/test/llvmpasses/fastmath.jl @@ -14,7 +14,7 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl")) import Base.FastMath -# CHECK: call fast float @llvm.sqrt.f32(float %0) +# CHECK: call fast float @llvm.sqrt.f32(float %{{[0-9]+}}) emit(FastMath.sqrt_fast, Float32) diff --git a/test/llvmpasses/final-lower-gc-addrspaces.ll b/test/llvmpasses/final-lower-gc-addrspaces.ll index 61e9e33875078..d3cdea7454972 100644 --- a/test/llvmpasses/final-lower-gc-addrspaces.ll +++ b/test/llvmpasses/final-lower-gc-addrspaces.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE target triple = "amdgcn-amd-amdhsa" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" @@ -24,9 +27,11 @@ attributes #0 = { allocsize(1) } define void @gc_frame_addrspace(i64 %a, i64 %b) { top: ; CHECK-LABEL: @gc_frame_addrspace -; CHECK: %0 = alloca {} addrspace(10)*, i32 4, align 16, addrspace(5) -; CHECK: %gcframe = addrspacecast {} addrspace(10)* addrspace(5)* %0 to {} addrspace(10)** -; CHECK: %1 = bitcast {} addrspace(10)** %gcframe to i8* +; TYPED: %0 = alloca {} addrspace(10)*, i32 4, align 16, addrspace(5) +; OPAQUE: %0 = alloca ptr addrspace(10), i32 4, align 16, addrspace(5) +; TYPED: %gcframe = addrspacecast {} addrspace(10)* addrspace(5)* %0 to {} addrspace(10)** +; OPAQUE: %gcframe = addrspacecast ptr addrspace(5) %0 to ptr +; TYPED: %1 = bitcast {} addrspace(10)** %gcframe to i8* %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) %pgcstack = call {}*** @julia.get_pgcstack() call void @julia.push_gc_frame({} 
addrspace(10)** %gcframe, i32 2) diff --git a/test/llvmpasses/final-lower-gc.ll b/test/llvmpasses/final-lower-gc.ll index 6f1be3d240ae4..5bbaa2f4d81ea 100644 --- a/test/llvmpasses/final-lower-gc.ll +++ b/test/llvmpasses/final-lower-gc.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE @tag = external addrspace(10) global {} @@ -22,34 +25,50 @@ attributes #0 = { allocsize(1) } define void @gc_frame_lowering(i64 %a, i64 %b) { top: ; CHECK-LABEL: @gc_frame_lowering -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) -; CHECK: [[GCFRAME_SLOT:%.*]] = call {}*** @julia.get_pgcstack() +; TYPED: [[GCFRAME_SLOT:%.*]] = call {}*** @julia.get_pgcstack() +; OPAQUE: [[GCFRAME_SLOT:%.*]] = call ptr @julia.get_pgcstack() %pgcstack = call {}*** @julia.get_pgcstack() -; CHECK-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 0 -; CHECK-DAG: [[GCFRAME_SIZE_PTR2:%.*]] = bitcast {} 
addrspace(10)** [[GCFRAME_SIZE_PTR]] to i64* -; CHECK-DAG: store i64 8, i64* [[GCFRAME_SIZE_PTR2]], align 8, !tbaa !0 -; CHECK-DAG: [[PREV_GCFRAME_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 -; CHECK-DAG: [[PREV_GCFRAME_PTR2:%.*]] = bitcast {} addrspace(10)** [[PREV_GCFRAME_PTR]] to {}*** -; CHECK-DAG: [[PREV_GCFRAME:%.*]] = load {}**, {}*** [[GCFRAME_SLOT]], align 8 -; CHECK-DAG: store {}** [[PREV_GCFRAME]], {}*** [[PREV_GCFRAME_PTR2]], align 8, !tbaa !0 -; CHECK-DAG: [[GCFRAME_SLOT2:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)*** -; CHECK-NEXT: store {} addrspace(10)** %gcframe, {} addrspace(10)*** [[GCFRAME_SLOT2]], align 8 +; TYPED-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 0 +; TYPED-DAG: [[GCFRAME_SIZE_PTR2:%.*]] = bitcast {} addrspace(10)** [[GCFRAME_SIZE_PTR]] to i64* +; TYPED-DAG: store i64 8, i64* [[GCFRAME_SIZE_PTR2]], align 8, !tbaa !0 +; TYPED-DAG: [[PREV_GCFRAME_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 +; TYPED-DAG: [[PREV_GCFRAME_PTR2:%.*]] = bitcast {} addrspace(10)** [[PREV_GCFRAME_PTR]] to {}*** +; TYPED-DAG: [[PREV_GCFRAME:%.*]] = load {}**, {}*** [[GCFRAME_SLOT]], align 8 +; TYPED-DAG: store {}** [[PREV_GCFRAME]], {}*** [[PREV_GCFRAME_PTR2]], align 8, !tbaa !0 +; TYPED-DAG: [[GCFRAME_SLOT2:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)*** +; TYPED-NEXT: store {} addrspace(10)** %gcframe, {} addrspace(10)*** [[GCFRAME_SLOT2]], align 8 + +; OPAQUE-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 0 +; OPAQUE-DAG: store i64 8, ptr [[GCFRAME_SIZE_PTR]], align 8, !tbaa !0 +; OPAQUE-DAG: [[PREV_GCFRAME_PTR:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 1 +; OPAQUE-DAG: [[PREV_GCFRAME:%.*]] = load ptr, ptr [[GCFRAME_SLOT]], align 8 +; OPAQUE-DAG: store ptr [[PREV_GCFRAME]], ptr [[PREV_GCFRAME_PTR]], align 8, 
!tbaa !0 +; OPAQUE-NEXT: store ptr %gcframe, ptr [[GCFRAME_SLOT]], align 8 call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a) -; CHECK: %frame_slot_1 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 3 +; TYPED: %frame_slot_1 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 3 +; OPAQUE: %frame_slot_1 = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 3 %frame_slot_1 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1) store {} addrspace(10)* %aboxed, {} addrspace(10)** %frame_slot_1, align 8 %bboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %b) -; CHECK: %frame_slot_2 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: %frame_slot_2 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; OPAQUE: %frame_slot_2 = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 %frame_slot_2 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0) store {} addrspace(10)* %bboxed, {} addrspace(10)** %frame_slot_2, align 8 -; CHECK: call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) +; TYPED: call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) +; OPAQUE: call void @boxed_simple(ptr addrspace(10) %aboxed, ptr addrspace(10) %bboxed) call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) -; CHECK-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 -; CHECK-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load {} addrspace(10)*, {} addrspace(10)** [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0 -; CHECK-NEXT: [[GCFRAME_SLOT4:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)** -; CHECK-NEXT: store {} addrspace(10)* [[PREV_GCFRAME_PTR4]], {} addrspace(10)** [[GCFRAME_SLOT4]], align 
8, !tbaa !0 +; TYPED-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 +; TYPED-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load {} addrspace(10)*, {} addrspace(10)** [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0 +; TYPED-NEXT: [[GCFRAME_SLOT4:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)** +; TYPED-NEXT: store {} addrspace(10)* [[PREV_GCFRAME_PTR4]], {} addrspace(10)** [[GCFRAME_SLOT4]], align 8, !tbaa !0 + +; OPAQUE-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 1 +; OPAQUE-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load ptr addrspace(10), ptr [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0 +; OPAQUE-NEXT: store ptr addrspace(10) [[PREV_GCFRAME_PTR4]], ptr [[GCFRAME_SLOT]], align 8, !tbaa !0 call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) ; CHECK-NEXT: ret void ret void @@ -61,7 +80,8 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* -; CHECK: %v = call noalias nonnull dereferenceable({{[0-9]+}}) {} addrspace(10)* @ijl_gc_pool_alloc +; TYPED: %v = call noalias nonnull dereferenceable({{[0-9]+}}) {} addrspace(10)* @ijl_gc_pool_alloc +; OPAQUE: %v = call noalias nonnull dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_pool_alloc %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 8) %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1 @@ -76,7 +96,8 @@ top: %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* ; CHECK: %0 = add i64 %size, 8 -; CHECK: %v = call noalias nonnull dereferenceable(8) {} addrspace(10)* @ijl_gc_alloc_typed(i8* %ptls_i8, i64 %0, i8* null) +; TYPED: %v = call noalias nonnull dereferenceable(8) {} addrspace(10)* @ijl_gc_alloc_typed(i8* %ptls_i8, i64 %0, i8* null) +; OPAQUE: %v = call noalias nonnull dereferenceable(8) ptr 
addrspace(10) @ijl_gc_alloc_typed(ptr %ptls_i8, i64 %0, ptr null) %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 %size) %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1 diff --git a/test/llvmpasses/float16.ll b/test/llvmpasses/float16.ll index 668c6ff3dd261..ab1425ec12fa5 100644 --- a/test/llvmpasses/float16.ll +++ b/test/llvmpasses/float16.ll @@ -1,6 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -DemoteFloat16 -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -DemoteFloat16 -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -DemoteFloat16 -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s define half @demotehalf_test(half %a, half %b) #0 { top: diff --git a/test/llvmpasses/gcroots.ll b/test/llvmpasses/gcroots.ll index eefd847bf68fa..7d29a9e3b1f9e 100644 --- a/test/llvmpasses/gcroots.ll +++ b/test/llvmpasses/gcroots.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*) @@ -16,17 +19,28 @@ top: ; CHECK-LABEL: @simple %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 -; CHECK: call {} addrspace(10)* @jl_box_int64 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 +; TYPED: call {} addrspace(10)* @jl_box_int64 +; OPAQUE: call ptr addrspace(10) @jl_box_int64 %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] -; CHECK-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] + +; OPAQUE: 
[[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) ; CHECK-NEXT: %bboxed = ; Make sure the same gc slot isn't re-used -; CHECK-NOT: getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]] -; CHECK: [[GEP1:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]] -; CHECK-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] + +; TYPED-NOT: getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]] +; TYPED: [[GEP1:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]] +; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] + +; OPAQUE-NOT: getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0]] +; OPAQUE: [[GEP1:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]] +; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]] + ; CHECK-NEXT: call void @boxed_simple call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) @@ -37,7 +51,8 @@ define void @leftover_alloca({} addrspace(10)* %a) { ; If this pass encounters an alloca, it'll just sink it into the gcframe, ; relying on mem2reg to catch simple cases such as this earlier ; CHECK-LABEL: @leftover_alloca -; CHECK: %var = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe +; TYPED: %var = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe +; OPAQUE: %var = getelementptr inbounds ptr addrspace(10), ptr %gcframe %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %var = alloca {} addrspace(10)* @@ -55,11 +70,16 @@ define void @simple_union() { ; CHECK-LABEL: @simple_union %pgcstack = call {}*** @julia.get_pgcstack() %ptls = 
call {}*** @julia.ptls_states() -; CHECK: %a = call { {} addrspace(10)*, i8 } @union_ret() +; TYPED: %a = call { {} addrspace(10)*, i8 } @union_ret() +; OPAQUE: %a = call { ptr addrspace(10), i8 } @union_ret() %a = call { {} addrspace(10)*, i8 } @union_ret() -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] -; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %a, 0 -; CHECK-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]] +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; TYPED-NEXT: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %a, 0 +; TYPED-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]] + +; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; OPAQUE-NEXT: [[EXTRACT:%.*]] = extractvalue { ptr addrspace(10), i8 } %a, 0 +; OPAQUE-NEXT: store ptr addrspace(10) [[EXTRACT]], ptr [[GEP0]] call void @union_arg({{} addrspace(10)*, i8} %a) ret void } @@ -81,7 +101,8 @@ define void @select_simple(i64 %a, i64 %b) { define void @phi_simple(i64 %a, i64 %b) { top: ; CHECK-LABEL: @phi_simple -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %cmp = icmp eq i64 %a, %b @@ -94,8 +115,11 @@ blabel: br label %common common: %phi = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ] -; CHECK: [[GEP:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP]] +; TYPED: [[GEP:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP]] + 
+; OPAQUE: [[GEP:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) %phi, ptr [[GEP]] call void @one_arg_boxed({} addrspace(10)* %phi) ret void } @@ -104,7 +128,8 @@ declare void @one_arg_decayed(i64 addrspace(12)*) define void @select_lift(i64 %a, i64 %b) { ; CHECK-LABEL: @select_lift -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) @@ -112,7 +137,8 @@ define void @select_lift(i64 %a, i64 %b) { %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) %bdecayed = addrspacecast {} addrspace(10)* %bboxed to i64 addrspace(12)* %cmp = icmp eq i64 %a, %b -; CHECK: %gclift = select i1 %cmp, {} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed +; TYPED: %gclift = select i1 %cmp, {} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed +; OPAQUE: %gclift = select i1 %cmp, ptr addrspace(10) %aboxed, ptr addrspace(10) %bboxed %selectb = select i1 %cmp, i64 addrspace(12)* %adecayed, i64 addrspace(12)* %bdecayed call void @one_arg_decayed(i64 addrspace(12)* %selectb) ret void @@ -121,7 +147,8 @@ define void @select_lift(i64 %a, i64 %b) { define void @phi_lift(i64 %a, i64 %b) { top: ; CHECK-LABEL: @phi_lift -; CHECK: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ] +; TYPED: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ] +; OPAQUE: %gclift = phi ptr addrspace(10) [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ] %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %cmp = icmp eq i64 %a, %b @@ -150,7 +177,8 @@ top: br i1 %cmp, label %alabel, label %blabel alabel: %u = call { {} addrspace(10)*, i8 } @union_ret() -; CHECK: 
%aboxed = extractvalue { {} addrspace(10)*, i8 } %u, 0 +; TYPED: %aboxed = extractvalue { {} addrspace(10)*, i8 } %u, 0 +; OPAQUE: %aboxed = extractvalue { ptr addrspace(10), i8 } %u, 0 %aboxed = extractvalue { {} addrspace(10)*, i8 } %u, 0 %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)* ; CHECK: br label %common @@ -160,7 +188,8 @@ blabel: %bdecayed = addrspacecast {} addrspace(10)* %bboxed to i64 addrspace(12)* br label %common common: -; CHECK: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ] +; TYPED: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ] +; OPAQUE: %gclift = phi ptr addrspace(10) [ %aboxed, %alabel ], [ %bboxed, %blabel ] %phi = phi i64 addrspace(12)* [ %adecayed, %alabel ], [ %bdecayed, %blabel ] call void @one_arg_decayed(i64 addrspace(12)* %phi) ret void @@ -169,7 +198,8 @@ common: define void @live_if_live_out(i64 %a, i64 %b) { ; CHECK-LABEL: @live_if_live_out top: -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() ; The failure case is failing to realize that `aboxed` is live across the first @@ -187,25 +217,34 @@ succ: ; safepoint define {} addrspace(10)* @ret_use(i64 %a, i64 %b) { ; CHECK-LABEL: @ret_use -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed +; TYPED: store {} addrspace(10)* %aboxed +; OPAQUE: store ptr addrspace(10) %aboxed %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) ret {} addrspace(10)* %aboxed } define {{} addrspace(10)*, i8} @ret_use_struct() { ; 
CHECK-LABEL: @ret_use_struct -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() -; CHECK: %aunion = call { {} addrspace(10)*, i8 } @union_ret() +; TYPED: %aunion = call { {} addrspace(10)*, i8 } @union_ret() +; OPAQUE: %aunion = call { ptr addrspace(10), i8 } @union_ret() %aunion = call { {} addrspace(10)*, i8 } @union_ret() -; CHECK-DAG: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] -; CHECK-DAG: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %aunion, 0 -; CHECK-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]] +; TYPED-DAG: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; TYPED-DAG: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %aunion, 0 +; TYPED-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]] + +; OPAQUE-DAG: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; OPAQUE-DAG: [[EXTRACT:%.*]] = extractvalue { ptr addrspace(10), i8 } %aunion, 0 +; OPAQUE-NEXT: store ptr addrspace(10) [[EXTRACT]], ptr [[GEP0]] + ; CHECK-NEXT: call void @jl_safepoint() call void @jl_safepoint() ret {{} addrspace(10)*, i8} %aunion @@ -234,23 +273,27 @@ top: define void @global_ref() { ; CHECK-LABEL: @global_ref -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %loaded = load {} addrspace(10)*, {} addrspace(10)** getelementptr ({} addrspace(10)*, {} addrspace(10)** inttoptr (i64 140540744325952 to {} addrspace(10)**), i64 1) -; CHECK: store {} addrspace(10)* %loaded, {} 
addrspace(10)** +; TYPED: store {} addrspace(10)* %loaded, {} addrspace(10)** +; OPAQUE: store ptr addrspace(10) %loaded, ptr call void @one_arg_boxed({} addrspace(10)* %loaded) ret void } define {} addrspace(10)* @no_redundant_rerooting(i64 %a, i1 %cond) { ; CHECK-LABEL: @no_redundant_rerooting -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed +; TYPED: store {} addrspace(10)* %aboxed +; OPAQUE: store ptr addrspace(10) %aboxed ; CHECK-NEXT: call void @jl_safepoint() call void @jl_safepoint() br i1 %cond, label %blocka, label %blockb @@ -270,12 +313,14 @@ declare void @llvm.memcpy.p064.p10i8.i64(i64*, i8 addrspace(10)*, i64, i32, i1) define void @memcpy_use(i64 %a, i64 *%aptr) { ; CHECK-LABEL: @memcpy_use -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed +; TYPED: store {} addrspace(10)* %aboxed +; OPAQUE: store ptr addrspace(10) %aboxed call void @jl_safepoint() %acast = bitcast {} addrspace(10)* %aboxed to i8 addrspace(10)* call void @llvm.memcpy.p064.p10i8.i64(i64* %aptr, i8 addrspace(10)* %acast, i64 8, i32 1, i1 false) @@ -287,20 +332,24 @@ declare void @llvm.julia.gc_preserve_end(token) define void @gc_preserve(i64 %a) { ; CHECK-LABEL: @gc_preserve -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls 
= call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed +; TYPED: store {} addrspace(10)* %aboxed +; OPAQUE: store ptr addrspace(10) %aboxed call void @jl_safepoint() %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %aboxed) %aboxed2 = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed2 +; TYPED: store {} addrspace(10)* %aboxed2 +; OPAQUE: store ptr addrspace(10) %aboxed2 call void @jl_safepoint() call void @llvm.julia.gc_preserve_end(token %tok) %aboxed3 = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed3 +; TYPED: store {} addrspace(10)* %aboxed3 +; OPAQUE: store ptr addrspace(10) %aboxed3 call void @jl_safepoint() call void @one_arg_boxed({} addrspace(10)* %aboxed2) call void @one_arg_boxed({} addrspace(10)* %aboxed3) @@ -309,23 +358,37 @@ top: define void @gc_preserve_vec([2 x <2 x {} addrspace(10)*>] addrspace(11)* nocapture nonnull readonly dereferenceable(16)) { ; CHECK-LABEL: @gc_preserve_vec -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 6 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 6 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 6 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %v = load [2 x <2 x {} addrspace(10)*>], [2 x <2 x {} addrspace(10)*>] addrspace(11)* %0, align 8 -; CHECK-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0 -; CHECK-DAG: [[EXTRACT12:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0 -; CHECK-DAG: [[EXTRACT21:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1 -; CHECK-DAG: [[EXTRACT22:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1 -; CHECK-DAG: [[V11:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT11]], i32 0 -; CHECK-DAG: [[V12:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT12]], i32 1 -; CHECK-DAG: [[V21:%.*]] = 
extractelement <2 x {} addrspace(10)*> [[EXTRACT21]], i32 0 -; CHECK-DAG: [[V22:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT22]], i32 1 -; CHECK-DAG: store {} addrspace(10)* [[V11]] -; CHECK-DAG: store {} addrspace(10)* [[V12]] -; CHECK-DAG: store {} addrspace(10)* [[V21]] -; CHECK-DAG: store {} addrspace(10)* [[V22]] +; TYPED-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0 +; TYPED-DAG: [[EXTRACT12:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0 +; TYPED-DAG: [[EXTRACT21:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1 +; TYPED-DAG: [[EXTRACT22:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1 +; TYPED-DAG: [[V11:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT11]], i32 0 +; TYPED-DAG: [[V12:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT12]], i32 1 +; TYPED-DAG: [[V21:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT21]], i32 0 +; TYPED-DAG: [[V22:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT22]], i32 1 +; TYPED-DAG: store {} addrspace(10)* [[V11]] +; TYPED-DAG: store {} addrspace(10)* [[V12]] +; TYPED-DAG: store {} addrspace(10)* [[V21]] +; TYPED-DAG: store {} addrspace(10)* [[V22]] + +; OPAQUE-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 0 +; OPAQUE-DAG: [[EXTRACT12:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 0 +; OPAQUE-DAG: [[EXTRACT21:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 1 +; OPAQUE-DAG: [[EXTRACT22:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 1 +; OPAQUE-DAG: [[V11:%.*]] = extractelement <2 x ptr addrspace(10)> [[EXTRACT11]], i32 0 +; OPAQUE-DAG: [[V12:%.*]] = extractelement <2 x ptr addrspace(10)> [[EXTRACT12]], i32 1 +; OPAQUE-DAG: [[V21:%.*]] = extractelement <2 x ptr addrspace(10)> [[EXTRACT21]], i32 0 +; OPAQUE-DAG: [[V22:%.*]] = extractelement <2 x ptr addrspace(10)> [[EXTRACT22]], i32 1 +; OPAQUE-DAG: store ptr addrspace(10) [[V11]] +; OPAQUE-DAG: store ptr addrspace(10) [[V12]] +; 
OPAQUE-DAG: store ptr addrspace(10) [[V21]] +; OPAQUE-DAG: store ptr addrspace(10) [[V22]] %tok = call token (...) @llvm.julia.gc_preserve_begin([2 x <2 x {} addrspace(10)*>] %v, i64 addrspace(10)* null, {}*** %ptls) call void @jl_safepoint() ret void @@ -365,7 +428,8 @@ declare {} addrspace(10) *@alloc() define {} addrspace(10)* @vec_loadobj() { ; CHECK-LABEL: @vec_loadobj -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %v4 = call {}*** @julia.ptls_states() %obj = call {} addrspace(10) *@alloc() @@ -379,7 +443,8 @@ define {} addrspace(10)* @vec_loadobj() { define {} addrspace(10)* @vec_gep() { ; CHECK-LABEL: @vec_gep -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %v4 = call {}*** @julia.ptls_states() %obj = call {} addrspace(10) *@alloc() @@ -394,7 +459,8 @@ define {} addrspace(10)* @vec_gep() { declare i1 @check_property({} addrspace(10)* %val) define void @loopyness(i1 %cond1, {} addrspace(10) *%arg) { ; CHECK-LABEL: @loopyness -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -407,15 +473,21 @@ header: a: ; This needs a store ; CHECK-LABEL: a: -; CHECK: [[GEP1:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] -; CHECK: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP1]] +; TYPED: [[GEP1:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; TYPED: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP1]] + +; OPAQUE: [[GEP1:%.*]] = getelementptr 
inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; OPAQUE: store ptr addrspace(10) %phi, ptr [[GEP1]] call void @one_arg_boxed({} addrspace(10)* %phi) br label %latch latch: ; This as well in case we went the other path -; CHECK: [[GEP2:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]] -; CHECK: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP2]] +; TYPED: [[GEP2:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]] +; TYPED: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP2]] + +; OPAQUE: [[GEP2:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0]] +; OPAQUE: store ptr addrspace(10) %phi, ptr [[GEP2]] %obj = call {} addrspace(10)* @alloc() %cond = call i1 @check_property({} addrspace(10)* %phi) br i1 %cond, label %exit, label %header @@ -426,7 +498,8 @@ exit: define {} addrspace(10)* @phi_union(i1 %cond) { ; CHECK-LABEL: @phi_union -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -451,7 +524,8 @@ join: define {} addrspace(10)* @select_union(i1 %cond) { ; CHECK-LABEL: @select_union -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -467,7 +541,8 @@ top: define i8 @simple_arrayptr() { ; CHECK-LABEL: @simple_arrayptr -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -484,7 +559,8 @@ top: define {} addrspace(10)* 
@vecstoreload(<2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecstoreload -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -498,7 +574,8 @@ top: define void @vecphi(i1 %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecphi -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -524,7 +601,8 @@ common: define i8 @phi_arrayptr(i1 %cond) { ; CHECK-LABEL: @phi_arrayptr -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -563,7 +641,8 @@ common: define void @vecselect(i1 %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecselect -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -580,13 +659,15 @@ top: define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecselect_lift -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*> call void @jl_safepoint() -; CHECK: %gclift = select i1 %cond, {} 
addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}} +; TYPED: %gclift = select i1 %cond, {} addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}} +; OPAQUE: %gclift = select i1 %cond, ptr addrspace(10) null, ptr addrspace(10) %{{[0-9]+}} %select = select i1 %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %decayed call void @jl_safepoint() %el1 = extractelement <2 x i64 addrspace(12)*> %select, i32 0 @@ -598,13 +679,15 @@ define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) { define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecvecselect_lift -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*> call void @jl_safepoint() -; CHECK: %gclift = select i1 %{{[0-9]+}}, {} addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}} +; TYPED: %gclift = select i1 %{{[0-9]+}}, {} addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}} +; OPAQUE: %gclift = select i1 %{{[0-9]+}}, ptr addrspace(10) null, ptr addrspace(10) %{{[0-9]+}} %select = select <2 x i1> %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %decayed call void @jl_safepoint() %el1 = extractelement <2 x i64 addrspace(12)*> %select, i32 0 @@ -616,14 +699,16 @@ define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) { define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) { ; CHECK-LABEL: @vecscalarselect_lift -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed 
= call {} addrspace(10)* @jl_box_int64(i64 signext %a) %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)* %avec = getelementptr i64, i64 addrspace(12)* %adecayed, <2 x i32> zeroinitializer call void @jl_safepoint() -; CHECK: %gclift = select i1 %{{[0-9]+}}, {} addrspace(10)* null, {} addrspace(10)* %aboxed +; TYPED: %gclift = select i1 %{{[0-9]+}}, {} addrspace(10)* null, {} addrspace(10)* %aboxed +; OPAQUE: %gclift = select i1 %{{[0-9]+}}, ptr addrspace(10) null, ptr addrspace(10) %aboxed %select = select <2 x i1> %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %avec call void @jl_safepoint() %el1 = extractelement <2 x i64 addrspace(12)*> %select, i32 0 @@ -635,14 +720,16 @@ define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) { define void @scalarvecselect_lift(i1 %cond, i64 %a) { ; CHECK-LABEL: @scalarvecselect_lift -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)* %avec = getelementptr i64, i64 addrspace(12)* %adecayed, <2 x i32> zeroinitializer call void @jl_safepoint() -; CHECK: %gclift = select i1 %cond, {} addrspace(10)* null, {} addrspace(10)* %aboxed +; TYPED: %gclift = select i1 %cond, {} addrspace(10)* null, {} addrspace(10)* %aboxed +; OPAQUE: %gclift = select i1 %cond, ptr addrspace(10) null, ptr addrspace(10) %aboxed %select = select i1 %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %avec call void @jl_safepoint() %el1 = extractelement <2 x i64 addrspace(12)*> %select, i32 0 @@ -654,7 +741,8 @@ define void @scalarvecselect_lift(i1 %cond, i64 %a) { define i8 @select_arrayptr(i1 %cond) { ; CHECK-LABEL: @select_arrayptr -; CHECK: %gcframe = alloca 
{} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -681,9 +769,14 @@ top: define i8 @vector_arrayptrs() { ; CHECK-LABEL: @vector_arrayptrs -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 + +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] + +; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) %obj1, ptr [[GEP0]] ; top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -702,10 +795,16 @@ declare <2 x i8 addrspace(13)*> @llvm.masked.load.v2p13i8.p11v2p13i8 (<2 x i8 ad define i8 @masked_arrayptrs() { ; CHECK-LABEL: @masked_arrayptrs -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 -; CHECK: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.load.v2p13i8.p11v2p13i8(<2 x i8 addrspace(13)*> addrspace(11)* %arrayptrptr, i32 16, <2 x i1> , <2 x i8 addrspace(13)*> zeroinitializer) -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 + +; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.load.v2p13i8.p11v2p13i8(<2 x i8 addrspace(13)*> addrspace(11)* %arrayptrptr, i32 16, <2 x i1> , <2 x i8 addrspace(13)*> zeroinitializer) +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} 
addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] + +; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.load.v2p13.p11(ptr addrspace(11) %arrayptrptr, i32 16, <2 x i1> , <2 x ptr addrspace(13)> zeroinitializer) +; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) %obj1, ptr [[GEP0]] ; top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -724,10 +823,16 @@ declare <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8 (<2 x i8 define i8 @gather_arrayptrs() { ; CHECK-LABEL: @gather_arrayptrs -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 -; CHECK: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> , <2 x i8 addrspace(13)*> zeroinitializer) -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 + +; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> , <2 x i8 addrspace(13)*> zeroinitializer) +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] + +; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.gather.v2p13.v2p11(<2 x ptr addrspace(11)> %arrayptrptrs, i32 16, <2 x i1> , <2 x ptr addrspace(13)> zeroinitializer) +; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) %obj1, ptr [[GEP0]] ; top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -745,10 +850,16 @@ top: define i8 @gather_arrayptrs_alltrue() { 
; CHECK-LABEL: @gather_arrayptrs -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 -; CHECK: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> , <2 x i8 addrspace(13)*> zeroinitializer) -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 + +; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> , <2 x i8 addrspace(13)*> zeroinitializer) +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] + +; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.gather.v2p13.v2p11(<2 x ptr addrspace(11)> %arrayptrptrs, i32 16, <2 x i1> , <2 x ptr addrspace(13)> zeroinitializer) +; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) %obj1, ptr [[GEP0]] ; top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -766,9 +877,14 @@ top: define i8 @lost_select_decayed(i1 %arg1) { ; CHECK-LABEL: @lost_select_decayed -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* [[SOMETHING:%.*]], {} addrspace(10)** [[GEP0]] +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 + +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* [[SOMETHING:%.*]], {} addrspace(10)** [[GEP0]] + +; OPAQUE: [[GEP0:%.*]] = 
getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) [[SOMETHING:%.*]], ptr [[GEP0]] top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() diff --git a/test/llvmpasses/image-codegen.jl b/test/llvmpasses/image-codegen.jl new file mode 100644 index 0000000000000..8132dc4faa22a --- /dev/null +++ b/test/llvmpasses/image-codegen.jl @@ -0,0 +1,22 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license +# RUN: export JULIA_LLVM_ARGS="--print-before=loop-vectorize --print-module-scope" +# RUN: rm -rf %t +# RUN: mkdir %t +# RUN: julia --image-codegen --startup-file=no %s 2> %t/output.txt +# RUN: FileCheck %s < %t/output.txt + +# COM: checks that global variables compiled in imaging codegen +# COM: are marked as external and not internal +# COM: Also makes sure that --imaging-codegen doesn't crash + +# CHECK: *** IR Dump Before +# CHECK-NOT: internal global +# CHECK-NOT: private global +# CHECK: jl_global +# CHECK-SAME: = global +# CHECK: julia_f_ +# CHECK-NOT: internal global +# CHECK-NOT: private global + +f() = "abcd" +f() diff --git a/test/llvmpasses/julia-licm-fail.ll b/test/llvmpasses/julia-licm-fail.ll index 250ad620b05e6..464a96f1413d9 100644 --- a/test/llvmpasses/julia-licm-fail.ll +++ b/test/llvmpasses/julia-licm-fail.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE ; COM: This file contains functions that should not trigger allocations to be hoisted out of loops @@ -22,9 +25,11 @@ preheader: br label %loop ; CHECK: loop: loop: -; CHECK-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: %ignore = call {} addrspace(10)* @escape({} addrspace(10)* %alloc) +; TYPED-NEXT: %ignore = call {} addrspace(10)* @escape({} addrspace(10)* %alloc) +; OPAQUE-NEXT: %ignore = call ptr addrspace(10) @escape(ptr addrspace(10) %alloc) %ignore = call {} addrspace(10)* @escape({} addrspace(10)* %alloc) br i1 %ret, label %return, label %loop return: @@ -46,11 +51,14 @@ preheader: br label %loop ; 
CHECK: loop: loop: -; CHECK-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: %cast = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* +; TYPED-NEXT: %cast = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* +; OPAQUE-NEXT: %cast = addrspacecast ptr addrspace(10) %alloc to ptr addrspace(11) %cast = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* -; CHECK-NEXT: %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %cast) +; TYPED-NEXT: %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %cast) +; OPAQUE-NEXT: %ptr = call nonnull ptr @julia.pointer_from_objref(ptr addrspace(11) %cast) %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %cast) br i1 %ret, label %return, label %loop return: diff --git a/test/llvmpasses/julia-licm-memoryssa.ll b/test/llvmpasses/julia-licm-memoryssa.ll new file mode 100644 index 0000000000000..e1684c7577578 --- /dev/null +++ b/test/llvmpasses/julia-licm-memoryssa.ll @@ -0,0 +1,171 @@ +; COM: NewPM-only test, tests that memoryssa is preserved correctly + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print)' -S -o /dev/null %s 2>&1 | FileCheck %s 
--check-prefixes=CHECK,OPAQUE + +@tag = external addrspace(10) global {}, align 16 + +declare void @julia.write_barrier({} addrspace(10)*, ...) + +declare {}*** @julia.get_pgcstack() + +declare token @llvm.julia.gc_preserve_begin(...) + +declare void @llvm.julia.gc_preserve_end(token) + +declare void @mssa_use({} addrspace(10)*) + +declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) + +; COM: check basic preserve hoist/sink functionality +; CHECK-LABEL: MemorySSA for function: hoist_sink_preserves +; CHECK-LABEL: @hoist_sink_preserves +define void @hoist_sink_preserves({} addrspace(10)* %obj, i1 %ret) { +; CHECK: top: +top: +; CHECK-NEXT: [[PGCSTACK:[0-9]+]] = MemoryDef(liveOnEntry) + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** +; CHECK: br label %preheader + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: [[PRESERVE_TOKEN:[0-9]+]] = MemoryDef([[PGCSTACK]]) +; CHECK-NEXT: %preserve_token = call token (...) @llvm.julia.gc_preserve_begin +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: call token (...) @llvm.julia.gc_preserve_begin + %preserve_token = call token (...) 
@llvm.julia.gc_preserve_begin({} addrspace(10)* %obj) +; CHECK-NOT: call void @llvm.julia.gc_preserve_end + call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({preheader,[[PRESERVE_TOKEN]]},{loop,[[MPHI]]}) +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return, label %loop +; CHECK: return: +return: +; CHECK-NEXT: [[PRESERVE_END:[0-9]+]] = MemoryDef([[MPHI]]) +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: [[MSSA_USE:[0-9]+]] = MemoryDef([[PRESERVE_END]]) +; CHECK-NEXT: call void @mssa_use + call void @mssa_use({} addrspace(10)* %obj) +; CHECK-NEXT: ret void + ret void +} + +; COM: check sink functionality when there are multiple loop exit blocks +; CHECK-LABEL: MemorySSA for function: hoist_multisink_preserves +; CHECK-LABEL: @hoist_multisink_preserves +define void @hoist_multisink_preserves({} addrspace(10)* %obj, i1 %ret) { +; CHECK: top: +top: +; CHECK-NEXT: [[PGCSTACK:[0-9]+]] = MemoryDef(liveOnEntry) + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** +; CHECK: br label %preheader + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: [[PRESERVE_TOKEN:[0-9]+]] = MemoryDef([[PGCSTACK]]) +; CHECK-NEXT: %preserve_token = call token (...) @llvm.julia.gc_preserve_begin +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: call token (...) @llvm.julia.gc_preserve_begin + %preserve_token = call token (...) 
@llvm.julia.gc_preserve_begin({} addrspace(10)* %obj) +; CHECK-NOT: call void @llvm.julia.gc_preserve_end + call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({preheader,[[PRESERVE_TOKEN]]},{loop2,[[MPHI]]}) +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return, label %loop2 +; CHECK: loop2: +loop2: +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return2, label %loop +; CHECK: return: +return: +; CHECK-NEXT: [[PRESERVE_END_1:[0-9]+]] = MemoryDef([[MPHI]]) +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: [[MSSA_USE:[0-9]+]] = MemoryDef([[PRESERVE_END_1]]) +; CHECK-NEXT: call void @mssa_use + call void @mssa_use({} addrspace(10)* %obj) +; CHECK-NEXT: ret void + ret void +; CHECK: return2: +return2: +; CHECK-NEXT: [[PRESERVE_END_2:[0-9]+]] = MemoryDef([[MPHI]]) +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: [[MSSA_USE:[0-9]+]] = MemoryDef([[PRESERVE_END_2]]) +; CHECK-NEXT: call void @mssa_use + call void @mssa_use({} addrspace(10)* %obj) +; CHECK-NEXT: ret void + ret void +} + +define void @hoist_allocation({} addrspace(10)* %obj, i1 %ret) { +; CHECK: top: +top: +; CHECK-NEXT: [[PGCSTACK:[0-9]+]] = MemoryDef(liveOnEntry) + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: [[ALLOC:[0-9]+]] = MemoryDef([[PGCSTACK]]) + +; TYPED-NEXT: %alloc = call {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 0, {} addrspace(10)* @tag) +; TYPED-NEXT: %[[BCAST:.*]] = bitcast {} addrspace(10)* %alloc to i8 addrspace(10)* + +; OPAQUE-NEXT: %alloc = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %current_task, i64 0, ptr addrspace(10) @tag) + +; CHECK-NEXT: [[MSET:[0-9]+]] = MemoryDef([[ALLOC]]) +; CHECK-NEXT: call void @llvm.memset +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: %alloc +; 
CHECK-NOT: @julia.gc_alloc_obj + %alloc = call {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 0, {} addrspace(10)* @tag) +; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({preheader,[[MSET]]},{loop,[[MPHI]]}) + br i1 %ret, label %return, label %loop +; CHECK: return: +return: +; CHECK-NEXT: [[MSSA_USE:[0-9]+]] = MemoryDef([[MPHI]]) +; CHECK-NEXT: call void @mssa_use + call void @mssa_use({} addrspace(10)* %obj) +; CHECK-NEXT: ret void + ret void +} + +define void @hoist_write_barrier({} addrspace(10)* %obj, i1 %ret) { +; CHECK: top: +top: +; CHECK-NEXT: [[PGCSTACK:[0-9]+]] = MemoryDef(liveOnEntry) + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: [[WB:[0-9]+]] = MemoryDef([[PGCSTACK]]) +; CHECK-NEXT: call void +; CHECK-SAME: @julia.write_barrier +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: @julia.write_barrier + call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* %obj) +; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({preheader,[[WB]]},{loop,[[MPHI]]}) + br i1 %ret, label %return, label %loop +; CHECK: return: +return: +; CHECK-NEXT: [[MSSA_USE:[0-9]+]] = MemoryDef([[MPHI]]) +; CHECK-NEXT: call void @mssa_use + call void @mssa_use({} addrspace(10)* %obj) +; CHECK-NEXT: ret void + ret void +} diff --git a/test/llvmpasses/julia-licm-missed.ll b/test/llvmpasses/julia-licm-missed.ll index 977b8e2a787f9..941b2d072a1cc 100644 --- a/test/llvmpasses/julia-licm-missed.ll +++ b/test/llvmpasses/julia-licm-missed.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE ; COM: This file contains functions that currently do not trigger allocations to be hoisted out of loops ; COM: i.e. they are missed optimizations @@ -26,13 +29,17 @@ preheader: br label %loop ; CHECK: loop: loop: -; CHECK-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: %derived = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* +; TYPED-NEXT: %derived = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* +; OPAQUE-NEXT: %derived = addrspacecast ptr addrspace(10) %alloc to ptr addrspace(11) %derived = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* -; CHECK-NEXT: %ptr = bitcast {} 
addrspace(11)* %derived to {} addrspace(10)* addrspace(11)* +; TYPED-NEXT: %ptr = bitcast {} addrspace(11)* %derived to {} addrspace(10)* addrspace(11)* +; OPAQUE-NEXT: %ptr = bitcast ptr addrspace(11) %derived to ptr addrspace(11) %ptr = bitcast {} addrspace(11)* %derived to {} addrspace(10)* addrspace(11)* -; CHECK-NEXT: store {} addrspace(10)* %obj, {} addrspace(10)* addrspace(11)* %ptr, align 8 +; TYPED-NEXT: store {} addrspace(10)* %obj, {} addrspace(10)* addrspace(11)* %ptr, align 8 +; OPAQUE-NEXT: store ptr addrspace(10) %obj, ptr addrspace(11) %ptr, align 8 store {} addrspace(10)* %obj, {} addrspace(10)* addrspace(11)* %ptr, align 8 br i1 %ret, label %return, label %loop return: @@ -56,12 +63,14 @@ preheader: br label %loop ; CHECK: loop: loop: -; CHECK-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) br label %other ; CHECK: other: other: -; CHECK-NEXT: %phi = phi {} addrspace(10)* [ %alloc, %loop ] +; TYPED-NEXT: %phi = phi {} addrspace(10)* [ %alloc, %loop ] +; OPAQUE-NEXT: %phi = phi ptr addrspace(10) [ %alloc, %loop ] %phi = phi {} addrspace(10)* [ %alloc, %loop ] br i1 %ret, label %return, label %loop return: diff --git a/test/llvmpasses/julia-licm.ll b/test/llvmpasses/julia-licm.ll index 6fc6f85de7c26..8bedc5db75d96 100644 --- a/test/llvmpasses/julia-licm.ll +++ b/test/llvmpasses/julia-licm.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE @tag = external addrspace(10) global {}, align 16 @@ -9,6 +12,75 @@ declare void @julia.write_barrier({}*, ...) declare {}*** @julia.get_pgcstack() +declare token @llvm.julia.gc_preserve_begin(...) + +declare void @llvm.julia.gc_preserve_end(token) + +; COM: check basic preserve hoist/sink functionality +; CHECK-LABEL: @hoist_sink_preserves +define void @hoist_sink_preserves({} addrspace(10)* %obj, i1 %ret) { +top: + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** +; CHECK: br label %preheader + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: %preserve_token = call token (...) @llvm.julia.gc_preserve_begin +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: call token (...) @llvm.julia.gc_preserve_begin + %preserve_token = call token (...) 
@llvm.julia.gc_preserve_begin({} addrspace(10)* %obj) +; CHECK-NOT: call void @llvm.julia.gc_preserve_end + call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return, label %loop +; CHECK: return: +return: +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: ret void + ret void +} + +; COM: check sink functionality when there are multiple loop exit blocks +; CHECK-LABEL: @hoist_multisink_preserves +define void @hoist_multisink_preserves({} addrspace(10)* %obj, i1 %ret) { +top: + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** +; CHECK: br label %preheader + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: %preserve_token = call token (...) @llvm.julia.gc_preserve_begin +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: call token (...) @llvm.julia.gc_preserve_begin + %preserve_token = call token (...) 
@llvm.julia.gc_preserve_begin({} addrspace(10)* %obj) +; CHECK-NOT: call void @llvm.julia.gc_preserve_end + call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return, label %loop2 +; CHECK: loop2: +loop2: +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return2, label %loop +; CHECK: return: +return: +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: ret void + ret void +; CHECK: return2: +return2: +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: ret void + ret void +} + ; COM: check basic allocation hoisting functionality ; CHECK-LABEL: @julia_allocation_hoist define nonnull {} addrspace(10)* @julia_allocation_hoist(i64 signext %0) #0 { @@ -28,16 +100,23 @@ L3: ; preds = %L3.loopexit, %top L4: ; preds = %top %current_task112 = getelementptr inbounds {}**, {}*** %1, i64 -12 %current_task1 = bitcast {}*** %current_task112 to {}** - ; CHECK: %3 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 8, {} addrspace(10)* @tag) - ; CHECK-NEXT: %4 = bitcast {} addrspace(10)* %3 to i8 addrspace(10)* - ; CHECK-NEXT: call void @llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} %4, i8 0, i64 8, i1 false) + ; TYPED: %3 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 8, {} addrspace(10)* @tag) + ; TYPED-NEXT: %4 = bitcast {} addrspace(10)* %3 to i8 addrspace(10)* + ; TYPED-NEXT: call void @llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} %4, i8 0, i64 8, i1 false) + + ; OPAQUE: %3 = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task1, i64 8, ptr addrspace(10) @tag) + ; OPAQUE-NEXT: call void @llvm.memset.p10.i64(ptr addrspace(10) align {{[0-9]+}} %3, i8 0, i64 8, i1 false) + ; CHECK-NEXT: br label %L22 br label %L22 L22: ; preds = %L4, %L22 %value_phi5 = phi i64 [ 1, %L4 ], [ %5, %L22 ] - ; CHECK: %value_phi5 = 
phi i64 [ 1, %L4 ], [ %6, %L22 ] - ; CHECK-NEXT %5 = bitcast {} addrspace(10)* %3 to i64 addrspace(10)* + ; TYPED: %value_phi5 = phi i64 [ 1, %L4 ], [ %6, %L22 ] + ; TYPED-NEXT: %5 = bitcast {} addrspace(10)* %3 to i64 addrspace(10)* + + ; OPAQUE: %value_phi5 = phi i64 [ 1, %L4 ], [ %5, %L22 ] + ; OPAQUE-NEXT: %4 = bitcast ptr addrspace(10) %3 to ptr addrspace(10) %3 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 8, {} addrspace(10)* @tag) #1 %4 = bitcast {} addrspace(10)* %3 to i64 addrspace(10)* store i64 %value_phi5, i64 addrspace(10)* %4, align 8, !tbaa !2 @@ -56,9 +135,13 @@ top: br label %preheader ; CHECK: preheader: preheader: -; CHECK-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: [[casted:%.*]] = bitcast {} addrspace(10)* %alloc to i8 addrspace(10)* -; CHECK-NEXT: call void @llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} [[casted]], i8 0, i64 8, i1 false) +; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; TYPED-NEXT: [[casted:%.*]] = bitcast {} addrspace(10)* %alloc to i8 addrspace(10)* +; TYPED-NEXT: call void @llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} [[casted]], i8 0, i64 8, i1 false) + +; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) +; OPAQUE-NEXT: call void @llvm.memset.p10.i64(ptr addrspace(10) align {{[0-9]+}} %alloc, i8 0, i64 8, i1 false) + ; CHECK-NEXT: br label %loop br label %loop loop: diff --git a/test/llvmpasses/late-lower-gc-addrspaces.ll b/test/llvmpasses/late-lower-gc-addrspaces.ll index 84a6da9f2554d..9849f432fb9a7 100644 --- a/test/llvmpasses/late-lower-gc-addrspaces.ll +++ b/test/llvmpasses/late-lower-gc-addrspaces.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia.
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE target triple = "amdgcn-amd-amdhsa" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" @@ -19,24 +22,39 @@ declare i32 @rooting_callee({} addrspace(12)*, {} addrspace(12)*) define void @gc_frame_lowering(i64 %a, i64 %b) { top: ; CHECK-LABEL: @gc_frame_lowering -; CHECK: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) -; CHECK: %pgcstack = call {}*** @julia.get_pgcstack() +; TYPED: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) +; TYPED: %pgcstack = call {}*** @julia.get_pgcstack() + +; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2) +; OPAQUE: %pgcstack = call ptr @julia.get_pgcstack() %pgcstack = call {}*** @julia.get_pgcstack() -; CHECK-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) -; CHECK-NEXT: call {} addrspace(10)* @jl_box_int64 +; TYPED-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) +; 
TYPED-NEXT: call {} addrspace(10)* @jl_box_int64 + +; OPAQUE-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2) +; OPAQUE-NEXT: call ptr addrspace(10) @jl_box_int64 %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: [[GEP0:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]) -; CHECK-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] +; TYPED: [[GEP0:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]) +; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] + +; OPAQUE: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]) +; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) ; CHECK-NEXT: %bboxed = ; Make sure the same gc slot isn't re-used -; CHECK-NOT: call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0]]) -; CHECK: [[GEP1:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]]) -; CHECK-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] +; TYPED-NOT: call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0]]) +; TYPED: [[GEP1:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]]) +; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] + +; OPAQUE-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]]) +; OPAQUE: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]]) +; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]] + ; CHECK-NEXT: call void @boxed_simple call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) -; CHECK-NEXT: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) 
+; TYPED-NEXT: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) +; OPAQUE-NEXT: call void @julia.pop_gc_frame(ptr %gcframe) ret void } @@ -46,17 +64,25 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 -; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** -; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -; CHECK-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) -; CHECK-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* -; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 -; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 +; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 +; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 +; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 +; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** +; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* +; TYPED-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) +; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* +; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 +; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 + +; OPAQUE: 
%current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 +; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; OPAQUE-NEXT: %v = call ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8) +; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: ret {} addrspace(10)* %v +; TYPED-NEXT: ret {} addrspace(10)* %v +; OPAQUE-NEXT: ret ptr addrspace(10) %v ret {} addrspace(10)* %v } @@ -71,23 +97,34 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 -; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** -; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -; CHECK-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) -; CHECK-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* -; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 -; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 +; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 +; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 +; 
TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 +; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** +; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* +; TYPED-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) +; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* +; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 +; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 + +; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 +; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; OPAQUE-NEXT: %v = call ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8) +; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* +; TYPED-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* +; OPAQUE-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10) %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* -; CHECK-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7 +; TYPED-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7 +; OPAQUE-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7 %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1 -; CHECK-NEXT: store i64 
%loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8 +; TYPED-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8 +; OPAQUE-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8 store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2 -; CHECK-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !7 +; TYPED-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !7 +; OPAQUE-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7 %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4 ; CHECK-NEXT: ret void ret void diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll index 98c472771aaf9..36e581993c176 100644 --- a/test/llvmpasses/late-lower-gc.ll +++ b/test/llvmpasses/late-lower-gc.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s -check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s -check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE @tag = external addrspace(10) global {}, align 16 @@ -16,24 +19,39 @@ declare i32 @rooting_callee({} addrspace(12)*, {} addrspace(12)*) define void 
@gc_frame_lowering(i64 %a, i64 %b) { top: ; CHECK-LABEL: @gc_frame_lowering -; CHECK: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) -; CHECK: %pgcstack = call {}*** @julia.get_pgcstack() +; TYPED: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) +; TYPED: %pgcstack = call {}*** @julia.get_pgcstack() + +; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2) +; OPAQUE: %pgcstack = call ptr @julia.get_pgcstack() %pgcstack = call {}*** @julia.get_pgcstack() -; CHECK-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) -; CHECK-NEXT: call {} addrspace(10)* @jl_box_int64 +; TYPED-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) +; TYPED-NEXT: call {} addrspace(10)* @jl_box_int64 + +; OPAQUE-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2) +; OPAQUE-NEXT: call ptr addrspace(10) @jl_box_int64 %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: [[GEP0:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]) -; CHECK-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] +; TYPED: [[GEP0:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]) +; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] + +; OPAQUE: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]) +; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) ; CHECK-NEXT: %bboxed = ; Make sure the same gc slot isn't re-used -; CHECK-NOT: call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0]]) -; CHECK: [[GEP1:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]]) -; CHECK-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] +; TYPED-NOT: call {} addrspace(10)** 
@julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0]]) +; TYPED: [[GEP1:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]]) +; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] + +; OPAQUE-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]]) +; OPAQUE: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]]) +; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]] + ; CHECK-NEXT: call void @boxed_simple call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) -; CHECK-NEXT: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) +; TYPED-NEXT: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) +; OPAQUE-NEXT: call void @julia.pop_gc_frame(ptr %gcframe) ret void } @@ -43,17 +61,25 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 -; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** -; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -; CHECK-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) -; CHECK-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* -; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 -; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 +; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 +; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** 
%current_task, i64 16 +; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 +; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** +; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* +; TYPED-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) +; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* +; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 +; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 + +; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 +; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; OPAQUE-NEXT: %v = call ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8) +; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: ret {} addrspace(10)* %v +; TYPED-NEXT: ret {} addrspace(10)* %v +; OPAQUE-NEXT: ret ptr addrspace(10) %v ret {} addrspace(10)* %v } @@ -68,23 +94,34 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 -; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* 
[[ptls_load]] to {}** -; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -; CHECK-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) -; CHECK-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* -; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 -; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 +; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 +; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 +; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 +; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** +; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* +; TYPED-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) +; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* +; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 +; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 + +; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 +; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; OPAQUE-NEXT: %v = call ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8) +; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* 
@julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* +; TYPED-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* +; OPAQUE-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10) %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* -; CHECK-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7 +; TYPED-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7 +; OPAQUE-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7 %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1 -; CHECK-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8 +; TYPED-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8 +; OPAQUE-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8 store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2 -; CHECK-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !7 +; TYPED-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !7 +; OPAQUE-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7 %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4 ; CHECK-NEXT: ret void ret void @@ -161,13 +198,21 @@ define void @decayar([2 x {} addrspace(10)* addrspace(11)*] %ar) { } ; CHECK-LABEL: @decayar -; CHECK: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) -; CHECK: %1 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1) -; CHECK: store {} addrspace(10)* %l0, {} addrspace(10)** %1, align 8 -; CHECK: %2 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0) -; CHECK: store {} addrspace(10)* %l1, {} addrspace(10)** %2, align 8 -; CHECK: %r = call i32 @callee_root({} addrspace(10)* %l0, {} addrspace(10)* %l1) -; CHECK: call void 
@julia.pop_gc_frame({} addrspace(10)** %gcframe) +; TYPED: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) +; TYPED: %1 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1) +; TYPED: store {} addrspace(10)* %l0, {} addrspace(10)** %1, align 8 +; TYPED: %2 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0) +; TYPED: store {} addrspace(10)* %l1, {} addrspace(10)** %2, align 8 +; TYPED: %r = call i32 @callee_root({} addrspace(10)* %l0, {} addrspace(10)* %l1) +; TYPED: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) + +; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2) +; OPAQUE: %1 = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 1) +; OPAQUE: store ptr addrspace(10) %l0, ptr %1, align 8 +; OPAQUE: %2 = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0) +; OPAQUE: store ptr addrspace(10) %l1, ptr %2, align 8 +; OPAQUE: %r = call i32 @callee_root(ptr addrspace(10) %l0, ptr addrspace(10) %l1) +; OPAQUE: call void @julia.pop_gc_frame(ptr %gcframe) !0 = !{i64 0, i64 23} !1 = !{!1} diff --git a/test/llvmpasses/llvmcall.jl b/test/llvmpasses/llvmcall.jl index 687abe0a8cd46..3e0df7a8885a7 100644 --- a/test/llvmpasses/llvmcall.jl +++ b/test/llvmpasses/llvmcall.jl @@ -1,7 +1,14 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" + # RUN: julia --startup-file=no %s %t -# RUN: cat %t/* | FileCheck %s +# RUN: cat %t/* | FileCheck %s --check-prefixes=CHECK,TYPED + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" + +# RUN: julia --startup-file=no %s %t +# RUN: cat %t/* | FileCheck %s --check-prefixes=CHECK,OPAQUE include(joinpath("..", "testhelpers", "llvmpasses.jl")) @@ -13,20 +20,119 @@ end @generated foo(x)=:(ccall("extern foo", llvmcall, $x, ($x,), x)) bar(x) = ntuple(i -> VecElement{Float16}(x[i]), 2) -# CHECK: call half @foo(half %{{[0-9]+}}) +# CHECK: define +# CHECK-SAME: half @julia_foo +# CHECK-SAME: { +# CHECK-NOT: define +# CHECK: [[FOO_RET:%.*]] = call half @foo(half [[FOO_ARG:%.*]]) +# CHECK-NOT: define +# CHECK: ret half +# CHECK-NOT: define +# CHECK: } emit(foo, Float16) -# CHECK: call [2 x half] @foo([2 x half] %{{[0-9]+}}) +# COM: Make sure that we don't miss a function by accident (helps localize errors) +# CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr +# CHECK-SAME: { + +# CHECK: define +# CHECK-SAME: [2 x half] @julia_foo +# CHECK-SAME: { +# CHECK-NOT: define +# CHECK: [[FOO_RET:%.*]] = call [2 x half] @foo([2 x half] [[FOO_ARG:%.*]]) +# CHECK-NOT: define +# CHECK: ret [2 x half] +# CHECK-NOT: define +# CHECK: } emit(foo, NTuple{2, Float16}) -# CHECK: call <2 x half> @foo(<2 x half> %{{[0-9]+}}) +# COM: Make sure that we don't miss a function by accident (helps localize errors) +# CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr +# CHECK-SAME: { + +# CHECK: define +# CHECK-SAME: <2 x half> @julia_foo +# CHECK-SAME: { +# CHECK-NOT: define +# CHECK: [[FOO_RET:%.*]] call <2 x half> @foo(<2 x half> [[FOO_ARG:%.*]]) +# CHECK-NOT: define +# CHECK: ret <2 x half> +# CHECK-NOT: define +# 
CHECK: } emit(foo, NTuple{2, VecElement{Float16}}) -# CHECK: call i8 addrspace(3)* @foo(i8 addrspace(3)* %{{[0-9]+}}) +# COM: Make sure that we don't miss a function by accident (helps localize errors) +# CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr +# CHECK-SAME: { + +# CHECK: define +# TYPED-SAME: i8 addrspace(3)* @julia_foo +# OPAQUE-SAME: ptr addrspace(3) @julia_foo +# CHECK-SAME: { +# CHECK-NOT: define +# TYPED: [[FOO_RET:%.*]] call i8 addrspace(3)* @foo(i8 addrspace(3)* [[FOO_ARG:%.*]]) +# OPAQUE: [[FOO_RET:%.*]] call ptr addrspace(3) @foo(ptr addrspace(3) [[FOO_ARG:%.*]]) +# CHECK-NOT: define +# TYPED: ret i8 addrspace(3)* +# OPAQUE: ret ptr addrspace(3) +# CHECK-NOT: define +# CHECK: } emit(foo, Core.LLVMPtr{Float32, 3}) -# CHECK: call { i32, i32 } @foo({ i32, i32 } %{{[0-9]+}}) +# COM: Make sure that we don't miss a function by accident (helps localize errors) +# CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr +# CHECK-SAME: { + +# CHECK: define +# CHECK-SAME: [2 x i32] @julia_foo +# CHECK-SAME: { +# CHECK-NOT: define +# CHECK: [[FOO_RET:%.*]] call { i32, i32 } @foo({ i32, i32 } [[FOO_ARG:%.*]]) +# CHECK-NOT: define +# CHECK: ret [2 x i32] +# CHECK-NOT: define +# CHECK: } emit(foo, Foo) -# CHECK: define <2 x half> @julia_bar_{{[0-9]+}}([2 x half] +# COM: Make sure that we don't miss a function by accident (helps localize errors) +# CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr +# CHECK-SAME: { + +# CHECK: define +# CHECK-SAME: <2 x half> @julia_bar +# TYPED-SAME: [2 x half] +# OPAQUE-SAME: ptr +# CHECK-SAME: { +# CHECK-NOT: define +# CHECK: ret <2 x half> +# CHECK-NOT: define +# CHECK: } emit(bar, NTuple{2, Float16}) + +# COM: Make sure that we don't miss a function by 
accident (helps localize errors) +# CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr +# CHECK-SAME: { diff --git a/test/llvmpasses/loopinfo.jl b/test/llvmpasses/loopinfo.jl index c970e07f8a125..b9b388c73d0c5 100644 --- a/test/llvmpasses/loopinfo.jl +++ b/test/llvmpasses/loopinfo.jl @@ -29,10 +29,10 @@ function simdf(X) acc += x # CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO:![0-9]+]] # LOWER-NOT: llvm.mem.parallel_loop_access -# LOWER: fadd fast double +# LOWER: fadd reassoc contract double # LOWER-NOT: call void @julia.loopinfo_marker() # LOWER: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]] -# FINAL: fadd fast <{{(vscale x )?}}{{[0-9]+}} x double> +# FINAL: fadd reassoc contract <{{(vscale x )?}}{{[0-9]+}} x double> end acc end @@ -46,7 +46,7 @@ function simdf2(X) # CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO2:![0-9]+]] # LOWER: llvm.mem.parallel_loop_access # LOWER-NOT: call void @julia.loopinfo_marker() -# LOWER: fadd fast double +# LOWER: fadd reassoc contract double # LOWER: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]] end acc @@ -64,10 +64,10 @@ end # CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO3:![0-9]+]] # LOWER-NOT: call void @julia.loopinfo_marker() # LOWER: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]] -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL-NOT: call void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration # FINAL: br end end @@ -90,17 +90,17 @@ end # CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO4:![0-9]+]] # LOWER-NOT: call void @julia.loopinfo_marker() # LOWER: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]] -# FINAL: call 
void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL-NOT: call void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration end end @@ -111,8 +111,8 @@ end 1 <= j <= I && continue @show (i,j) iteration(i) -# FINAL: call void @j_iteration -# FINAL-NOT: call void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration end $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.disable"),))) end diff --git a/test/llvmpasses/lower-handlers-addrspaces.ll b/test/llvmpasses/lower-handlers-addrspaces.ll index fcc4dc0114c21..744bf09082646 100644 --- a/test/llvmpasses/lower-handlers-addrspaces.ll +++ b/test/llvmpasses/lower-handlers-addrspaces.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerExcHandlers -print-before-all -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LowerExcHandlers -print-before-all -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LowerExcHandlers -print-before-all -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s target triple = "amdgcn-amd-amdhsa" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" diff --git a/test/llvmpasses/lower-handlers.ll b/test/llvmpasses/lower-handlers.ll index c3d51f2e94c30..2f5dea6cf0892 100644 --- a/test/llvmpasses/lower-handlers.ll +++ b/test/llvmpasses/lower-handlers.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerExcHandlers -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LowerExcHandlers -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LowerExcHandlers -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s attributes #1 = { returns_twice } declare i32 @julia.except_enter() #1 diff --git a/test/llvmpasses/muladd.ll b/test/llvmpasses/muladd.ll index f93940db392af..afeb068317844 100644 --- a/test/llvmpasses/muladd.ll +++ b/test/llvmpasses/muladd.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -CombineMulAdd -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -CombineMulAdd -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -CombineMulAdd -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s ; CHECK-LABEL: @fast_muladd1 diff --git a/test/llvmpasses/multiversioning-annotate-only.ll b/test/llvmpasses/multiversioning-annotate-only.ll index ababb4fc74b8a..0109010f4c1a1 100644 --- a/test/llvmpasses/multiversioning-annotate-only.ll +++ b/test/llvmpasses/multiversioning-annotate-only.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s ; COM: This test checks that multiversioning correctly picks up on features that should trigger cloning ; COM: Note that for annotations alone, we don't need jl_fvars or jl_gvars diff --git a/test/llvmpasses/multiversioning-clone-only.ll b/test/llvmpasses/multiversioning-clone-only.ll index 897652700c335..e37eefdc362f7 100644 --- a/test/llvmpasses/multiversioning-clone-only.ll +++ b/test/llvmpasses/multiversioning-clone-only.ll @@ -1,12 +1,17 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s --allow-unused-prefixes=false -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,OPAQUE ; CHECK: @jl_fvar_idxs = hidden constant [1 x i32] zeroinitializer ; CHECK: @jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer -; CHECK: @subtarget_cloned_gv = hidden global i64* null -; CHECK: @subtarget_cloned.reloc_slot = hidden global i32 (i32)* null +; TYPED: @subtarget_cloned_gv = hidden global i64* null +; OPAQUE: @subtarget_cloned_gv = hidden global ptr null +; TYPED: @subtarget_cloned.reloc_slot = hidden global i32 (i32)* null +; OPAQUE: @subtarget_cloned.reloc_slot = hidden global ptr null ; CHECK: @jl_fvar_offsets = hidden constant [2 x i32] [i32 1, i32 0] ; CHECK: @jl_gvar_base = hidden constant i64 0 ; CHECK: @jl_gvar_offsets = hidden constant [1 x i32] zeroinitializer diff --git a/test/llvmpasses/parsing.ll b/test/llvmpasses/parsing.ll new file mode 100644 index 0000000000000..434ffbb26c95f --- /dev/null +++ b/test/llvmpasses/parsing.ll @@ -0,0 +1,7 
@@ +; COM: NewPM-only test, tests for ability to parse Julia passes + +; RUN: opt --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,LowerSIMDLoop,FinalLowerGC,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,CombineMulAdd,LateLowerGCFrame,AllocOpt,PropagateJuliaAddrspaces,LowerExcHandlers,GCInvariantVerifier,loop(JuliaLICM),GCInvariantVerifier,GCInvariantVerifier),LowerPTLSPass,LowerPTLSPass,JuliaMultiVersioning,JuliaMultiVersioning)' -S %s -o /dev/null + +define void @test() { + ret void +} diff --git a/test/llvmpasses/pipeline-o0.jl b/test/llvmpasses/pipeline-o0.jl index 1b5d1df3c9f36..e48a5f7df111f 100644 --- a/test/llvmpasses/pipeline-o0.jl +++ b/test/llvmpasses/pipeline-o0.jl @@ -1,5 +1,14 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" + +# RUN: julia --startup-file=no -O0 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O1 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" + # RUN: julia --startup-file=no -O0 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s # RUN: julia --startup-file=no -O1 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s # RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s @@ -9,7 +18,6 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl")) # CHECK-LABEL: @julia_simple # CHECK-NOT: julia.get_pgcstack -# CHECK: asm # CHECK-NOT: julia.gc_alloc_obj # CHECK: ijl_gc_pool_alloc # COM: we want something vaguely along the lines of asm load from the fs register -> allocate bytes diff --git 
a/test/llvmpasses/pipeline-o2-allocs.jl b/test/llvmpasses/pipeline-o2-allocs.jl index 86e1ded3f11e5..86ab9125f2f27 100644 --- a/test/llvmpasses/pipeline-o2-allocs.jl +++ b/test/llvmpasses/pipeline-o2-allocs.jl @@ -1,5 +1,12 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" + +# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" + # RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s # RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s diff --git a/test/llvmpasses/pipeline-o2-broadcast.jl b/test/llvmpasses/pipeline-o2-broadcast.jl index 584e8855f0f8c..83a4450522c79 100644 --- a/test/llvmpasses/pipeline-o2-broadcast.jl +++ b/test/llvmpasses/pipeline-o2-broadcast.jl @@ -1,5 +1,12 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" + +# RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" + # RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s # RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s diff --git a/test/llvmpasses/pipeline-o2.jl b/test/llvmpasses/pipeline-o2.jl index fcb2161de7614..9fd42562f96aa 100644 --- a/test/llvmpasses/pipeline-o2.jl +++ b/test/llvmpasses/pipeline-o2.jl @@ -1,5 +1,18 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" + +# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL +# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL + +# RUN: julia --startup-file=no -O2 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF +# RUN: julia --startup-file=no -O3 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF + +# RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_AUTO +# RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_AUTO + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" + # RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL # RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL diff --git a/test/llvmpasses/pipeline-prints.ll b/test/llvmpasses/pipeline-prints.ll new file mode 100644 index 0000000000000..0c0d81420d9fe --- /dev/null +++ b/test/llvmpasses/pipeline-prints.ll @@ -0,0 +1,335 @@ +; COM: This is a newpm-only test, no legacypm command +; COM: we run all the prefixes even though some don't have tests because we want to make sure they don't crash +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYSIMPLIFICATION +; RUN: opt 
-enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELICM +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLICM +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPOPTIMIZATION +; RUN: opt -enable-new-pm=1 
--opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORESCALAROPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERSCALAROPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREVECTORIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERVECTORIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREINTRINSICLOWERING +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERINTRINSICLOWERING +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORECLEANUP +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERCLEANUP +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEROPTIMIZATION + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 
--load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELICM +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLICM +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 
--load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORESCALAROPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERSCALAROPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREVECTORIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERVECTORIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREINTRINSICLOWERING +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERINTRINSICLOWERING +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORECLEANUP +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 
--load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERCLEANUP +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEROPTIMIZATION + +; ModuleID = 'f' +source_filename = "f" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" +target triple = "x86_64-unknown-linux-gnu" + +define i64 @julia_f_199({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0) #0 !dbg !4 { +top: + %x = alloca {} addrspace(10)*, align 8 + %1 = call {}*** @julia.get_pgcstack() + store {} addrspace(10)* null, {} addrspace(10)** %x, align 8 + %2 = bitcast {}*** %1 to {}** + %current_task = getelementptr inbounds {}*, {}** %2, i64 -14 + %3 = bitcast {}** %current_task to i64* + %world_age = getelementptr inbounds i64, i64* %3, i64 15 + store {} addrspace(10)* %0, {} addrspace(10)** %x, align 8 + %4 = bitcast {}*** %1 to {}** + %current_task1 = getelementptr inbounds {}*, {}** %4, i64 -14 + %ptls_field = getelementptr inbounds {}*, {}** %current_task1, i64 16 + %ptls_load = load {}*, {}** %ptls_field, align 8, !tbaa !8 + %ptls = bitcast {}* %ptls_load to {}** + %5 = bitcast {}** %ptls to i64** + %6 = getelementptr inbounds i64*, i64** %5, i64 2 + %safepoint = load i64*, i64** %6, align 8, !tbaa !12, !invariant.load !7 + fence syncscope("singlethread") seq_cst + call void @julia.safepoint(i64* %safepoint), !dbg !14 + fence syncscope("singlethread") seq_cst + %7 = load {} addrspace(10)*, {} addrspace(10)** %x, align 8, !dbg !15, !nonnull !7, !dereferenceable !23, !align !24 + %8 = addrspacecast {} addrspace(10)* %7 to {} addrspace(11)*, !dbg !15 + %9 = bitcast {} addrspace(11)* %8 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !15 + %10 = getelementptr inbounds { i8 
addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %9, i32 0, i32 1, !dbg !15 + %11 = load i64, i64 addrspace(11)* %10, align 8, !dbg !15, !tbaa !12, !range !25, !invariant.load !7, !alias.scope !26, !noalias !29 + %12 = icmp sle i64 0, %11, !dbg !34 + %13 = icmp ult i64 0, %11, !dbg !42 + %14 = and i1 %12, %13, !dbg !43 + %15 = zext i1 %14 to i8, !dbg !18 + %16 = trunc i8 %15 to i1, !dbg !18 + %17 = xor i1 %16, true, !dbg !18 + br i1 %17, label %L12, label %L9, !dbg !18 + +L9: ; preds = %top + %18 = load {} addrspace(10)*, {} addrspace(10)** %x, align 8, !dbg !46, !nonnull !7, !dereferenceable !23, !align !24 + %19 = addrspacecast {} addrspace(10)* %18 to {} addrspace(11)*, !dbg !46 + %20 = bitcast {} addrspace(11)* %19 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !46 + %21 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %20, i32 0, i32 0, !dbg !46 + %22 = load i8 addrspace(13)*, i8 addrspace(13)* addrspace(11)* %21, align 8, !dbg !46, !tbaa !12, !invariant.load !7, !alias.scope !26, !noalias !29, !nonnull !7 + %23 = bitcast i8 addrspace(13)* %22 to i64 addrspace(13)*, !dbg !46 + %24 = getelementptr inbounds i64, i64 addrspace(13)* %23, i64 0, !dbg !46 + %25 = load i64, i64 addrspace(13)* %24, align 8, !dbg !46, !tbaa !48, !alias.scope !51, !noalias !52 + br label %L13, !dbg !18 + +L12: ; preds = %top + br label %L13, !dbg !18 + +L13: ; preds = %L12, %L9 + %value_phi = phi i8 [ 0, %L9 ], [ 1, %L12 ] + %value_phi2 = phi i64 [ %25, %L9 ], [ undef, %L12 ] + %value_phi3 = phi i64 [ 2, %L9 ], [ undef, %L12 ] + br label %L17, !dbg !21 + +L17: ; preds = %L13 + %26 = trunc i8 %value_phi to i1, !dbg !22 + %27 = xor i1 %26, true, !dbg !22 + %28 = zext i1 %27 to i8, !dbg !22 + %29 = trunc i8 %28 to i1, !dbg !22 + %30 = xor i1 %29, true, !dbg !22 + br i1 %30, label %L17.L41_crit_edge, label %L17.L19_crit_edge, !dbg !22 + 
+L17.L41_crit_edge: ; preds = %L17 + br label %L41, !dbg !53 + +L17.L19_crit_edge: ; preds = %L17 + br label %L19, !dbg !18 + +L19: ; preds = %L17.L19_crit_edge, %L40 + %value_phi4 = phi i64 [ %value_phi2, %L17.L19_crit_edge ], [ %value_phi7, %L40 ] + %value_phi5 = phi i64 [ %value_phi3, %L17.L19_crit_edge ], [ %value_phi8, %L40 ] + %value_phi6 = phi i64 [ 0, %L17.L19_crit_edge ], [ %31, %L40 ] + %31 = add i64 %value_phi6, %value_phi4, !dbg !55 + %32 = sub i64 %value_phi5, 1, !dbg !58 + %33 = load {} addrspace(10)*, {} addrspace(10)** %x, align 8, !dbg !61, !nonnull !7, !dereferenceable !23, !align !24 + %34 = addrspacecast {} addrspace(10)* %33 to {} addrspace(11)*, !dbg !61 + %35 = bitcast {} addrspace(11)* %34 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !61 + %36 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %35, i32 0, i32 1, !dbg !61 + %37 = load i64, i64 addrspace(11)* %36, align 8, !dbg !61, !tbaa !12, !range !25, !invariant.load !7, !alias.scope !26, !noalias !29 + %38 = icmp sle i64 0, %37, !dbg !62 + %39 = icmp ult i64 %32, %37, !dbg !65 + %40 = and i1 %38, %39, !dbg !66 + %41 = zext i1 %40 to i8, !dbg !53 + %42 = trunc i8 %41 to i1, !dbg !53 + %43 = xor i1 %42, true, !dbg !53 + br i1 %43, label %L34, label %L31, !dbg !53 + +L31: ; preds = %L19 + %44 = load {} addrspace(10)*, {} addrspace(10)** %x, align 8, !dbg !67, !nonnull !7, !dereferenceable !23, !align !24 + %45 = sub i64 %value_phi5, 1, !dbg !67 + %46 = mul i64 %45, 1, !dbg !67 + %47 = add i64 0, %46, !dbg !67 + %48 = addrspacecast {} addrspace(10)* %44 to {} addrspace(11)*, !dbg !67 + %49 = bitcast {} addrspace(11)* %48 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !67 + %50 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %49, i32 0, i32 0, !dbg !67 + %51 = load i8 addrspace(13)*, i8 addrspace(13)* 
addrspace(11)* %50, align 8, !dbg !67, !tbaa !12, !invariant.load !7, !alias.scope !26, !noalias !29, !nonnull !7 + %52 = bitcast i8 addrspace(13)* %51 to i64 addrspace(13)*, !dbg !67 + %53 = getelementptr inbounds i64, i64 addrspace(13)* %52, i64 %47, !dbg !67 + %54 = load i64, i64 addrspace(13)* %53, align 8, !dbg !67, !tbaa !48, !alias.scope !51, !noalias !52 + %55 = add i64 %value_phi5, 1, !dbg !68 + br label %L35, !dbg !53 + +L34: ; preds = %L19 + br label %L35, !dbg !53 + +L35: ; preds = %L34, %L31 + %value_phi7 = phi i64 [ %54, %L31 ], [ undef, %L34 ] + %value_phi8 = phi i64 [ %55, %L31 ], [ undef, %L34 ] + %value_phi9 = phi i8 [ 0, %L31 ], [ 1, %L34 ] + %56 = trunc i8 %value_phi9 to i1, !dbg !54 + %57 = xor i1 %56, true, !dbg !54 + %58 = zext i1 %57 to i8, !dbg !54 + %59 = trunc i8 %58 to i1, !dbg !54 + %60 = xor i1 %59, true, !dbg !54 + br i1 %60, label %L35.L41_crit_edge, label %L40, !dbg !54 + +L35.L41_crit_edge: ; preds = %L35 + br label %L41, !dbg !53 + +L40: ; preds = %L35 + br label %L19, !dbg !18 + +L41: ; preds = %L17.L41_crit_edge, %L35.L41_crit_edge + %value_phi10 = phi i64 [ %31, %L35.L41_crit_edge ], [ 0, %L17.L41_crit_edge ] + ret i64 %value_phi10, !dbg !69 +} + +; Function Attrs: noinline optnone +define nonnull {} addrspace(10)* @jfptr_f_200({} addrspace(10)* %0, {} addrspace(10)** noalias nocapture noundef readonly %1, i32 %2) #1 { +top: + %3 = call {}*** @julia.get_pgcstack() + %4 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %1, i32 0 + %5 = load {} addrspace(10)*, {} addrspace(10)** %4, align 8, !tbaa !12, !invariant.load !7, !alias.scope !26, !noalias !29, !nonnull !7, !dereferenceable !23, !align !24 + %6 = call i64 @julia_f_199({} addrspace(10)* %5) + %7 = call nonnull {} addrspace(10)* @ijl_box_int64(i64 signext %6) + ret {} addrspace(10)* %7 +} + +declare {}*** @julia.get_pgcstack() + +declare nonnull {} addrspace(10)* @ijl_box_int64(i64 signext) + +; Function Attrs: inaccessiblemem_or_argmemonly +declare void 
@julia.safepoint(i64*) #2 + +attributes #0 = { "frame-pointer"="all" "probe-stack"="inline-asm" } +attributes #1 = { noinline optnone "frame-pointer"="all" "probe-stack"="inline-asm" } +attributes #2 = { inaccessiblemem_or_argmemonly } + +!llvm.module.flags = !{!0, !1} +!llvm.dbg.cu = !{!2} + +!0 = !{i32 2, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, nameTableKind: GNU) +!3 = !DIFile(filename: "julia", directory: ".") +!4 = distinct !DISubprogram(name: "f", linkageName: "julia_f_199", scope: null, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!5 = !DIFile(filename: "REPL[2]", directory: ".") +!6 = !DISubroutineType(types: !7) +!7 = !{} +!8 = !{!9, !9, i64 0} +!9 = !{!"jtbaa_gcframe", !10, i64 0} +!10 = !{!"jtbaa", !11, i64 0} +!11 = !{!"jtbaa"} +!12 = !{!13, !13, i64 0, i64 1} +!13 = !{!"jtbaa_const", !10, i64 0} +!14 = !DILocation(line: 1, scope: !4) +!15 = !DILocation(line: 10, scope: !16, inlinedAt: !18) +!16 = distinct !DISubprogram(name: "length;", linkageName: "length", scope: !17, file: !17, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!17 = !DIFile(filename: "essentials.jl", directory: ".") +!18 = !DILocation(line: 943, scope: !19, inlinedAt: !21) +!19 = distinct !DISubprogram(name: "iterate;", linkageName: "iterate", scope: !20, file: !20, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!20 = !DIFile(filename: "array.jl", directory: ".") +!21 = !DILocation(line: 943, scope: !19, inlinedAt: !22) +!22 = !DILocation(line: 3, scope: !4) +!23 = !{i64 40} +!24 = !{i64 16} +!25 = !{i64 0, i64 9223372036854775807} +!26 = !{!27} +!27 = !{!"jnoalias_const", !28} +!28 = !{!"jnoalias"} +!29 = !{!30, !31, !32, !33} +!30 = 
!{!"jnoalias_gcframe", !28} +!31 = !{!"jnoalias_stack", !28} +!32 = !{!"jnoalias_data", !28} +!33 = !{!"jnoalias_typemd", !28} +!34 = !DILocation(line: 514, scope: !35, inlinedAt: !37) +!35 = distinct !DISubprogram(name: "<=;", linkageName: "<=", scope: !36, file: !36, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!36 = !DIFile(filename: "int.jl", directory: ".") +!37 = !DILocation(line: 423, scope: !38, inlinedAt: !40) +!38 = distinct !DISubprogram(name: ">=;", linkageName: ">=", scope: !39, file: !39, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!39 = !DIFile(filename: "operators.jl", directory: ".") +!40 = !DILocation(line: 520, scope: !41, inlinedAt: !18) +!41 = distinct !DISubprogram(name: "<;", linkageName: "<", scope: !36, file: !36, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!42 = !DILocation(line: 513, scope: !41, inlinedAt: !40) +!43 = !DILocation(line: 38, scope: !44, inlinedAt: !40) +!44 = distinct !DISubprogram(name: "&;", linkageName: "&", scope: !45, file: !45, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!45 = !DIFile(filename: "bool.jl", directory: ".") +!46 = !DILocation(line: 13, scope: !47, inlinedAt: !18) +!47 = distinct !DISubprogram(name: "getindex;", linkageName: "getindex", scope: !17, file: !17, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!48 = !{!49, !49, i64 0} +!49 = !{!"jtbaa_arraybuf", !50, i64 0} +!50 = !{!"jtbaa_data", !10, i64 0} +!51 = !{!32} +!52 = !{!30, !31, !33, !27} +!53 = !DILocation(line: 943, scope: !19, inlinedAt: !54) +!54 = !DILocation(line: 5, scope: !4) +!55 = !DILocation(line: 87, scope: !56, inlinedAt: !57) +!56 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !36, file: !36, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!57 = 
!DILocation(line: 4, scope: !4) +!58 = !DILocation(line: 86, scope: !59, inlinedAt: !60) +!59 = distinct !DISubprogram(name: "-;", linkageName: "-", scope: !36, file: !36, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!60 = !DILocation(line: 1068, scope: !59, inlinedAt: !53) +!61 = !DILocation(line: 10, scope: !16, inlinedAt: !53) +!62 = !DILocation(line: 514, scope: !35, inlinedAt: !63) +!63 = !DILocation(line: 423, scope: !38, inlinedAt: !64) +!64 = !DILocation(line: 520, scope: !41, inlinedAt: !53) +!65 = !DILocation(line: 513, scope: !41, inlinedAt: !64) +!66 = !DILocation(line: 38, scope: !44, inlinedAt: !64) +!67 = !DILocation(line: 13, scope: !47, inlinedAt: !53) +!68 = !DILocation(line: 87, scope: !56, inlinedAt: !53) +!69 = !DILocation(line: 6, scope: !4) + +; BEFOREEARLYSIMPLIFICATION: IR Dump Before BeforeEarlySimplification +; AFTEREARLYSIMPLIFICATION: IR Dump Before AfterEarlySimplification +; BEFOREEARLYOPTIMIZATION: IR Dump Before BeforeEarlyOptimization +; AFTEREARLYOPTIMIZATION: IR Dump Before AfterEarlyOptimization +; BEFORELOOPOPTIMIZATION: IR Dump Before BeforeLoopOptimization +; BEFORELICM: IR Dump Before BeforeLICM +; AFTERLICM: IR Dump Before AfterLICM +; BEFORELOOPSIMPLIFICATION: IR Dump Before BeforeLoopSimplification +; AFTERLOOPSIMPLIFICATION: IR Dump Before AfterLoopSimplification +; AFTERLOOPOPTIMIZATION: IR Dump Before AfterLoopOptimization +; BEFORESCALAROPTIMIZATION: IR Dump Before BeforeScalarOptimization +; AFTERSCALAROPTIMIZATION: IR Dump Before AfterScalarOptimization +; BEFOREVECTORIZATION: IR Dump Before BeforeVectorization +; AFTERVECTORIZATION: IR Dump Before AfterVectorization +; BEFOREINTRINSICLOWERING: IR Dump Before BeforeIntrinsicLowering +; AFTERINTRINSICLOWERING: IR Dump Before AfterIntrinsicLowering +; BEFORECLEANUP: IR Dump Before BeforeCleanup +; AFTERCLEANUP: IR Dump Before AfterCleanup +; AFTEROPTIMIZATION: IR Dump Before AfterOptimization + +; COM: simplifycfg should 
have killed this block +; BEFOREOPTIMIZATION: L17.L41_crit_edge: ; preds = %L17 +; BEFOREOPTIMIZATION-NEXT: br label %L41, !dbg !53 + +; BEFOREEARLYSIMPLIFICATION: L17.L41_crit_edge: ; preds = %L17 +; BEFOREEARLYSIMPLIFICATION-NEXT: br label %L41, !dbg !53 + +; AFTEREARLYSIMPLIFICATION-NOT: L17.L41_crit_edge: ; preds = %L17 +; AFTEREARLYSIMPLIFICATION-NOT: br label %L41, !dbg !53 + +; BEFOREEARLYOPTIMIZATION-NOT: L17.L41_crit_edge: ; preds = %L17 +; BEFOREEARLYOPTIMIZATION-NOT: br label %L41, !dbg !53 + + +; COM: InstSimplify/InstCombine should kill this zext-trunc pair +; AFTEREARLYSIMPLIFICATION: [[ZEXT:%.*]] = zext i1 {{%.*}} to i8 +; AFTEREARLYSIMPLIFICATION-NEXT: trunc i8 [[ZEXT]] to i1 + +; BEFOREEARLYOPTIMIZATION: [[ZEXT:%.*]] = zext i1 {{%.*}} to i8 +; BEFOREEARLYOPTIMIZATION-NEXT: trunc i8 [[ZEXT]] to i1 + +; AFTEREARLYOPTIMIZATION-NOT: zext i1 {{%.*}} to i8 +; AFTEREARLYOPTIMIZATION-NOT: trunc i8 {{%.*}} to i1 + +; BEFORELOOPOPTIMIZATION-NOT: zext i1 {{%.*}} to i8 +; BEFORELOOPOPTIMIZATION-NOT: trunc i8 {{%.*}} to i1 + +; COM: Loop simplification makes the exit condition obvious +; AFTERLOOPSIMPLIFICATION: L35.lr.ph: +; AFTERLOOPSIMPLIFICATION-NEXT: add nuw nsw + +; COM: Scalar optimization removes the previous add from the preheader +; AFTERSCALAROPTIMIZATION: L35.preheader: +; AFTERSCALAROPTIMIZATION-NOT: add nuw nsw +; AFTERSCALAROPTIMIZATION-NEXT: br label %L35 + +; COM: Vectorization does stuff +; AFTERVECTORIZATION: vector.body +; AFTERVECTORIZATION: llvm.vector.reduce.add + +; COM: Intrinsics are lowered and cleaned up by the time optimization is finished +; AFTEROPTIMIZATION-NOT: call void @julia.safepoint +; AFTEROPTIMIZATION: load volatile i64{{.*}}%safepoint \ No newline at end of file diff --git a/test/llvmpasses/propagate-addrspace-non-zero.ll b/test/llvmpasses/propagate-addrspace-non-zero.ll index c1ba2069102ac..ac491000ba1e5 100644 --- a/test/llvmpasses/propagate-addrspace-non-zero.ll +++ b/test/llvmpasses/propagate-addrspace-non-zero.ll @@ 
-1,7 +1,10 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s target triple = "amdgcn-amd-amdhsa" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" diff --git a/test/llvmpasses/propagate-addrspace.ll b/test/llvmpasses/propagate-addrspace.ll index 92bf68578477f..ffed83ddb615a 100644 --- a/test/llvmpasses/propagate-addrspace.ll +++ b/test/llvmpasses/propagate-addrspace.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s define i64 @simple() { ; CHECK-LABEL: @simple diff --git a/test/llvmpasses/refinements.ll b/test/llvmpasses/refinements.ll index 3600fb76804ab..4637fc4b45071 100644 --- a/test/llvmpasses/refinements.ll +++ b/test/llvmpasses/refinements.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE declare {}*** @julia.ptls_states() @@ -27,13 +30,15 @@ define void @argument_refinement({} addrspace(10)* %a) { ; Check that we reuse the gc slot from the box define void @heap_refinement1(i64 %a) { ; CHECK-LABEL: @heap_refinement1 -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a) %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)* %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1 -; CHECK: store {} addrspace(10)* %aboxed +; TYPED: store {} addrspace(10)* %aboxed +; OPAQUE: store ptr addrspace(10) %aboxed call void @jl_safepoint() %casted2 = bitcast {} addrspace(10)* %loaded1 to i64 addrspace(10)* %loaded2 = load i64, i64 addrspace(10)* 
%casted2 @@ -44,13 +49,15 @@ define void @heap_refinement1(i64 %a) { ; Check that we don't root the allocated value here, just the derived value define void @heap_refinement2(i64 %a) { ; CHECK-LABEL: @heap_refinement2 -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a) %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)* %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1 -; CHECK: store {} addrspace(10)* %loaded1 +; TYPED: store {} addrspace(10)* %loaded1 +; OPAQUE: store ptr addrspace(10) %loaded1 call void @jl_safepoint() %casted2 = bitcast {} addrspace(10)* %loaded1 to i64 addrspace(10)* %loaded2 = load i64, i64 addrspace(10)* %casted2 @@ -60,24 +67,33 @@ define void @heap_refinement2(i64 %a) { ; Check that the way we compute rooting is compatible with refinements define void @issue22770() { ; CHECK-LABEL: @issue22770 -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %y = call {} addrspace(10)* @allocate_some_value() %casted1 = bitcast {} addrspace(10)* %y to {} addrspace(10)* addrspace(10)* %x = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1 -; CHECK: store {} addrspace(10)* %y, +; TYPED: store {} addrspace(10)* %y, +; OPAQUE: store ptr addrspace(10) %y, %a = call {} addrspace(10)* @allocate_some_value() -; CHECK: store {} addrspace(10)* %a -; CHECK: call void @one_arg_boxed({} addrspace(10)* %x) -; CHECK: call void @one_arg_boxed({} addrspace(10)* %a) -; CHECK: call void @one_arg_boxed({} addrspace(10)* %y) +; TYPED: store {} 
addrspace(10)* %a +; TYPED: call void @one_arg_boxed({} addrspace(10)* %x) +; TYPED: call void @one_arg_boxed({} addrspace(10)* %a) +; TYPED: call void @one_arg_boxed({} addrspace(10)* %y) + +; OPAQUE: store ptr addrspace(10) %a +; OPAQUE: call void @one_arg_boxed(ptr addrspace(10) %x) +; OPAQUE: call void @one_arg_boxed(ptr addrspace(10) %a) +; OPAQUE: call void @one_arg_boxed(ptr addrspace(10) %y) call void @one_arg_boxed({} addrspace(10)* %x) call void @one_arg_boxed({} addrspace(10)* %a) call void @one_arg_boxed({} addrspace(10)* %y) -; CHECK: store {} addrspace(10)* %x +; TYPED: store {} addrspace(10)* %x +; OPAQUE: store ptr addrspace(10) %x %c = call {} addrspace(10)* @allocate_some_value() -; CHECK: store {} addrspace(10)* %c +; TYPED: store {} addrspace(10)* %c +; OPAQUE: store ptr addrspace(10) %c call void @one_arg_boxed({} addrspace(10)* %x) call void @one_arg_boxed({} addrspace(10)* %c) ret void @@ -107,7 +123,8 @@ L3: define void @dont_refine_loop({} addrspace(10)* %x) { ; CHECK-LABEL: @dont_refine_loop -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -150,7 +167,8 @@ L2: define void @refine_loop_indirect({} addrspace(10)* %x) { ; CHECK-LABEL: @refine_loop_indirect -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -175,7 +193,8 @@ L2: define void @refine_loop_indirect2({} addrspace(10)* %x) { ; CHECK-LABEL: @refine_loop_indirect2 -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call 
{}*** @julia.ptls_states() diff --git a/test/llvmpasses/remove-addrspaces.ll b/test/llvmpasses/remove-addrspaces.ll index 4710f9bd6c4d6..b2d14ae49c8e7 100644 --- a/test/llvmpasses/remove-addrspaces.ll +++ b/test/llvmpasses/remove-addrspaces.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -RemoveJuliaAddrspaces -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -RemoveJuliaAddrspaces -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -RemoveJuliaAddrspaces -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE define i64 @getindex({} addrspace(10)* nonnull align 16 dereferenceable(40)) { @@ -34,7 +37,8 @@ top: define nonnull {} addrspace(10)* @constexpr(i64) { ; CHECK-LABEL: @constexpr top: -; CHECK: call {}* inttoptr (i64 139806640486784 to {}* ({}*, i64)*)({}* inttoptr (i64 139806425039920 to {}*), i64 1) +; TYPED: call {}* inttoptr (i64 139806640486784 to {}* ({}*, i64)*)({}* inttoptr (i64 139806425039920 to {}*), i64 1) +; OPAQUE: call ptr inttoptr (i64 139806640486784 to ptr)(ptr inttoptr (i64 139806425039920 to ptr), i64 1) %1 = call {} addrspace(10)* inttoptr (i64 139806640486784 to {} addrspace(10)* ({} addrspace(10)*, i64)*)({} addrspace(10)* addrspacecast ({}* inttoptr (i64 139806425039920 to {}*) to {} addrspace(10)*), i64 1) ; CHECK-NOT: 
addrspacecast ; CHECK-NOT: addrspace @@ -63,23 +67,23 @@ top: %c.cdr = getelementptr %list, %list* %c, i32 0, i32 1 ; COM: Allow remove-addrspaces to rename the type but expect it to use the same prefix. ; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %a +; TYPED-SAME: %list* %a +; OPAQUE-SAME: ptr %a ; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %a +; TYPED-SAME: %list* %a +; OPAQUE-SAME: ptr %a ; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %b +; TYPED-SAME: %list* %b +; OPAQUE-SAME: ptr %b ; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %b +; TYPED-SAME: %list* %b +; OPAQUE-SAME: ptr %b ; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %c +; TYPED-SAME: %list* %c +; OPAQUE-SAME: ptr %c ; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %c +; TYPED-SAME: %list* %c +; OPAQUE-SAME: ptr %c store i64 111, i64* %a.car store i64 222, i64* %b.car store i64 333, i64* %c.car @@ -108,7 +112,8 @@ exit: ; COM: check that address spaces in byval types are processed correctly define void @byval_type([1 x {} addrspace(10)*] addrspace(11)* byval([1 x {} addrspace(10)*]) %0) { -; CHECK: define void @byval_type([1 x {}*]* byval([1 x {}*]) %0) +; TYPED: define void @byval_type([1 x {}*]* byval([1 x {}*]) %0) +; OPAQUE: define void @byval_type(ptr byval([1 x ptr]) %0) ret void } diff --git a/test/llvmpasses/returnstwicegc.ll b/test/llvmpasses/returnstwicegc.ll index 404330ac3f7e1..699d89f7257d0 100644 --- a/test/llvmpasses/returnstwicegc.ll +++ b/test/llvmpasses/returnstwicegc.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=OPAQUE declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*) @@ -14,7 +17,8 @@ declare void @one_arg_boxed({} addrspace(10)*) define void @try_catch(i64 %a, i64 %b) { ; Because of the returns_twice function, we need to keep aboxed live everywhere -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %sigframe = alloca [208 x i8], align 16 %sigframe.sub = getelementptr inbounds [208 x i8], [208 x i8]* %sigframe, i64 0, i64 0 diff --git a/test/llvmpasses/simdloop.ll b/test/llvmpasses/simdloop.ll index 142250212984e..929fbeea2c3f5 100644 --- a/test/llvmpasses/simdloop.ll +++ b/test/llvmpasses/simdloop.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s declare void @julia.loopinfo_marker() @@ -37,7 +40,7 @@ loop: ; CHECK: llvm.mem.parallel_loop_access %aval = load double, double *%aptr %nextv = fsub double %v, %aval -; CHECK: fsub fast double %v, %aval +; CHECK: fsub reassoc contract double %v, %aval %nexti = add i64 %i, 1 call void @julia.loopinfo_marker(), !julia.loopinfo !3 %done = icmp sgt i64 %nexti, 500 @@ -56,7 +59,7 @@ loop: %aptr = getelementptr double, double *%a, i64 %i %aval = load double, double *%aptr %nextv = fsub double %v, %aval -; CHECK: fsub fast double %v, %aval +; CHECK: fsub reassoc contract double %v, %aval %nexti = add i64 %i, 1 call void @julia.loopinfo_marker(), !julia.loopinfo !2 %done = icmp sgt i64 %nexti, 500 diff --git a/test/loading.jl b/test/loading.jl index ea544c2635dbc..394c13c5f2962 100644 --- a/test/loading.jl +++ b/test/loading.jl @@ -692,7 +692,9 @@ mktempdir() do dir mkpath(vpath) script = "@assert startswith(Base.active_project(), $(repr(vpath)))" cmd = `$(Base.julia_cmd()) --startup-file=no -e $(script)` - cmd = addenv(cmd, "JULIA_DEPOT_PATH" => dir) + cmd = addenv(cmd, + "JULIA_DEPOT_PATH" => dir, + "JULIA_LOAD_PATH" => Sys.iswindows() ? 
";" : ":") cmd = pipeline(cmd; stdout, stderr) @test success(cmd) end diff --git a/test/meta.jl b/test/meta.jl index 399e106684a81..36a8acbfe08dd 100644 --- a/test/meta.jl +++ b/test/meta.jl @@ -221,8 +221,25 @@ let a = 1 @test @macroexpand @is_dollar_expr $a end -@test Meta.parseatom("@foo", 1, filename=:bar)[1].args[2].file === :bar -@test Meta.parseall("@foo", filename=:bar).args[1].file === :bar +let ex = Meta.parse("@foo"; filename=:bar) + @test Meta.isexpr(ex, :macrocall) + arg2 = ex.args[2] + @test isa(arg2, LineNumberNode) && arg2.file === :bar +end +let ex = Meta.parseatom("@foo", 1, filename=:bar)[1] + @test Meta.isexpr(ex, :macrocall) + arg2 = ex.args[2] + @test isa(arg2, LineNumberNode) && arg2.file === :bar +end +let ex = Meta.parseall("@foo", filename=:bar) + @test Meta.isexpr(ex, :toplevel) + arg1 = ex.args[1] + @test isa(arg1, LineNumberNode) && arg1.file === :bar + arg2 = ex.args[2] + @test Meta.isexpr(arg2, :macrocall) + arg2arg2 = arg2.args[2] + @test isa(arg2arg2, LineNumberNode) && arg2arg2.file === :bar +end _lower(m::Module, ex, world::UInt) = ccall(:jl_expand_in_world, Any, (Any, Ref{Module}, Cstring, Cint, Csize_t), ex, m, "none", 0, world) diff --git a/test/missing.jl b/test/missing.jl index f06d1aad7a6b1..36155eb32fe49 100644 --- a/test/missing.jl +++ b/test/missing.jl @@ -80,7 +80,7 @@ end @test isapprox(missing, 1.0, atol=1e-6) === missing @test isapprox(1.0, missing, rtol=1e-6) === missing - @test !any(T -> T === Union{Missing,Bool}, Base.return_types(isequal, Tuple{Any,Any})) + @test all(==(Bool), Base.return_types(isequal, Tuple{Any,Any})) end @testset "arithmetic operators" begin @@ -643,4 +643,11 @@ end @test isequal(sort(X, alg=MergeSort, rev=true), XRP) end -sortperm(reverse([NaN, missing, NaN, missing])) +@test (sortperm(reverse([NaN, missing, NaN, missing])); true) + +# use LazyString for MissingException to get the better effects +for func in (round, ceil, floor, trunc) + @testset let func = func + @test 
Core.Compiler.is_foldable(Base.infer_effects(func, (Type{Int},Union{Int,Missing}))) + end +end diff --git a/test/namedtuple.jl b/test/namedtuple.jl index ea3a5cdbb8ee4..eb3846c8cbffd 100644 --- a/test/namedtuple.jl +++ b/test/namedtuple.jl @@ -342,6 +342,12 @@ end @test_throws LoadError include_string(Main, "@NamedTuple(a::Int, b)") end +# @Kwargs +@testset "@Kwargs" begin + @test @Kwargs{a::Int,b::String} == typeof(pairs((;a=1,b="2"))) + @test @Kwargs{} == typeof(pairs((;))) +end + # issue #29333, implicit names let x = 1, y = 2 @test (;y) === (y = 2,) diff --git a/test/numbers.jl b/test/numbers.jl index efb2702aff1c2..d7fd6531b157d 100644 --- a/test/numbers.jl +++ b/test/numbers.jl @@ -2216,13 +2216,11 @@ end @test round(Int16, -32768.1) === Int16(-32768) end # issue #7508 -@test_throws ErrorException reinterpret(Int, 0x01) +@test_throws ArgumentError reinterpret(Int, 0x01) @testset "issue #12832" begin - @test_throws ErrorException reinterpret(Float64, Complex{Int64}(1)) - @test_throws ErrorException reinterpret(Float64, ComplexF32(1)) - @test_throws ErrorException reinterpret(ComplexF32, Float64(1)) - @test_throws ErrorException reinterpret(Int32, false) + @test_throws ArgumentError reinterpret(Float64, Complex{Int64}(1)) + @test_throws ArgumentError reinterpret(Int32, false) end # issue #41 ndigf(n) = Float64(log(Float32(n))) @@ -2264,6 +2262,17 @@ end @test_throws InexactError convert(Int16, big(2)^100) @test_throws InexactError convert(Int, typemax(UInt)) +@testset "infinity to integer conversion" begin + for T in ( + UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, Int32, Int64, Int128, BigInt + ) + for S in (Float16, Float32, Float64, BigFloat) + @test_throws InexactError convert(T, typemin(S)) + @test_throws InexactError convert(T, typemax(S)) + end + end +end + @testset "issue #9789" begin @test_throws InexactError convert(Int8, typemax(UInt64)) @test_throws InexactError convert(Int16, typemax(UInt64)) diff --git a/test/precompile.jl 
b/test/precompile.jl index 606ee1087e51e..62d862c384040 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -4,6 +4,7 @@ original_depot_path = copy(Base.DEPOT_PATH) original_load_path = copy(Base.LOAD_PATH) using Test, Distributed, Random +using REPL # doc lookup function Foo_module = :Foo4b3a94a1a081a8cb Foo2_module = :F2oo4b3a94a1a081a8cb @@ -339,17 +340,20 @@ precompile_test_harness(false) do dir cachedir = joinpath(dir, "compiled", "v$(VERSION.major).$(VERSION.minor)") cachedir2 = joinpath(dir2, "compiled", "v$(VERSION.major).$(VERSION.minor)") cachefile = joinpath(cachedir, "$Foo_module.ji") - if Base.JLOptions().use_pkgimages == 1 - ocachefile = Base.ocachefile_from_cachefile(cachefile) - else - ocachefile = nothing - end - # use _require_from_serialized to ensure that the test fails if - # the module doesn't reload from the image: - @test_warn "@ccallable was already defined for this method name" begin - @test_logs (:warn, "Replacing module `$Foo_module`") begin - m = Base._require_from_serialized(Base.PkgId(Foo), cachefile, ocachefile) - @test isa(m, Module) + do_pkgimg = Base.JLOptions().use_pkgimages == 1 && Base.JLOptions().permalloc_pkgimg == 1 + if do_pkgimg || Base.JLOptions().use_pkgimages == 0 + if do_pkgimg + ocachefile = Base.ocachefile_from_cachefile(cachefile) + else + ocachefile = nothing + end + # use _require_from_serialized to ensure that the test fails if + # the module doesn't reload from the image: + @test_warn "@ccallable was already defined for this method name" begin + @test_logs (:warn, "Replacing module `$Foo_module`") begin + m = Base._require_from_serialized(Base.PkgId(Foo), cachefile, ocachefile) + @test isa(m, Module) + end end end @@ -394,7 +398,7 @@ precompile_test_harness(false) do dir Dict(let m = Base.root_module(Base, s) Base.PkgId(m) => Base.module_build_id(m) end for s in - [:ArgTools, :Artifacts, :Base64, :CompilerSupportLibraries_jll, :CRC32c, :Dates, + [:ArgTools, :Artifacts, :Base64, :CRC32c, :Dates, 
:Downloads, :FileWatching, :Future, :InteractiveUtils, :libblastrampoline_jll, :LibCURL, :LibCURL_jll, :LibGit2, :Libdl, :LinearAlgebra, :Logging, :Markdown, :Mmap, :MozillaCACerts_jll, :NetworkOptions, :OpenBLAS_jll, :Pkg, :Printf, @@ -651,7 +655,10 @@ precompile_test_harness("code caching") do dir precompile(getelsize, (Vector{Int32},)) end """) - Base.compilecache(Base.PkgId(string(Cache_module))) + pkgid = Base.PkgId(string(Cache_module)) + @test !Base.isprecompiled(pkgid) + Base.compilecache(pkgid) + @test Base.isprecompiled(pkgid) @eval using $Cache_module M = getfield(@__MODULE__, Cache_module) # Test that this cache file "owns" all the roots diff --git a/test/ranges.jl b/test/ranges.jl index ec69c57fc0a8f..b263e6d4d530d 100644 --- a/test/ranges.jl +++ b/test/ranges.jl @@ -59,6 +59,9 @@ using InteractiveUtils: code_llvm @test last(10:0.2:3) === 9.8 @test step(10:0.2:3) === 0.2 @test isempty(10:0.2:3) + + unitrangeerrstr = "promotion of types Char and Char failed to change any arguments" + @test_throws unitrangeerrstr UnitRange('a', 'b') end using Dates, Random @@ -2477,3 +2480,13 @@ end @test !occursin("_colon", ir) @test !occursin("StepRange", ir) end + +# DimensionMismatch and LazyString +function check_ranges(rx, ry) + if length(rx) != length(ry) + throw(DimensionMismatch(lazy"length of rx, $(length(rx)), does not equal length of ry, $(length(ry))")) + end + rx, ry +end +@test Core.Compiler.is_foldable(Base.infer_effects(check_ranges, (UnitRange{Int},UnitRange{Int}))) +# TODO JET.@test_opt check_ranges(1:2, 3:4) diff --git a/test/rational.jl b/test/rational.jl index a1af6eda64516..4b29618bd15e0 100644 --- a/test/rational.jl +++ b/test/rational.jl @@ -33,6 +33,11 @@ using Test @test @inferred(rationalize(Int, 3.0, 0.0)) === 3//1 @test @inferred(rationalize(Int, 3.0, 0)) === 3//1 + @test @inferred(rationalize(Int, 33//100; tol=0.1)) === 1//3 # because tol + @test @inferred(rationalize(Int, 3; tol=0.0)) === 3//1 + @test @inferred(rationalize(Int8, 
1000//333)) === Rational{Int8}(3//1) + @test @inferred(rationalize(Int8, 1000//3)) === Rational{Int8}(1//0) + @test @inferred(rationalize(Int8, 1000)) === Rational{Int8}(1//0) @test_throws OverflowError rationalize(UInt, -2.0) @test_throws ArgumentError rationalize(Int, big(3.0), -1.) # issue 26823 @@ -727,3 +732,10 @@ end @test rationalize(1.192 + 2.233im) == 149//125 + 2233//1000*im @test rationalize(Int8, 1.192 + 2.233im) == 118//99 + 67//30*im end +@testset "rationalize(Complex) with tol" begin + # test: rationalize(x::Complex; kvs...) + precise_next = 7205759403792795//72057594037927936 + @assert Float64(precise_next) == nextfloat(0.1) + @test rationalize(Int64, nextfloat(0.1) * im; tol=0) == precise_next * im + @test rationalize(0.1im; tol=eps(0.1)) == rationalize(0.1im) +end diff --git a/test/reflection.jl b/test/reflection.jl index 0ae8cb3f9d393..c13e7d88d8cfd 100644 --- a/test/reflection.jl +++ b/test/reflection.jl @@ -909,10 +909,9 @@ _test_at_locals2(1,1,0.5f0) f31687_parent() = f31687_child(0) params = Base.CodegenParams() _dump_function(f31687_parent, Tuple{}, - #=native=#false, #=wrapper=#false, #=strip=#false, + #=native=#false, #=wrapper=#false, #=raw=#true, #=dump_module=#true, #=syntax=#:att, #=optimize=#false, :none, - #=binary=#false, - params) + #=binary=#false) end @test nameof(Any) === :Any diff --git a/test/reinterpretarray.jl b/test/reinterpretarray.jl index fae4c6434e00d..501e9f4a9b57f 100644 --- a/test/reinterpretarray.jl +++ b/test/reinterpretarray.jl @@ -450,10 +450,10 @@ end SomeSingleton(x) = new() end - @test_throws ErrorException reinterpret(Int, nothing) - @test_throws ErrorException reinterpret(Missing, 3) - @test_throws ErrorException reinterpret(Missing, NotASingleton()) - @test_throws ErrorException reinterpret(NotASingleton, ()) + @test_throws ArgumentError reinterpret(Int, nothing) + @test_throws ArgumentError reinterpret(Missing, 3) + @test_throws ArgumentError reinterpret(Missing, NotASingleton()) + @test_throws 
ArgumentError reinterpret(NotASingleton, ()) @test_throws ArgumentError reinterpret(NotASingleton, fill(nothing, ())) @test_throws ArgumentError reinterpret(reshape, NotASingleton, fill(missing, 3)) @@ -513,3 +513,25 @@ end @test setindex!(x, SomeSingleton(:), 3, 5) == x2 @test_throws MethodError x[2,4] = nothing end + +# reinterpret of arbitrary bitstypes +@testset "Reinterpret arbitrary bitstypes" begin + struct Bytes15 + a::Int8 + b::Int16 + c::Int32 + d::Int64 + end + + @test reinterpret(Float64, ComplexF32(1, 1)) === 0.007812501848093234 + @test reinterpret(ComplexF32, 0.007812501848093234) === ComplexF32(1, 1) + @test reinterpret(Tuple{Float64, Float64}, ComplexF64(1, 1)) === (1.0, 1.0) + @test reinterpret(ComplexF64, (1.0, 1.0)) === ComplexF64(1, 1) + @test reinterpret(Tuple{Int8, Int16, Int32, Int64}, (Int64(1), Int32(2), Int16(3), Int8(4))) === (Int8(1), Int16(0), Int32(0), 288233674686595584) + @test reinterpret(Tuple{Int8, Int16, Tuple{Int32, Int64}}, (Int64(1), Int32(2), Int16(3), Int8(4))) === (Int8(1), Int16(0), (Int32(0), 288233674686595584)) + @test reinterpret(Tuple{Int64, Int32, Int16, Int8}, (Int8(1), Int16(0), (Int32(0), 288233674686595584))) === (Int64(1), Int32(2), Int16(3), Int8(4)) + @test reinterpret(Tuple{Int8, Int16, Int32, Int64}, Bytes15(Int8(1), Int16(2), Int32(3), Int64(4))) === (Int8(1), Int16(2), Int32(3), Int64(4)) + @test reinterpret(Bytes15, (Int8(1), Int16(2), Int32(3), Int64(4))) == Bytes15(Int8(1), Int16(2), Int32(3), Int64(4)) + + @test_throws ArgumentError reinterpret(Tuple{Int32, Int64}, (Int16(1), Int64(4))) +end diff --git a/test/runtests.jl b/test/runtests.jl index 16f60ddcf6764..1264acae985b0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -71,6 +71,7 @@ function move_to_node1(t) end # Base.compilecache only works from node 1, so precompile test is handled specially +move_to_node1("ccall") move_to_node1("precompile") move_to_node1("SharedArrays") move_to_node1("threads") diff --git a/test/show.jl b/test/show.jl 
index 76772c649a838..25c5a49372054 100644 --- a/test/show.jl +++ b/test/show.jl @@ -633,7 +633,7 @@ end @test_repr "::@m(x, y) + z" @test_repr "[@m(x) y z]" @test_repr "[@m(x) y; z]" -@test_repr "let @m(x), y=z; end" +test_repr("let @m(x), y=z; end", true) @test repr(:(@m x y)) == ":(#= $(@__FILE__):$(@__LINE__) =# @m x y)" @test string(:(@m x y)) == "#= $(@__FILE__):$(@__LINE__) =# @m x y" @@ -1010,6 +1010,9 @@ test_mt(show_f5, "show_f5(A::AbstractArray{T, N}, indices::Vararg{$Int, N})") @test sprint(show, :(function f end)) == ":(function f end)" @test_repr "function g end" +# Printing of :(function (x...) end) +@test startswith(replstr(Meta.parse("function (x...) end")), ":(function (x...,)") + # Printing of macro definitions @test sprint(show, :(macro m end)) == ":(macro m end)" @test_repr "macro m end" @@ -1366,6 +1369,8 @@ test_repr("(:).a") @test repr(@NamedTuple{kw::NTuple{7, Int64}}) == "@NamedTuple{kw::NTuple{7, Int64}}" @test repr(@NamedTuple{a::Float64, b}) == "@NamedTuple{a::Float64, b}" +# Test general printing of `Base.Pairs` (it should not use the `@Kwargs` macro syntax) +@test repr(@Kwargs{init::Int}) == "Base.Pairs{Symbol, $Int, Tuple{Symbol}, @NamedTuple{init::$Int}}" @testset "issue #42931" begin @test repr(NTuple{4, :A}) == "NTuple{4, :A}" diff --git a/test/sorting.jl b/test/sorting.jl index ec1666dabb2fb..147a70a5db7d9 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -88,6 +88,20 @@ end vcat(2000, (x:x+99 for x in 1900:-100:100)..., 1:99) end +function tuple_sort_test(x) + @test issorted(sort(x)) + length(x) > 9 && return # length > 9 uses a vector fallback + @test 0 == @allocated sort(x) +end +@testset "sort(::NTuple)" begin + @test sort((9,8,3,3,6,2,0,8)) == (0,2,3,3,6,8,8,9) + @test sort((9,8,3,3,6,2,0,8), by=x->x÷3) == (2,0,3,3,8,6,8,9) + for i in 1:40 + tuple_sort_test(tuple(rand(i)...)) + end + @test_throws ArgumentError sort((1,2,3.0)) +end + @testset "partialsort" begin @test partialsort([3,6,30,1,9],3) == 6 @test 
partialsort([3,6,30,1,9],3:4) == [6,9] @@ -530,6 +544,23 @@ end @test isequal(a, [8,6,7,NaN,5,3,0,9]) end +@testset "sort!(iterable)" begin + gen = (x % 7 + 0.1x for x in 1:50) + @test sort(gen) == sort!(collect(gen)) + gen = (x % 7 + 0.1y for x in 1:10, y in 1:5) + @test sort(gen; dims=1) == sort!(collect(gen); dims=1) + @test sort(gen; dims=2) == sort!(collect(gen); dims=2) + + @test_throws ArgumentError("dimension out of range") sort(gen; dims=3) + + @test_throws UndefKeywordError(:dims) sort(gen) + @test_throws UndefKeywordError(:dims) sort(collect(gen)) + @test_throws UndefKeywordError(:dims) sort!(collect(gen)) + + @test_throws ArgumentError sort("string") + @test_throws ArgumentError("1 cannot be sorted") sort(1) +end + @testset "sort!(::AbstractVector{<:Integer}) with short int range" begin a = view([9:-1:0;], :)::SubArray sort!(a) @@ -980,6 +1011,60 @@ end end end +struct MyArray49392{T, N} <: AbstractArray{T, N} + data::Array{T, N} +end +Base.size(A::MyArray49392) = size(A.data) +Base.getindex(A::MyArray49392, i...) = getindex(A.data, i...) +Base.setindex!(A::MyArray49392, v, i...) = setindex!(A.data, v, i...) +Base.similar(A::MyArray49392, ::Type{T}, dims::Dims{N}) where {T, N} = MyArray49392(similar(A.data, T, dims)) + +@testset "Custom matrices (#49392)" begin + x = rand(10, 10) + y = MyArray49392(copy(x)) + @test all(sort!(y, dims=2) .== sort!(x,dims=2)) +end + +@testset "MissingOptimization fastpath for Perm ordering when lo:hi ≠ eachindex(v)" begin + v = [rand() < .5 ? 
missing : rand() for _ in 1:100] + ix = collect(1:100) + sort!(ix, 1, 10, Base.Sort.DEFAULT_STABLE, Base.Order.Perm(Base.Order.Forward, v)) + @test issorted(v[ix[1:10]]) +end + +struct NonScalarIndexingOfWithoutMissingVectorAlg <: Base.Sort.Algorithm end +function Base.Sort._sort!(v::AbstractVector, ::NonScalarIndexingOfWithoutMissingVectorAlg, o::Base.Order.Ordering, kw) + Base.Sort.@getkw lo hi + first_half = v[lo:lo+(hi-lo)÷2] + second_half = v[lo+(hi-lo)÷2+1:hi] + whole = v[lo:hi] + all(vcat(first_half, second_half) .=== whole) || error() + out = Base.Sort._sort!(whole, Base.Sort.DEFAULT_STABLE, o, (;kw..., lo=1, hi=length(whole))) + v[lo:hi] .= whole + out +end + +@testset "Non-scaler indexing of WithoutMissingVector" begin + @testset "Unit test" begin + wmv = Base.Sort.WithoutMissingVector(Union{Missing, Int}[1, 7, 2, 9]) + @test wmv[[1, 3]] == [1, 2] + @test wmv[1:3] == [1, 7, 2] + end + @testset "End to end" begin + alg = Base.Sort.InitialOptimizations(NonScalarIndexingOfWithoutMissingVectorAlg()) + @test issorted(sort(rand(100); alg)) + @test issorted(sort([rand() < .5 ? missing : randstring() for _ in 1:100]; alg)) + end +end + +struct DispatchLoopTestAlg <: Base.Sort.Algorithm end +function Base.sort!(v::AbstractVector, lo::Integer, hi::Integer, ::DispatchLoopTestAlg, order::Base.Order.Ordering) + sort!(view(v, lo:hi); order) +end +@testset "Support dispatch from the old style to the new style and back" begin + @test issorted(sort!(rand(100), Base.Sort.InitialOptimizations(DispatchLoopTestAlg()), Base.Order.Forward)) +end + # This testset is at the end of the file because it is slow. 
@testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128, diff --git a/test/stacktraces.jl b/test/stacktraces.jl index 96393b124f70e..590abb90c590f 100644 --- a/test/stacktraces.jl +++ b/test/stacktraces.jl @@ -192,3 +192,57 @@ let bt end @test any(s->startswith(string(s), "f33065(x::Float32, y::Float32; b::Float64, a::String, c::"), bt) end + +struct F49231{a,b,c,d,e,f,g} end +(::F49231)(a,b,c) = error("oops") + +@testset "type_depth_limit" begin + tdl = Base.type_depth_limit + + str = repr(typeof(view([1, 2, 3], 1:2))) + @test tdl(str, 0, maxdepth = 1) == "SubArray{…}" + @test tdl(str, 0, maxdepth = 2) == "SubArray{$Int, 1, Vector{…}, Tuple{…}, true}" + @test tdl(str, 0, maxdepth = 3) == "SubArray{$Int, 1, Vector{$Int}, Tuple{UnitRange{…}}, true}" + @test tdl(str, 0, maxdepth = 4) == "SubArray{$Int, 1, Vector{$Int}, Tuple{UnitRange{$Int}}, true}" + @test tdl(str, 3) == "SubArray{…}" + @test tdl(str, 44) == "SubArray{…}" + @test tdl(str, 45) == "SubArray{$Int, 1, Vector{…}, Tuple{…}, true}" + @test tdl(str, 59) == "SubArray{$Int, 1, Vector{…}, Tuple{…}, true}" + @test tdl(str, 60) == "SubArray{$Int, 1, Vector{$Int}, Tuple{UnitRange{…}}, true}" + @test tdl(str, 100) == "SubArray{$Int, 1, Vector{$Int}, Tuple{UnitRange{$Int}}, true}" + + str = repr(Vector{V} where V<:AbstractVector{T} where T<:Real) + @test tdl(str, 0, maxdepth = 1) == "Vector{…} where {…}" + @test tdl(str, 0, maxdepth = 2) == "Vector{V} where {T<:Real, V<:AbstractVector{…}}" + @test tdl(str, 0, maxdepth = 3) == "Vector{V} where {T<:Real, V<:AbstractVector{T}}" + @test tdl(str, 20) == "Vector{…} where {…}" + @test tdl(str, 46) == "Vector{…} where {…}" + @test tdl(str, 47) == "Vector{V} where {T<:Real, V<:AbstractVector{T}}" + + str = "F49231{Vector,Val{('}','}')},Vector{Vector{Vector{Vector}}},Tuple{Int,Int,Int,Int,Int,Int,Int},Int,Int,Int}" + @test tdl(str, 105) == "F49231{Vector,Val{('}','}')},Vector{Vector{Vector{…}}},Tuple{Int,Int,Int,Int,Int,Int,Int},Int,Int,Int}" + @test 
tdl(str, 85) == "F49231{Vector,Val{…},Vector{…},Tuple{…},Int,Int,Int}" + + # Stacktrace + a = UInt8(81):UInt8(160) + b = view(a, 1:64) + c = reshape(b, (8, 8)) + d = reinterpret(reshape, Float64, c) + sqrteach(a) = [sqrt(x) for x in a] + st = try + sqrteach(d) + catch e + stacktrace(catch_backtrace()) + end + str = sprint(Base.show_backtrace, st, context = (:limit=>true, :color=>true, :displaysize=>(50,105))) + @test endswith(str, "to see complete types.") + @test contains(str, "[5] \e[0m\e[1mcollect_to!\e[22m\e[0m\e[1m(\e[22m\e[90mdest\e[39m::\e[0mVector\e[90m{…}\e[39m, \e[90mitr\e[39m::\e[0mBase.Generator\e[90m{…}\e[39m, \e[90moffs\e[39m::\e[0m$Int, \e[90mst\e[39m::\e[0mTuple\e[90m{…}\e[39m\e[0m\e[1m)\e[22m\n\e[90m") + + st = try + F49231{Vector,Val{'}'},Vector{Vector{Vector{Vector}}},Tuple{Int,Int,Int,Int,Int,Int,Int},Int,Int,Int}()(1,2,3) + catch e + stacktrace(catch_backtrace()) + end + str = sprint(Base.show_backtrace, st, context = (:limit=>true, :color=>true, :displaysize=>(50,132))) + @test contains(str, "[2] \e[0m\e[1m(::$F49231{Vector, Val{…}, Vector{…}, NTuple{…}, $Int, $Int, $Int})\e[22m\e[0m\e[1m(\e[22m\e[90ma\e[39m::\e[0m$Int, \e[90mb\e[39m::\e[0m$Int, \e[90mc\e[39m::\e[0m$Int\e[0m\e[1m)\e[22m\n\e[90m") +end diff --git a/test/staged.jl b/test/staged.jl index 0fa8ecb182cff..df351d8d47b96 100644 --- a/test/staged.jl +++ b/test/staged.jl @@ -182,7 +182,7 @@ let gf_err, tsk = @async nothing # create a Task for yield to try to run Expected = ErrorException("task switch not allowed from inside staged nor pure functions") @test_throws Expected gf_err() @test_throws Expected gf_err() - @test gf_err_ref[] == 4 + @test gf_err_ref[] < 1000 end gf_err_ref[] = 0 diff --git a/test/strings/basic.jl b/test/strings/basic.jl index 602c38551f6d8..13f2f5197187a 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -176,8 +176,11 @@ end x = "abcdefg" @testset "basic unit range" begin @test SubString(x, 2:4) == "bcd" - @test view(x, 2:4) == "bcd" - @test 
view(x, 2:4) isa SubString + sx = view(x, 2:4) + @test sx == "bcd" + @test sx isa SubString + @test parent(sx) === x + @test parentindices(sx) == (2:4,) @test (@view x[4:end]) == "defg" @test (@view x[4:end]) isa SubString end @@ -247,8 +250,6 @@ end @test string(sym) == string(Char(0xdcdb)) @test String(sym) == string(Char(0xdcdb)) @test Meta.lower(Main, sym) === sym - @test Meta.parse(string(Char(0xe0080)," = 1"), 1, raise=false)[1] == - Expr(:error, "invalid character \"\Ue0080\" near column 1") end @testset "Symbol and gensym" begin @@ -758,11 +759,6 @@ function getData(dic) end @test getData(Dict()) == ",,,,,,,,,,,,,,,,,," -@testset "unrecognized escapes in string/char literals" begin - @test_throws Meta.ParseError Meta.parse("\"\\.\"") - @test_throws Meta.ParseError Meta.parse("\'\\.\'") -end - @testset "thisind" begin let strs = Any["∀α>β:α+1>β", s"∀α>β:α+1>β", SubString("123∀α>β:α+1>β123", 4, 18), diff --git a/test/subtype.jl b/test/subtype.jl index 4a3e55c039e94..de11689e9e7c4 100644 --- a/test/subtype.jl +++ b/test/subtype.jl @@ -2547,3 +2547,13 @@ let T = Tuple{Union{Type{T}, Type{S}}, Union{Val{T}, Val{S}}, Union{Val{T}, S}} @test typeintersect(T, S) == Tuple{Type{A}, Union{Val{A}, Val{S} where S<:Union{Val, A}, Val{x} where x<:Val, Val{x} where x<:Union{Val, A}}, Val{A}} where A<:(Val{S} where S<:Val) @test typeintersect(S, T) == Tuple{Type{T}, Union{Val{T}, Val{S}}, Val{T}} where {T<:Val, S<:(Union{Val{A}, Val} where A)} end + +#issue #49857 +@test !<:(Type{Vector{Union{Base.BitInteger, Base.IEEEFloat, StridedArray, Missing, Nothing, Val{T}}}} where {T}, Type{Array{T}} where {T}) + +#issue 50195 +T50195{S} = Pair{S,Set{S}} +let a = Tuple{Type{X} where X<:Union{Nothing, Val{X1} where {X4, X1<:(Pair{X2, Val{X2}} where X2<:Val{X4})}}}, + b = Tuple{Type{Y} where Y<:(Val{Y1} where {Y4<:Src, Y1<:(Pair{Y2, Val{Y2}} where Y2<:Union{Val{Y4}, Y4})})} where Src + @test typeintersect(a, b) <: Any +end diff --git a/test/syntax.jl b/test/syntax.jl index 
8bba5f9205613..4d1b167693adb 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -3,23 +3,31 @@ # tests for parser and syntax lowering using Random +using Base: remove_linenums! -import Base.Meta.ParseError +using_JuliaSyntax = parentmodule(Core._parse) != Core.Compiler -function parseall(str) - pos = firstindex(str) - exs = [] - while pos <= lastindex(str) - ex, pos = Meta.parse(str, pos) - push!(exs, ex) - end - if length(exs) == 0 - throw(ParseError("end of input")) - elseif length(exs) == 1 - return exs[1] +macro test_parseerror(str, msg) + if using_JuliaSyntax + # Diagnostics are tested separately in JuliaSyntax + ex = :(@test_throws Meta.ParseError Meta.parse($(esc(str)))) else - return Expr(:block, exs...) + ex = :(@test_throws Meta.ParseError($(esc(msg))) Meta.parse($(esc(str)))) end + ex.args[2] = __source__ + return ex +end + +macro test_parseerror(str) + ex = :(@test_throws Meta.ParseError Meta.parse($(esc(str)))) + ex.args[2] = __source__ + return ex +end + +function parseall_nolines(str) + ex = Meta.parseall(str) + filter!(e->!(e isa LineNumberNode), ex.args) + return ex end # issue #9684 @@ -38,13 +46,8 @@ end # issue #9704 let a = :a - @test :(try - catch $a - end) == :(try - catch a - end) - @test :(module $a end) == :(module a - end) + @test :(try catch $a end) == :(try catch a end) + @test :(module $a end) == :(module a end) end # string literals @@ -64,19 +67,19 @@ macro test999_str(args...); args; end @test test999"foo"123 == ("foo", 123) # issue #5997 -@test_throws ParseError Meta.parse(": x") -@test_throws ParseError Meta.parse("""begin +@test_parseerror ": x" +@test_parseerror """begin : - x""") -@test_throws ParseError Meta.parse("d[: 2]") + x""" +@test_parseerror "d[: 2]" # issue #6770 -@test_throws ParseError Meta.parse("x.3") +@test_parseerror "x.3" # issue #8763 -@test_throws ParseError Meta.parse("sqrt(16)2") -@test_throws ParseError Meta.parse("x' y") -@test_throws ParseError Meta.parse("x 'y") +@test_parseerror "sqrt(16)2" 
+@test_parseerror "x' y" +@test_parseerror "x 'y" @test Meta.parse("x'y") == Expr(:call, :*, Expr(Symbol("'"), :x), :y) # issue #18851 @@ -88,22 +91,22 @@ macro test999_str(args...); args; end @test Meta.parse("-2(m)") == Expr(:call, :*, -2, :m) # issue #8301 -@test_throws ParseError Meta.parse("&*s") +@test_parseerror "&*s" # issue #10677 -@test_throws ParseError Meta.parse("/1") -@test_throws ParseError Meta.parse("/pi") +@test_parseerror "/1" +@test_parseerror "/pi" @test Meta.parse("- = 2") == Expr(:(=), :(-), 2) @test Meta.parse("/ = 2") == Expr(:(=), :(/), 2) -@test_throws ParseError Meta.parse("< : 2") -@test_throws ParseError Meta.parse("+ : 2") -@test_throws ParseError Meta.parse("< :2") +@test_parseerror "< : 2" +@test_parseerror "+ : 2" +@test_parseerror "< :2" @test Meta.parse("+ :2") == Expr(:call, :(+), QuoteNode(2)) # issue #10900 -@test_throws ParseError Meta.parse("+=") -@test_throws ParseError Meta.parse(".") -@test_throws ParseError Meta.parse("...") +@test_parseerror "+=" +@test_parseerror "." +@test_parseerror "..." 
# issue #10901 @test Meta.parse("/([1], 1)[1]") == :(([1] / 1)[1]) @@ -156,35 +159,35 @@ macro test999_str(args...); args; end Expr(:., Expr(:$, :c), Expr(:$, :d)))) # fix pr #11338 and test for #11497 -@test parseall("using \$\na") == Expr(:block, Expr(:using, Expr(:., :$)), :a) -@test parseall("using \$,\na") == Expr(:using, Expr(:., :$), Expr(:., :a)) -@test parseall("using &\na") == Expr(:block, Expr(:using, Expr(:., :&)), :a) +@test parseall_nolines("using \$\na") == Expr(:toplevel, Expr(:using, Expr(:., :$)), :a) +@test parseall_nolines("using \$,\na") == Expr(:toplevel, Expr(:using, Expr(:., :$), Expr(:., :a))) +@test parseall_nolines("using &\na") == Expr(:toplevel, Expr(:using, Expr(:., :&)), :a) -@test parseall("a = &\nb") == Expr(:block, Expr(:(=), :a, :&), :b) -@test parseall("a = \$\nb") == Expr(:block, Expr(:(=), :a, :$), :b) -@test parseall(":(a = &\nb)") == Expr(:quote, Expr(:(=), :a, Expr(:&, :b))) -@test parseall(":(a = \$\nb)") == Expr(:quote, Expr(:(=), :a, Expr(:$, :b))) +@test parseall_nolines("a = &\nb") == Expr(:toplevel, Expr(:(=), :a, :&), :b) +@test parseall_nolines("a = \$\nb") == Expr(:toplevel, Expr(:(=), :a, :$), :b) +@test parseall_nolines(":(a = &\nb)") == Expr(:toplevel, Expr(:quote, Expr(:(=), :a, Expr(:&, :b)))) +@test parseall_nolines(":(a = \$\nb)") == Expr(:toplevel, Expr(:quote, Expr(:(=), :a, Expr(:$, :b)))) # issue 12027 - short macro name parsing vs _str suffix -@test parseall(""" - macro f(args...) end; @f "macro argument" +@test parseall_nolines(""" + macro f(args...) end\n@f "macro argument" """) == Expr(:toplevel, Expr(:macro, Expr(:call, :f, Expr(:..., :args)), Expr(:block, LineNumberNode(1, :none), LineNumberNode(1, :none))), - Expr(:macrocall, Symbol("@f"), LineNumberNode(1, :none), "macro argument")) + Expr(:macrocall, Symbol("@f"), LineNumberNode(2, :none), "macro argument")) # blocks vs. 
tuples @test Meta.parse("()") == Expr(:tuple) @test Meta.parse("(;)") == Expr(:tuple, Expr(:parameters)) @test Meta.parse("(;;)") == Expr(:block) @test Meta.parse("(;;;;)") == Expr(:block) -@test_throws ParseError Meta.parse("(,)") -@test_throws ParseError Meta.parse("(;,)") -@test_throws ParseError Meta.parse("(,;)") +@test_parseerror "(,)" +@test_parseerror "(;,)" +@test_parseerror "(,;)" # TODO: would be nice to make these errors, but needed to parse e.g. `(x;y,)->x` -#@test_throws ParseError Meta.parse("(1;2,)") -#@test_throws ParseError Meta.parse("(1;2,;)") -#@test_throws ParseError Meta.parse("(1;2,;3)") +#@test_parseerror "(1;2,)" +#@test_parseerror "(1;2,;)" +#@test_parseerror "(1;2,;3)" @test Meta.parse("(x;)") == Expr(:block, :x) @test Meta.parse("(;x)") == Expr(:tuple, Expr(:parameters, :x)) @test Meta.parse("(;x,)") == Expr(:tuple, Expr(:parameters, :x)) @@ -201,7 +204,7 @@ macro test999_str(args...); args; end @test Meta.parse("(x,a;y=1)") == Expr(:tuple, Expr(:parameters, Expr(:kw, :y, 1)), :x, :a) @test Meta.parse("(x,a;y=1,z=2)") == Expr(:tuple, Expr(:parameters, Expr(:kw,:y,1), Expr(:kw,:z,2)), :x, :a) @test Meta.parse("(a=1, b=2)") == Expr(:tuple, Expr(:(=), :a, 1), Expr(:(=), :b, 2)) -@test_throws ParseError Meta.parse("(1 2)") # issue #15248 +@test_parseerror "(1 2)" # issue #15248 @test Meta.parse("f(x;)") == Expr(:call, :f, Expr(:parameters), :x) @@ -272,13 +275,16 @@ end @test_throws BoundsError Meta.parse("x = 1", 7) # issue #14683 -@test_throws ParseError Meta.parse("'\\A\"'") +@test_parseerror "'\\A\"'" @test Meta.parse("'\"'") == Meta.parse("'\\\"'") == '"' == "\""[1] == '\42' # issue #24558 @test '\u2200' == "\u2200"[1] -@test_throws ParseError Meta.parse("f(2x for x=1:10, y") +if !using_JuliaSyntax + # This should be Expr(:incomplete) + @test_parseerror "f(2x for x=1:10, y" +end # issue #15223 call0(f) = f() @@ -314,11 +320,6 @@ let p = 15 @test 2p+1 == 31 # not a hex float literal end -macro test_parseerror(str, msg) - ex = 
:(@test_throws ParseError($(esc(msg))) Meta.parse($(esc(str)))) - ex.args[2] = __source__ - return ex -end @test_parseerror("0x", "invalid numeric constant \"0x\"") @test_parseerror("0b", "invalid numeric constant \"0b\"") @test_parseerror("0o", "invalid numeric constant \"0o\"") @@ -326,9 +327,8 @@ end @test_parseerror("0x1.0p", "invalid numeric constant \"0x1.0\"") # issue #15798 -@test Meta.lower(Main, Base.parse_input_line(""" - try = "No" - """)) == Expr(:error, "unexpected \"=\"") +# lowering preserves Expr(:error) +@test Meta.lower(Main, Expr(:error, "no")) == Expr(:error, "no") # issue #19861 make sure macro-expansion happens in the newest world for top-level expression @test eval(Base.parse_input_line(""" @@ -372,9 +372,9 @@ add_method_to_glob_fn!() @test f15844(Int64(1)) == 3 # issue #15661 -@test_throws ParseError Meta.parse("function catch() end") -@test_throws ParseError Meta.parse("function end() end") -@test_throws ParseError Meta.parse("function finally() end") +@test_parseerror "function catch() end" +@test_parseerror "function end() end" +@test_parseerror "function finally() end" # PR #16170 @test Meta.lower(Main, Meta.parse("true(x) = x")) == Expr(:error, "invalid function name \"true\"") @@ -425,18 +425,18 @@ end :y)) # test that pre 0.5 deprecated syntax is a parse error -@test_throws ParseError Meta.parse("Int [1,2,3]") -@test_throws ParseError Meta.parse("Int [x for x in 1:10]") -@test_throws ParseError Meta.parse("foo (x) = x") -@test_throws ParseError Meta.parse("foo {T<:Int}(x::T) = x") +@test_parseerror "Int [1,2,3]" +@test_parseerror "Int [x for x in 1:10]" +@test_parseerror "foo (x) = x" +@test_parseerror "foo {T<:Int}(x::T) = x" -@test_throws ParseError Meta.parse("Foo .bar") +@test_parseerror "Foo .bar" -@test_throws ParseError Meta.parse("import x .y") -@test_throws ParseError Meta.parse("using x .y") +@test_parseerror "import x .y" +@test_parseerror "using x .y" -@test_throws ParseError Meta.parse("--x") -@test_throws ParseError 
Meta.parse("stagedfunction foo(x); end") +@test_parseerror "--x" +@test_parseerror "stagedfunction foo(x); end" @test Meta.parse("A=>B") == Expr(:call, :(=>), :A, :B) @@ -452,7 +452,7 @@ end @test Meta.parse("[a,;c]") == Expr(:vect, Expr(:parameters, :c), :a) @test Meta.parse("a[b,c;d]") == Expr(:ref, :a, Expr(:parameters, :d), :b, :c) @test Meta.parse("a[b,;d]") == Expr(:ref, :a, Expr(:parameters, :d), :b) -@test_throws ParseError Meta.parse("[a,;,b]") +@test_parseerror "[a,;,b]" @test Meta.parse("{a,b;c}") == Expr(:braces, Expr(:parameters, :c), :a, :b) @test Meta.parse("{a,;c}") == Expr(:braces, Expr(:parameters, :c), :a) @test Meta.parse("a{b,c;d}") == Expr(:curly, :a, Expr(:parameters, :d), :b, :c) @@ -538,10 +538,13 @@ for (str, tag) in Dict("" => :none, "\"" => :string, "#=" => :comment, "'" => :c "let;" => :block, "for i=1;" => :block, "function f();" => :block, "f() do x;" => :block, "module X;" => :block, "mutable struct X;" => :block, "struct X;" => :block, "(" => :other, "[" => :other, - "begin" => :other, "quote" => :other, - "let" => :other, "for" => :other, "function" => :other, + "for" => :other, "function" => :other, "f() do" => :other, "module" => :other, "mutable struct" => :other, - "struct" => :other) + "struct" => :other, + "quote" => using_JuliaSyntax ? :block : :other, + "let" => using_JuliaSyntax ? :block : :other, + "begin" => using_JuliaSyntax ? 
:block : :other, + ) @test Base.incomplete_tag(Meta.parse(str, raise=false)) == tag end @@ -626,7 +629,7 @@ end # issue 10046 for op in ["+", "-", "\$", "|", ".+", ".-", "*", ".*"] - @test_throws ParseError Meta.parse("$op in [+, -]") + @test_parseerror "$op in [+, -]" end # issue #17701 @@ -638,7 +641,7 @@ end # PR #15592 let str = "[1] [2]" - @test_throws ParseError Meta.parse(str) + @test_parseerror str end # issue 15896 and PR 15913 @@ -706,7 +709,7 @@ m1_exprs = get_expr_list(Meta.lower(@__MODULE__, quote @m1 end)) let low3 = Meta.lower(@__MODULE__, quote @m3 end) m3_exprs = get_expr_list(low3) ci = low3.args[1]::Core.CodeInfo - @test ci.codelocs == [3, 1] + @test ci.codelocs == [4, 2] @test is_return_ssavalue(m3_exprs[end]) end @@ -1001,14 +1004,14 @@ end @test Test21604.X(1.0) === Test21604.X(1.0) # issue #20575 -@test_throws ParseError Meta.parse("\"a\"x") -@test_throws ParseError Meta.parse("\"a\"begin end") -@test_throws ParseError Meta.parse("\"a\"begin end\"b\"") +@test_parseerror "\"a\"x" +@test_parseerror "\"a\"begin end" +@test_parseerror "\"a\"begin end\"b\"" # issue #16427 -@test_throws ParseError Meta.parse("for i=1:1 end(3)") -@test_throws ParseError Meta.parse("begin end(3)") -@test_throws ParseError Meta.parse("while false end(3)") +@test_parseerror "for i=1:1 end(3)" +@test_parseerror "begin end(3)" +@test_parseerror "while false end(3)" # comment 298107224 on pull #21607 module Test21607 @@ -1069,7 +1072,7 @@ end === (3, String) @test Meta.parse("3 +⁽¹⁾ 4") == Expr(:call, :+⁽¹⁾, 3, 4) @test Meta.parse("3 +₍₀₎ 4") == Expr(:call, :+₍₀₎, 3, 4) for bad in ('=', '$', ':', "||", "&&", "->", "<:") - @test_throws ParseError Meta.parse("3 $(bad)⁽¹⁾ 4") + @test_parseerror "3 $(bad)⁽¹⁾ 4" end @test Base.operator_precedence(:+̂) == Base.operator_precedence(:+) @@ -1084,20 +1087,20 @@ end Expr(:tuple, :x, :y), Expr(:tuple, 1, 2))) -@test_throws ParseError Meta.parse("[2for i=1:10]") -@test_throws ParseError Meta.parse("[1 for i in 1:2for j in 2]") 
-@test_throws ParseError Meta.parse("(1 for i in 1:2for j in 2)") +@test_parseerror "[2for i=1:10]" +@test_parseerror "[1 for i in 1:2for j in 2]" +@test_parseerror "(1 for i in 1:2for j in 2)" # issue #20441 -@test_throws ParseError Meta.parse("[x.2]") -@test_throws ParseError Meta.parse("x.2") +@test_parseerror "[x.2]" +@test_parseerror "x.2" @test Meta.parse("[x;.2]") == Expr(:vcat, :x, 0.2) # issue #22840 @test Meta.parse("[:a :b]") == Expr(:hcat, QuoteNode(:a), QuoteNode(:b)) # issue #22868 -@test_throws ParseError Meta.parse("x@time 2") -@test_throws ParseError Meta.parse("@ time") +@test_parseerror "x@time 2" +@test_parseerror "@ time" # issue #7479 @test Meta.lower(Main, Meta.parse("(true &&& false)")) == Expr(:error, "invalid syntax &false") @@ -1106,9 +1109,9 @@ end @test Meta.lower(Main, :(&(1, 2))) == Expr(:error, "invalid syntax &(1, 2)") # if an indexing expression becomes a cat expression, `end` is not special -@test_throws ParseError Meta.parse("a[end end]") -@test_throws ParseError Meta.parse("a[end;end]") -#@test_throws ParseError Meta.parse("a[end;]") # this is difficult to fix +@test_parseerror "a[end end]" +@test_parseerror "a[end;end]" +#@test_parseerror "a[end;]" # this is difficult to fix let a = rand(8), i = 3 @test a[[1:i-1; i+1:end]] == a[[1,2,4,5,6,7,8]] end @@ -1119,12 +1122,12 @@ end end for i = 1:5] == fill(nothing, 5) # issue #18912 -@test_throws ParseError Meta.parse("(::)") +@test_parseerror "(::)" @test Meta.parse(":(::)") == QuoteNode(Symbol("::")) -@test_throws ParseError Meta.parse("f(::) = ::") +@test_parseerror "f(::) = ::" @test Meta.parse("(::A)") == Expr(Symbol("::"), :A) -@test_throws ParseError Meta.parse("(::, 1)") -@test_throws ParseError Meta.parse("(1, ::)") +@test_parseerror "(::, 1)" +@test_parseerror "(1, ::)" # issue #18650 let ex = Meta.parse("maximum(@elapsed sleep(1) for k = 1:10)") @@ -1186,17 +1189,20 @@ end @test Meta.parse("@Mdl.foo [1] + [2]") == Meta.parse("@Mdl.foo([1] + [2])") # issue #24289 +module 
M24289 macro m24289() :(global $(esc(:x24289)) = 1) end -@test (@macroexpand @m24289) == :(global x24289 = 1) +end +M24289.@m24289 +@test x24289 === 1 # parsing numbers with _ and . @test Meta.parse("1_2.3_4") == 12.34 -@test_throws ParseError Meta.parse("1._") -@test_throws ParseError Meta.parse("1._5") -@test_throws ParseError Meta.parse("1e.3") -@test_throws ParseError Meta.parse("1e3.") +@test_parseerror "1._" +@test_parseerror "1._5" +@test_parseerror "1e.3" +@test_parseerror "1e3." @test Meta.parse("2e_1") == Expr(:call, :*, 2, :e_1) # issue #17705 @test Meta.parse("2e3_") == Expr(:call, :*, 2e3, :_) @@ -1262,8 +1268,10 @@ end @test raw"x \\\ y" == "x \\\\\\ y" end -@test_throws ParseError("expected \"}\" or separator in arguments to \"{ }\"; got \"V)\"") Meta.parse("f(x::V) where {V) = x") -@test_throws ParseError("expected \"]\" or separator in arguments to \"[ ]\"; got \"1)\"") Meta.parse("[1)") +@test_parseerror("f(x::V) where {V) = x", + "expected \"}\" or separator in arguments to \"{ }\"; got \"V)\"") +@test_parseerror("[1)", + "expected \"]\" or separator in arguments to \"[ ]\"; got \"1)\"") # issue #9972 @test Meta.lower(@__MODULE__, :(f(;3))) == Expr(:error, "invalid keyword argument syntax \"3\"") @@ -1311,7 +1319,7 @@ let getindex = 0, setindex! = 1, colon = 2, vcat = 3, hcat = 4, hvcat = 5 end # issue #25020 -@test_throws ParseError Meta.parse("using Colors()") +@test_parseerror "using Colors()" let ex = Meta.parse("md\"x\" f(x) = x", 1)[1] # custom string literal is not a docstring @@ -1365,18 +1373,18 @@ end @test Meta.parse("-(x;;;)^2") == Expr(:call, :-, Expr(:call, :^, Expr(:block, :x), 2)) @test Meta.parse("+((1,2))") == Expr(:call, :+, Expr(:tuple, 1, 2)) -@test_throws ParseError("space before \"(\" not allowed in \"+ (\" at none:1") Meta.parse("1 -+ (a=1, b=2)") +@test_parseerror "1 -+ (a=1, b=2)" "space before \"(\" not allowed in \"+ (\" at none:1" # issue #29781 -@test_throws ParseError("space before \"(\" not allowed in \"sin. 
(\" at none:1") Meta.parse("sin. (1)") +@test_parseerror "sin. (1)" "space before \"(\" not allowed in \"sin. (\" at none:1" # Parser errors for disallowed space contain line numbers -@test_throws ParseError("space before \"[\" not allowed in \"f() [\" at none:2") Meta.parse("\nf() [i]") -@test_throws ParseError("space before \"(\" not allowed in \"f() (\" at none:2") Meta.parse("\nf() (i)") -@test_throws ParseError("space before \".\" not allowed in \"f() .\" at none:2") Meta.parse("\nf() .i") -@test_throws ParseError("space before \"{\" not allowed in \"f() {\" at none:2") Meta.parse("\nf() {i}") -@test_throws ParseError("space before \"m\" not allowed in \"@ m\" at none:2") Meta.parse("\n@ m") -@test_throws ParseError("space before \".\" not allowed in \"a .\" at none:2") Meta.parse("\nusing a .b") -@test_throws ParseError("space before \".\" not allowed in \"a .\" at none:2") Meta.parse("\nusing a .b") -@test_throws ParseError("space before \"(\" not allowed in \"+ (\" at none:2") Meta.parse("\n+ (x, y)") +@test_parseerror "\nf() [i]" "space before \"[\" not allowed in \"f() [\" at none:2" +@test_parseerror "\nf() (i)" "space before \"(\" not allowed in \"f() (\" at none:2" +@test_parseerror "\nf() .i" "space before \".\" not allowed in \"f() .\" at none:2" +@test_parseerror "\nf() {i}" "space before \"{\" not allowed in \"f() {\" at none:2" +@test_parseerror "\n@ m" "space before \"m\" not allowed in \"@ m\" at none:2" +@test_parseerror "\nusing a .b" "space before \".\" not allowed in \"a .\" at none:2" +@test_parseerror "\nusing a .b" "space before \".\" not allowed in \"a .\" at none:2" +@test_parseerror "\n+ (x, y)" "space before \"(\" not allowed in \"+ (\" at none:2" @test Meta.parse("1 -+(a=1, b=2)") == Expr(:call, :-, 1, Expr(:call, :+, Expr(:kw, :a, 1), Expr(:kw, :b, 2))) @@ -1398,7 +1406,7 @@ end @test Meta.parse("-√2") == Expr(:call, :-, Expr(:call, :√, 2)) @test Meta.parse("√3x^2") == Expr(:call, :*, Expr(:call, :√, 3), Expr(:call, :^, :x, 2)) 
@test Meta.parse("-3x^2") == Expr(:call, :*, -3, Expr(:call, :^, :x, 2)) -@test_throws ParseError Meta.parse("2!3") +@test_parseerror "2!3" # issue #27914 @test Meta.parse("2f(x)") == Expr(:call, :*, 2, Expr(:call, :f, :x)) @@ -1408,7 +1416,7 @@ end @test Meta.parse("2(x)") == Expr(:call, :*, 2, :x) @test Meta.parse("2(x)y") == Expr(:call, :*, 2, :x, :y) -@test_throws ParseError Meta.parse("a.: b") +@test_parseerror "a.: b" @test Meta.parse("a.:end") == Expr(:., :a, QuoteNode(:end)) @test Meta.parse("a.:catch") == Expr(:., :a, QuoteNode(:catch)) @test Meta.parse("a.end") == Expr(:., :a, QuoteNode(:end)) @@ -1424,7 +1432,7 @@ let len = 10 end # Module name cannot be a reserved word. -@test_throws ParseError Meta.parse("module module end") +@test_parseerror "module module end" @test Meta.lower(@__MODULE__, :(global true)) == Expr(:error, "invalid syntax in \"global\" declaration") @test Meta.lower(@__MODULE__, :(let ccall end)) == Expr(:error, "invalid identifier name \"ccall\"") @@ -1441,7 +1449,7 @@ end # issue #27690 # previously, this was allowed since it thought `end` was being used for indexing. # however the quote should disable that context. 
-@test_throws ParseError Meta.parse("Any[:(end)]") +@test_parseerror "Any[:(end)]" # issue #17781 let ex = Meta.lower(@__MODULE__, Meta.parse(" @@ -1664,26 +1672,28 @@ end macro foo28244(sym) x = :(bar()) push!(x.args, Expr(sym)) - x + esc(x) +end +@test @macroexpand(@foo28244(kw)) == Expr(:call, :bar, Expr(:kw)) +let x = @macroexpand @foo28244(var"let") + @test Meta.lower(@__MODULE__, x) == Expr(:error, "malformed expression") end -@test (@macroexpand @foo28244(kw)) == Expr(:call, GlobalRef(@__MODULE__,:bar), Expr(:kw)) -@test eval(:(@macroexpand @foo28244($(Symbol("let"))))) == Expr(:error, "malformed expression") # #16356 -@test_throws ParseError Meta.parse("0xapi") +@test_parseerror "0xapi" # #22523 #22712 -@test_throws ParseError Meta.parse("a?b:c") -@test_throws ParseError Meta.parse("a ?b:c") -@test_throws ParseError Meta.parse("a ? b:c") -@test_throws ParseError Meta.parse("a ? b :c") -@test_throws ParseError Meta.parse("?") +@test_parseerror "a?b:c" +@test_parseerror "a ?b:c" +@test_parseerror "a ? b:c" +@test_parseerror "a ? b :c" +@test_parseerror "?" 
# #13079 @test Meta.parse("1<<2*3") == :((1<<2)*3) # #19987 -@test_throws ParseError Meta.parse("try ; catch f() ; end") +@test_parseerror "try ; catch f() ; end" # #23076 @test :([1,2;]) == Expr(:vect, Expr(:parameters), 1, 2) @@ -1720,8 +1730,8 @@ end @test Meta.lower(@__MODULE__, :(f(x) = (y = x + 1; ccall((:a, y), Cvoid, ())))) == Expr(:error, "ccall function name and library expression cannot reference local variables") -@test_throws ParseError Meta.parse("x.'") -@test_throws ParseError Meta.parse("0.+1") +@test_parseerror "x.'" +@test_parseerror "0.+1" # #24221 @test Meta.isexpr(Meta.lower(@__MODULE__, :(a=_)), :error) @@ -1815,7 +1825,7 @@ end @test Meta.parse("1⁝2") == Expr(:call, :⁝, 1, 2) @test Meta.parse("1..2") == Expr(:call, :.., 1, 2) # we don't parse chains of these since the associativity and meaning aren't clear -@test_throws ParseError Meta.parse("1..2..3") +@test_parseerror "1..2..3" # issue #30048 @test Meta.isexpr(Meta.lower(@__MODULE__, :(for a in b @@ -1932,8 +1942,8 @@ macro id28992(x) x end @test Meta.@lower(.+(a,b) = 0) == Expr(:error, "invalid function name \".+\"") @test Meta.@lower((.+)(a,b) = 0) == Expr(:error, "invalid function name \"(.+)\"") let m = @__MODULE__ - @test Meta.lower(m, :($m.@id28992(.+(a,b) = 0))) == Expr(:error, "invalid function name \"$(nameof(m)).:.+\"") - @test Meta.lower(m, :($m.@id28992((.+)(a,b) = 0))) == Expr(:error, "invalid function name \"(.$(nameof(m)).+)\"") + @test Meta.lower(m, :($m.@id28992(.+(a,b) = 0))) == Expr(:error, "invalid function name \"$(nameof(m)).:.+\" around $(@__FILE__):$(@__LINE__)") + @test Meta.lower(m, :($m.@id28992((.+)(a,b) = 0))) == Expr(:error, "invalid function name \"(.$(nameof(m)).+)\" around $(@__FILE__):$(@__LINE__)") end @test @id28992([1] .< [2] .< [3]) == [true] @test @id28992(2 ^ -2) == 0.25 @@ -1989,9 +1999,9 @@ end @test Meta.parse("var\"#\"") === Symbol("#") @test Meta.parse("var\"true\"") === Symbol("true") @test Meta.parse("var\"false\"") === Symbol("false") 
-@test_throws ParseError Meta.parse("var\"#\"x") # Reject string macro-like suffix -@test_throws ParseError Meta.parse("var \"#\"") -@test_throws ParseError Meta.parse("var\"for\" i = 1:10; end") +@test_parseerror "var\"#\"x" # Reject string macro-like suffix +@test_parseerror "var \"#\"" +@test_parseerror "var\"for\" i = 1:10; end" # A few cases which would be ugly to deal with if var"#" were a string macro: @test Meta.parse("var\"#\".var\"a-b\"") == Expr(:., Symbol("#"), QuoteNode(Symbol("a-b"))) @test Meta.parse("export var\"#\"") == Expr(:export, Symbol("#")) @@ -2216,7 +2226,7 @@ end end # line break in : expression disallowed -@test_throws Meta.ParseError Meta.parse("[1 :\n2] == [1:2]") +@test_parseerror "[1 :\n2] == [1:2]" # added ⟂ to operator precedence (#24404) @test Meta.parse("a ⟂ b ⟂ c") == Expr(:comparison, :a, :⟂, :b, :⟂, :c) @@ -2237,7 +2247,8 @@ end end # only allow certain characters after interpolated vars (#25231) -@test Meta.parse("\"\$x෴ \"",raise=false) == Expr(:error, "interpolated variable \$x ends with invalid character \"෴\"; use \"\$(x)\" instead.") +@test_parseerror("\"\$x෴ \"", + "interpolated variable \$x ends with invalid character \"෴\"; use \"\$(x)\" instead.") @test Base.incomplete_tag(Meta.parse("\"\$foo", raise=false)) === :string @testset "issue #30341" begin @@ -2276,14 +2287,11 @@ end err = Expr( :error, - "\":\" in \"$imprt\" syntax can only be used when importing a single module. " * - "Split imports into multiple lines." ) - ex = Meta.parse("$imprt A, B: x, y", raise=false) - @test ex == err - - ex = Meta.parse("$imprt A: x, B: y", raise=false) - @test ex == err + @test_parseerror("$imprt A, B: x, y", + "\":\" in \"$imprt\" syntax can only be used when importing a single module. Split imports into multiple lines.") + @test_parseerror("$imprt A: x, B: y", + "\":\" in \"$imprt\" syntax can only be used when importing a single module. 
Split imports into multiple lines.") end end @@ -2303,24 +2311,31 @@ let exc = try eval(:(f(x,x)=1)) catch e ; e ; end @test !occursin("incorrect_file", exc.msg) end -# issue #34967 -@test_throws LoadError("string", 2, ErrorException("syntax: invalid UTF-8 sequence")) include_string(@__MODULE__, - "x34967 = 1\n# Halloa\xf5b\nx34967 = 2") -@test x34967 == 1 -@test_throws LoadError("string", 1, ErrorException("syntax: invalid UTF-8 sequence")) include_string(@__MODULE__, - "x\xf5 = 3\n# Halloa\xf5b\nx34967 = 4") -@test_throws LoadError("string", 3, ErrorException("syntax: invalid UTF-8 sequence")) include_string(@__MODULE__, - """ - # line 1 - # line 2 - # Hello\xf5b - x34967 = 6 - """) - -@test Meta.parse("aa\u200b_", raise=false) == - Expr(:error, "invisible character \\u200b near column 3") -@test Meta.parse("aa\UE0080", raise=false) == - Expr(:error, "invalid character \"\Ue0080\" near column 3") +@testset "issue #34967" begin + @test_parseerror "#\xf5b\nx" "invalid UTF-8 sequence" + + # Test line UTF-8 errors with line numbers + let ex = Meta.parseall("x\n#\xf5b\ny") + @test Meta.isexpr(ex, :toplevel, 4) && Meta.isexpr(last(ex.args), :error) + @test ex.args[3] == LineNumberNode(2,:none) + end + let ex = Meta.parseall("x\xf5\n#\xf5b\ny") + @test Meta.isexpr(ex, :toplevel, 2) && Meta.isexpr(last(ex.args), :error) + @test ex.args[1] == LineNumberNode(1,:none) + end + let ex = Meta.parseall("#line1\n#line2\n#\xf5b\ny") + @test Meta.isexpr(ex, :toplevel, 2) && Meta.isexpr(last(ex.args), :error) + @test ex.args[1] == LineNumberNode(3,:none) + end +end + +@test_parseerror "aa\u200b_" "invisible character \\u200b near column 3" +@test_parseerror "aa\UE0080" "invalid character \"\Ue0080\" near column 3" + +@testset "unrecognized escapes in string/char literals" begin + @test_parseerror "\"\\.\"" + @test_parseerror "\'\\.\'" +end # issue #31238 a31238, b31238 = let x @@ -2389,8 +2404,8 @@ end @test x == 6 # issue #36196 -@test_throws ParseError("\"for\" at none:1 expected 
\"end\", got \")\"") Meta.parse("(for i=1; println())") -@test_throws ParseError("\"try\" at none:1 expected \"end\", got \")\"") Meta.parse("(try i=1; println())") +@test_parseerror "(for i=1; println())" "\"for\" at none:1 expected \"end\", got \")\"" +@test_parseerror "(try i=1; println())" "\"try\" at none:1 expected \"end\", got \")\"" # issue #36272 macro m36272() @@ -2437,10 +2452,10 @@ end let (-->) = (+) @test (40 --> 2) == 42 end -@test_throws ParseError("invalid operator \"<---\"") Meta.parse("1<---2") -@test_throws ParseError("invalid operator \".<---\"") Meta.parse("1 .<--- 2") -@test_throws ParseError("invalid operator \"--\"") Meta.parse("a---b") -@test_throws ParseError("invalid operator \".--\"") Meta.parse("a.---b") +@test_parseerror("1<---2", "invalid operator \"<---\"") +@test_parseerror("1 .<--- 2", "invalid operator \".<---\"") +@test_parseerror("a---b", "invalid operator \"--\"") +@test_parseerror("a.---b", "invalid operator \".--\"") # issue #37228 # NOTE: the `if` needs to be at the top level @@ -2475,15 +2490,14 @@ end @test :(if true 'a' else 1 end) == Expr(:if, true, quote 'a' end, quote 1 end) # issue #37664 -@test_throws ParseError("extra token \"b\" after end of expression") Meta.parse("a b") -@test_throws ParseError("extra token \"b\" after end of expression") Meta.parse("a#==#b") -@test_throws ParseError("extra token \"b\" after end of expression") Meta.parse("a #==#b") -@test_throws ParseError("extra token \"b\" after end of expression") Meta.parse("a#==# b") - -@test_throws ParseError("extra token \"2\" after end of expression") Meta.parse("1 2") -@test_throws ParseError("extra token \"2\" after end of expression") Meta.parse("1#==#2") -@test_throws ParseError("extra token \"2\" after end of expression") Meta.parse("1 #==#2") -@test_throws ParseError("extra token \"2\" after end of expression") Meta.parse("1#==# 2") +@test_parseerror("a b", "extra token \"b\" after end of expression") +@test_parseerror("a#==#b", "extra token \"b\" 
after end of expression") +@test_parseerror("a #==#b", "extra token \"b\" after end of expression") +@test_parseerror("a#==# b", "extra token \"b\" after end of expression") +@test_parseerror("1 2", "extra token \"2\" after end of expression") +@test_parseerror("1#==#2", "extra token \"2\" after end of expression") +@test_parseerror("1 #==#2", "extra token \"2\" after end of expression") +@test_parseerror("1#==# 2", "extra token \"2\" after end of expression") @test size([1#==#2#==#3]) == size([1 2 3]) @test size([1#==#2#==#3]) == size([1 2 3]) # tabs @@ -2506,9 +2520,7 @@ end Meta.parse("if#==#x0#==#y+1#==#else#==#z#==#end") @test Meta.parse("function(x) x end") == Meta.parse("function(x)#==#x#==#end") @test Meta.parse("a ? b : c") == Meta.parse("a#==#?#==#b#==#:#==#c") -@test_throws ParseError("space before \"(\" not allowed in \"f (\" at none:1") begin - Meta.parse("f#==#(x)=x") -end +@test_parseerror("f#==#(x)=x", "space before \"(\" not allowed in \"f (\" at none:1") @test Meta.parse("try f() catch e g() finally h() end") == Meta.parse("try#==#f()#==#catch#==#e#==#g()#==#finally#==#h()#==#end") @test Meta.parse("@m a b") == Meta.parse("@m#==#a#==#b") @@ -2540,11 +2552,11 @@ end @test B37890(1.0, 2.0f0) isa B37890{Int, Int8} # import ... 
as -@test_throws ParseError("invalid syntax \"using A as ...\"") Meta.parse("using A as B") -@test_throws ParseError("invalid syntax \"using A.b as ...\"") Meta.parse("using A.b as B") -@test_throws ParseError("invalid syntax \"using A.b as ...\"") Meta.parse("using X, A.b as B") -@test_throws ParseError("invalid syntax \"import A as B:\"") Meta.parse("import A as B: c") -@test_throws ParseError("invalid syntax \"import A.b as B:\"") Meta.parse("import A.b as B: c") +@test_parseerror("using A as B", "invalid syntax \"using A as ...\"") +@test_parseerror("using A.b as B", "invalid syntax \"using A.b as ...\"") +@test_parseerror("using X, A.b as B", "invalid syntax \"using A.b as ...\"") +@test_parseerror("import A as B: c", "invalid syntax \"import A as B:\"") +@test_parseerror("import A.b as B: c", "invalid syntax \"import A.b as B:\"") module TestImportAs using Test @@ -2583,7 +2595,9 @@ import .Mod2.y as y2 @test y2 == 2 @test !@isdefined(y) -@test_throws ErrorException eval(:(import .Mod.x as (a.b))) +# Test that eval rejects the invalid syntax `import .Mod.x as (a.b)` +@test_throws ErrorException eval( + Expr(:import, Expr(:as, Expr(:., :., :Mod, :x), Expr(:., :a, QuoteNode(:b))))) import .Mod.maybe_undef as mu @test_throws UndefVarError mu @@ -2639,10 +2653,10 @@ import .TestImportAs.Mod2 as M2 end @testset "issue #37393" begin - @test :(for outer i = 1:3; end) == Expr(:for, Expr(:(=), Expr(:outer, :i), :(1:3)), :(;;)) + @test remove_linenums!(:(for outer i = 1:3; end)) == Expr(:for, Expr(:(=), Expr(:outer, :i), :(1:3)), :(;;)) i = :i - @test :(for outer $i = 1:3; end) == Expr(:for, Expr(:(=), Expr(:outer, :i), :(1:3)), :(;;)) - @test :(for outer = 1:3; end) == Expr(:for, Expr(:(=), :outer, :(1:3)), :(;;)) + @test remove_linenums!(:(for outer $i = 1:3; end)) == Expr(:for, Expr(:(=), Expr(:outer, :i), :(1:3)), :(;;)) + @test remove_linenums!(:(for outer = 1:3; end)) == Expr(:for, Expr(:(=), :outer, :(1:3)), :(;;)) # TIL that this is possible for outer $ i = 1:3 
@test 1 $ 2 in 1:3 @@ -2661,10 +2675,10 @@ end @test Meta.isexpr(Meta.parse(""" f(i for i in 1:3)""").args[2], :generator) - @test_throws Meta.ParseError Meta.parse(""" + @test_parseerror """ for i in 1:3 - end""") + end""" end # PR #37973 @@ -2819,7 +2833,7 @@ end Expr(:nrow, 1, Expr(:row, 0, 9, 3), Expr(:row, 4, 5, 4))) @test :([1 ; 2 ;; 3 ; 4]) == Expr(:ncat, 2, Expr(:nrow, 1, 1, 2), Expr(:nrow, 1, 3, 4)) - @test_throws ParseError Meta.parse("[1 2 ;; 3 4]") # cannot mix spaces and ;; except as line break + @test_parseerror "[1 2 ;; 3 4]" # cannot mix spaces and ;; except as line break @test :([1 2 ;; 3 4]) == :([1 2 3 4]) @test :([1 2 ;; @@ -2829,8 +2843,8 @@ end @test Meta.parse("[1;\n\n]") == :([1;]) @test Meta.parse("[1\n;]") == :([1;]) # semicolons following a linebreak are fine @test Meta.parse("[1\n;;; 2]") == :([1;;; 2]) - @test_throws ParseError Meta.parse("[1;\n;2]") # semicolons cannot straddle a line break - @test_throws ParseError Meta.parse("[1; ;2]") # semicolons cannot be separated by a space + @test_parseerror "[1;\n;2]" # semicolons cannot straddle a line break + @test_parseerror "[1; ;2]" # semicolons cannot be separated by a space end # issue #25652 @@ -2900,13 +2914,13 @@ macro m_underscore_hygiene() return :(_ = 1) end -@test @macroexpand(@m_underscore_hygiene()) == :(_ = 1) +@test Meta.@lower(@m_underscore_hygiene()) === 1 macro m_begin_hygiene(a) return :($(esc(a))[begin]) end -@test @m_begin_hygiene([1, 2, 3]) == 1 +@test @m_begin_hygiene([1, 2, 3]) === 1 # issue 40258 @test "a $("b $("c")")" == "a b c" @@ -3103,10 +3117,10 @@ end @test fails(error) @test !fails(() -> 1 + 2) - @test_throws ParseError Meta.parse("try foo() else bar() end") - @test_throws ParseError Meta.parse("try foo() else bar() catch; baz() end") - @test_throws ParseError Meta.parse("try foo() catch; baz() finally foobar() else bar() end") - @test_throws ParseError Meta.parse("try foo() finally foobar() else bar() catch; baz() end") + @test_parseerror "try foo() else 
bar() end" + @test_parseerror "try foo() else bar() catch; baz() end" + @test_parseerror "try foo() catch; baz() finally foobar() else bar() end" + @test_parseerror "try foo() finally foobar() else bar() catch; baz() end" err = try try @@ -3171,23 +3185,23 @@ end @test x == 1 end -@test_throws ParseError Meta.parse(""" +@test_parseerror """ function checkUserAccess(u::User) if u.accessLevel != "user\u202e \u2066# users are not allowed\u2069\u2066" return true end return false end -""") +""" -@test_throws ParseError Meta.parse(""" +@test_parseerror """ function checkUserAccess(u::User) #=\u202e \u2066if (u.isAdmin)\u2069 \u2066 begin admins only =# return true #= end admin only \u202e \u2066end\u2069 \u2066=# return false end -""") +""" @testset "empty nd arrays" begin @test :([]) == Expr(:vect) @@ -3218,16 +3232,22 @@ end ;; ]) == Expr(:ncat, 2) - @test_throws ParseError Meta.parse("[; ;]") - @test_throws ParseError Meta.parse("[;; ;]") - @test_throws ParseError Meta.parse("[;\n;]") + @test_parseerror "[; ;]" + @test_parseerror "[;; ;]" + @test_parseerror "[;\n;]" end @test Meta.parseatom("@foo", 1; filename="foo", lineno=7) == (Expr(:macrocall, :var"@foo", LineNumberNode(7, :foo)), 5) @test Meta.parseall("@foo"; filename="foo", lineno=3) == Expr(:toplevel, LineNumberNode(3, :foo), Expr(:macrocall, :var"@foo", LineNumberNode(3, :foo))) -let ex = :(const $(esc(:x)) = 1; (::typeof(2))() = $(esc(:x))) - @test macroexpand(Main, Expr(:var"hygienic-scope", ex, Main)).args[3].args[1] == :((::$(GlobalRef(Main, :typeof))(2))()) +module M43993 +function foo43993 end +const typeof = error +end +let ex = :(const $(esc(:x)) = 1; (::typeof($(esc(:foo43993))))() = $(esc(:x))) + Core.eval(M43993, Expr(:var"hygienic-scope", ex, Core)) + @test M43993.x === 1 + @test invokelatest(M43993.foo43993) === 1 end struct Foo44013 @@ -3408,14 +3428,12 @@ f45162(f) = f(x=1) @test first(methods(f45162)).called != 0 # issue #45024 -@test_throws ParseError("expected assignment after \"const\"") 
Meta.parse("const x") -@test_throws ParseError("expected assignment after \"const\"") Meta.parse("const x::Int") +@test_parseerror "const x" "expected assignment after \"const\"" +@test_parseerror "const x::Int" "expected assignment after \"const\"" # these cases have always been caught during lowering, since (const (global x)) is not # ambiguous with the lowered form (const x), but that could probably be changed. -@test Meta.lower(@__MODULE__, :(global const x)) == Expr(:error, "expected assignment after \"const\"") -@test Meta.lower(@__MODULE__, :(global const x::Int)) == Expr(:error, "expected assignment after \"const\"") -@test Meta.lower(@__MODULE__, :(const global x)) == Expr(:error, "expected assignment after \"const\"") -@test Meta.lower(@__MODULE__, :(const global x::Int)) == Expr(:error, "expected assignment after \"const\"") +@test Meta.lower(@__MODULE__, Expr(:const, Expr(:global, :x))) == Expr(:error, "expected assignment after \"const\"") +@test Meta.lower(@__MODULE__, Expr(:const, Expr(:global, Expr(:(::), :x, :Int)))) == Expr(:error, "expected assignment after \"const\"") @testset "issue 25072" begin @test '\xc0\x80' == reinterpret(Char, 0xc0800000) @@ -3472,3 +3490,20 @@ end @test @_macroexpand(global (; x::S, $(esc(:y))::$(esc(:T))) = a) == :(global (; x::$(GlobalRef(m, :S)), y::T) = $(GlobalRef(m, :a))) end + +# issue #49920 +let line1 = (quote end).args[1], + line2 = (quote end).args[1], + line3 = (quote end).args[1] + @test 1 === eval(Meta.lower(Main, Expr(:block, line1, 1, line2, line3))) +end + +# issue #49984 +macro z49984(s); :(let a; $(esc(s)); end); end +@test let a = 1; @z49984(a) === 1; end + +# issues #37783, #39929, #42552, #43379, and #48332 +let x = 1 => 2 + @test_throws ErrorException @eval a => b = 2 + @test_throws "function Base.=> must be explicitly imported to be extended" @eval a => b = 2 +end diff --git a/test/testhelpers/Furlongs.jl b/test/testhelpers/Furlongs.jl index 17970f0b0572e..f63b5460c7c16 100644 --- 
a/test/testhelpers/Furlongs.jl +++ b/test/testhelpers/Furlongs.jl @@ -74,7 +74,7 @@ for op in (:+, :-) end end for op in (:(==), :(!=), :<, :<=, :isless, :isequal) - @eval $op(x::Furlong{p}, y::Furlong{p}) where {p} = $op(x.val, y.val) + @eval $op(x::Furlong{p}, y::Furlong{p}) where {p} = $op(x.val, y.val)::Bool end for (f,op) in ((:_plus,:+),(:_minus,:-),(:_times,:*),(:_div,://)) @eval function $f(v::T, ::Furlong{p}, ::Union{Furlong{q},Val{q}}) where {T,p,q} diff --git a/test/threadpool_use.jl b/test/threadpool_use.jl index e5ea5f95cf4ff..7523991fdf6a7 100644 --- a/test/threadpool_use.jl +++ b/test/threadpool_use.jl @@ -9,5 +9,11 @@ using Base.Threads @test fetch(Threads.@spawn Threads.threadpool()) === :default @test fetch(Threads.@spawn :default Threads.threadpool()) === :default @test fetch(Threads.@spawn :interactive Threads.threadpool()) === :interactive +tp = :default +@test fetch(Threads.@spawn tp Threads.threadpool()) === :default +tp = :interactive +@test fetch(Threads.@spawn tp Threads.threadpool()) === :interactive +tp = :foo +@test_throws ArgumentError Threads.@spawn tp Threads.threadpool() @test Threads.threadpooltids(:interactive) == [1] @test Threads.threadpooltids(:default) == [2] diff --git a/test/threads.jl b/test/threads.jl index af752fe715b0e..8189311739e31 100644 --- a/test/threads.jl +++ b/test/threads.jl @@ -312,7 +312,7 @@ close(proc.in) if ( !success(proc) ) || ( timeout ) @error "A \"spawn and wait lots of tasks\" test failed" n proc.exitcode proc.termsignal success(proc) timeout end - if Sys.iswindows() + if Sys.iswindows() || Sys.isapple() # Known failure: https://github.com/JuliaLang/julia/issues/43124 @test_skip success(proc) else