From 459c79ce363690a1068827234baf2b7c63a03559 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Thu, 1 Sep 2022 07:56:09 +0300 Subject: [PATCH 01/24] make pending_perm a heap instead of sorted vector in optimizer (#46586) --- base/compiler/ssair/ir.jl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index df975bf17d205..587c64f11062d 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -560,9 +560,8 @@ mutable struct IncrementalCompact # This supports insertion while compacting new_new_nodes::NewNodeStream # New nodes that were before the compaction point at insertion time new_new_used_ssas::Vector{Int} - # TODO: Switch these two to a min-heap of some sort pending_nodes::NewNodeStream # New nodes that were after the compaction point at insertion time - pending_perm::Vector{Int} + pending_perm::Vector{Int} # pending_nodes.info[pending_perm] is in min-heap order by pos # State idx::Int @@ -769,9 +768,7 @@ end function add_pending!(compact::IncrementalCompact, pos::Int, attach_after::Bool) node = add!(compact.pending_nodes, pos, attach_after) - # TODO: switch this to `l = length(pending_nodes); splice!(pending_perm, searchsorted(pending_perm, l), l)` - push!(compact.pending_perm, length(compact.pending_nodes)) - sort!(compact.pending_perm, DEFAULT_STABLE, Order.By(x->compact.pending_nodes.info[x].pos, Order.Forward)) + heappush!(compact.pending_perm, length(compact.pending_nodes), By(x -> compact.pending_nodes.info[x].pos)) return node end @@ -1451,7 +1448,7 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}= if !(info.attach_after ? info.pos <= compact.idx - 1 : info.pos <= compact.idx) break end - popfirst!(compact.pending_perm) + heappop!(compact.pending_perm, By(x -> compact.pending_nodes.info[x].pos)) end # Move to next block compact.idx += 1 @@ -1473,7 +1470,7 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}= elseif !isempty(compact.pending_perm) && (info = compact.pending_nodes.info[compact.pending_perm[1]]; info.attach_after ? info.pos == idx - 1 : info.pos == idx) - new_idx = popfirst!(compact.pending_perm) + new_idx = heappop!(compact.pending_perm, By(x -> compact.pending_nodes.info[x].pos)) new_node_entry = compact.pending_nodes.stmts[new_idx] new_node_info = compact.pending_nodes.info[new_idx] new_idx += length(compact.ir.stmts) + length(compact.ir.new_nodes) From 5154cf0bba77eac1c990acbf231b5f63a58ef145 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Fri, 2 Sep 2022 07:42:27 +0700 Subject: [PATCH 02/24] Replace Vector with BitSet in optimization passes (#46587) --- base/compiler/ssair/passes.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 70ae94e611a1f..9dc918a865c13 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -72,7 +72,7 @@ function try_compute_fieldidx_stmt(ir::Union{IncrementalCompact,IRCode}, stmt::E return try_compute_fieldidx(typ, field) end -function find_curblock(domtree::DomTree, allblocks::Vector{Int}, curblock::Int) +function find_curblock(domtree::DomTree, allblocks::BitSet, curblock::Int) # TODO: This can be much faster by looking at current level and only # searching for those blocks in a sorted order while !(curblock in allblocks) && curblock !== 0 @@ -92,7 +92,7 @@ function val_for_def_expr(ir::IRCode, def::Int, fidx::Int) end end -function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, curblock::Int) +function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::BitSet, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, curblock::Int) curblock = find_curblock(domtree, allblocks, curblock) def = 0 for stmt in du.defs @@ -103,7 +103,7 @@ function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector def == 0 ? phinodes[curblock] : val_for_def_expr(ir, def, fidx) end -function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, +function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::BitSet, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, use::Int) def, useblock, curblock = find_def_for_use(ir, domtree, allblocks, du, use) if def == 0 @@ -122,7 +122,7 @@ end # even when the allocation contains an uninitialized field, we try an extra effort to check # if this load at `idx` have any "safe" `setfield!` calls that define the field function has_safe_def( - ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, + ir::IRCode, domtree::DomTree, allblocks::BitSet, du::SSADefUse, newidx::Int, idx::Int) def, _, _ = find_def_for_use(ir, domtree, allblocks, du, idx) # will throw since we already checked this `:new` site doesn't define this field @@ -157,7 +157,7 @@ end # find the first dominating def for the given use function find_def_for_use( - ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, use::Int, inclusive::Bool=false) + ir::IRCode, domtree::DomTree, allblocks::BitSet, du::SSADefUse, use::Int, inclusive::Bool=false) useblock = block_for_inst(ir.cfg, use) curblock = find_curblock(domtree, allblocks, useblock) local def = 0 @@ -1226,7 +1226,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse # but we should come up with semantics for well defined semantics # for uninitialized fields first. ndefuse = length(fielddefuse) - blocks = Vector{Tuple{#=phiblocks=# Vector{Int}, #=allblocks=# Vector{Int}}}(undef, ndefuse) + blocks = Vector{Tuple{#=phiblocks=# Vector{Int}, #=allblocks=# BitSet}}(undef, ndefuse) for fidx in 1:ndefuse du = fielddefuse[fidx] isempty(du.uses) && continue @@ -1237,7 +1237,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse else phiblocks = iterated_dominance_frontier(ir.cfg, ldu, get!(lazydomtree)) end - allblocks = sort!(vcat(phiblocks, ldu.def_bbs); alg=QuickSort) + allblocks = union!(BitSet(phiblocks), ldu.def_bbs) blocks[fidx] = phiblocks, allblocks if fidx + 1 > length(defexpr.args) for i = 1:length(du.uses) From e3d30d73100629738e8fb0708b02efc286fe91d4 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Thu, 8 Sep 2022 08:47:51 +0600 Subject: [PATCH 03/24] inline final use of sort! in compiler --- base/compiler/ssair/ir.jl | 42 ++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 587c64f11062d..b2a0852a08302 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -535,13 +535,31 @@ insert_node!(ir::IRCode, pos::Int, inst::NewInstruction, attach_after::Bool=fals insert_node!(ir, SSAValue(pos), inst, attach_after) # For bootstrapping -function my_sortperm(v) - p = Vector{Int}(undef, length(v)) - for i = 1:length(v) - p[i] = i +function my_sort!(v::Vector; by::Function) + isempty(v) && return v # This branch is hit 95% of the time + + if length(v) > 30 # Comb pass avoids quadratic runtime + interval = (3 * length(v)) >> 2 + while interval > 1 + @inbounds for j in 1:length(v)-interval + a, b = v[j], v[j+interval] + v[j], v[j+interval] = by(b) < by(a) ? (b, a) : (a, b) + end + interval = (3 * interval) >> 2 + end + end + + @inbounds for i in 2:length(v) # Insertion sort + x = v[i] + y = by(x) + while i > 1 && y < by(v[i-1]) + v[i] = v[i-1] + i -= 1 + end + v[i] = x end - sort!(p, Sort.DEFAULT_UNSTABLE, Order.Perm(Sort.Forward,v)) - p + + v end mutable struct IncrementalCompact @@ -573,10 +591,9 @@ mutable struct IncrementalCompact function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=false) # Sort by position with attach after nodes after regular ones - perm = my_sortperm(Int[let new_node = code.new_nodes.info[i] - (new_node.pos * 2 + Int(new_node.attach_after)) - end for i in 1:length(code.new_nodes)]) - new_len = length(code.stmts) + length(code.new_nodes) + info = code.new_nodes.info + perm = my_sort!(collect(eachindex(info)); by=i->(2info[i].pos+info[i].attach_after, i)) + new_len = length(code.stmts) + length(info) result = InstructionStream(new_len) used_ssas = fill(0, new_len) new_new_used_ssas = Vector{Int}() @@ -628,8 +645,9 @@ mutable struct IncrementalCompact # For inlining function IncrementalCompact(parent::IncrementalCompact, code::IRCode, result_offset) - perm = my_sortperm(Int[code.new_nodes.info[i].pos for i in 1:length(code.new_nodes)]) - new_len = length(code.stmts) + length(code.new_nodes) + info = code.new_nodes.info + perm = my_sort!(collect(eachindex(info)); by=i->(info[i].pos, i)) + new_len = length(code.stmts) + length(info) ssa_rename = Any[SSAValue(i) for i = 1:new_len] bb_rename = Vector{Int}() pending_nodes = NewNodeStream() From 52526994c975bd730fe5a4c307edddb0601c94c6 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 10 Sep 2022 15:12:26 +0600 Subject: [PATCH 04/24] Make Sort a stdlib (big diff) --- base/Base.jl | 8 +- base/bitset.jl | 1 - base/compiler/compiler.jl | 4 +- base/compiler/sort.jl | 97 +++++++++++++++++++ base/compiler/ssair/ir.jl | 32 +----- base/cpuid.jl | 2 +- base/range.jl | 13 --- base/reflection.jl | 4 +- base/sysimg.jl | 5 +- base/util.jl | 7 +- contrib/print_sorted_stdlibs.jl | 4 +- stdlib/LibGit2/Project.toml | 1 + stdlib/Sort/Project.toml | 12 +++ stdlib/Sort/docs/src/index.md | 3 + base/sort.jl => stdlib/Sort/src/Sort.jl | 35 +++---- stdlib/Sort/src/ranges.jl | 10 ++ .../Sort/test/runtests.jl | 4 +- 17 files changed, 155 insertions(+), 87 deletions(-) create mode 100644 base/compiler/sort.jl create mode 100644 stdlib/Sort/Project.toml create mode 100644 stdlib/Sort/docs/src/index.md rename base/sort.jl => stdlib/Sort/src/Sort.jl (99%) create mode 100644 stdlib/Sort/src/ranges.jl rename test/sorting.jl => stdlib/Sort/test/runtests.jl (99%) diff --git a/base/Base.jl b/base/Base.jl index 63728fdba3e4e..f74476ce03aa8 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -346,10 +346,8 @@ include("ordering.jl") using .Order # Combinatorics -include("sort.jl") -using .Sort -# BinaryPlatforms, used by Artifacts. Needs `Sort`. +# BinaryPlatforms, used by Artifacts. include("binaryplatforms.jl") # Fast math @@ -465,8 +463,6 @@ for match = _methods(+, (Int, Int), -1, get_world_counter()) Dict("abc" => Set())["abc"] pushfirst!([], sum) get(Base.pkgorigins, Base.PkgId(Base), nothing) - sort!([1,2,3]) - unique!([1,2,3]) cumsum([1,2,3]) append!(Int[], BitSet()) isempty(BitSet()) @@ -477,7 +473,7 @@ for match = _methods(+, (Int, Int), -1, get_world_counter()) any(t->t[1].line > 1, [(LineNumberNode(2,:none), :(1+1))]) # Code loading uses this - sortperm(mtime.(readdir(".")), rev=true) + mtime.(readdir(".", sort=false)) # JLLWrappers uses these Dict{UUID,Set{String}}()[UUID("692b3bcd-3c85-4b1f-b108-f13ce0eb3210")] = Set{String}() get!(Set{String}, Dict{UUID,Set{String}}(), UUID("692b3bcd-3c85-4b1f-b108-f13ce0eb3210")) diff --git a/base/bitset.jl b/base/bitset.jl index 8727b857bd36b..435898e984b8a 100644 --- a/base/bitset.jl +++ b/base/bitset.jl @@ -430,4 +430,3 @@ end minimum(s::BitSet) = first(s) maximum(s::BitSet) = last(s) extrema(s::BitSet) = (first(s), last(s)) -issorted(s::BitSet) = true diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl index 3c41c353e86ad..41f1eeedd3988 100644 --- a/base/compiler/compiler.jl +++ b/base/compiler/compiler.jl @@ -121,12 +121,10 @@ import Core.Compiler.CoreDocs Core.atdoc!(CoreDocs.docm) # sorting -function sort! end function issorted end include("ordering.jl") using .Order -include("sort.jl") -using .Sort +include("compiler/sort.jl") # We don't include some.jl, but this definition is still useful. something(x::Nothing, y...) = something(y...) diff --git a/base/compiler/sort.jl b/base/compiler/sort.jl new file mode 100644 index 0000000000000..5c5e2016f26ad --- /dev/null +++ b/base/compiler/sort.jl @@ -0,0 +1,97 @@ +# reference on sorted binary search: +# http://www.tbray.org/ongoing/When/200x/2003/03/22/Binary + +# index of the first value of vector a that is greater than or equal to x; +# returns lastindex(v)+1 if x is greater than all values in v. +function searchsortedfirst(v::AbstractVector, x, lo::T, hi::T, o::Ordering)::keytype(v) where T<:Integer + hi = hi + T(1) + len = hi - lo + @inbounds while len != 0 + half_len = len >>> 0x01 + m = lo + half_len + if lt(o, v[m], x) + lo = m + 1 + len -= half_len + 1 + else + hi = m + len = half_len + end + end + return lo +end + +# index of the last value of vector a that is less than or equal to x; +# returns firstindex(v)-1 if x is less than all values of v. +function searchsortedlast(v::AbstractVector, x, lo::T, hi::T, o::Ordering)::keytype(v) where T<:Integer + u = T(1) + lo = lo - u + hi = hi + u + @inbounds while lo < hi - u + m = midpoint(lo, hi) + if lt(o, x, v[m]) + hi = m + else + lo = m + end + end + return lo +end + +# returns the range of indices of v equal to x +# if v does not contain x, returns a 0-length range +# indicating the insertion point of x +function searchsorted(v::AbstractVector, x, ilo::T, ihi::T, o::Ordering)::UnitRange{keytype(v)} where T<:Integer + u = T(1) + lo = ilo - u + hi = ihi + u + @inbounds while lo < hi - u + m = midpoint(lo, hi) + if lt(o, v[m], x) + lo = m + elseif lt(o, x, v[m]) + hi = m + else + a = searchsortedfirst(v, x, max(lo,ilo), m, o) + b = searchsortedlast(v, x, m, min(hi,ihi), o) + return a : b + end + end + return (lo + 1) : (hi - 1) +end + +for s in [:searchsortedfirst, :searchsortedlast, :searchsorted] + @eval begin + $s(v::AbstractVector, x, o::Ordering) = $s(v,x,firstindex(v),lastindex(v),o) + $s(v::AbstractVector, x; + lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) = + $s(v,x,ord(lt,by,rev,order)) + end +end + +# An unstable sorting algorithm for constructing IncrementalCompact. +function sort!(v::Vector; by::Function=identity, (<)::Function=<) + isempty(v) && return v # This branch is hit 95% of the time + + if length(v) > 30 # Comb pass avoids quadratic runtime + interval = (3 * length(v)) >> 2 + while interval > 1 + @inbounds for j in 1:length(v)-interval + a, b = v[j], v[j+interval] + v[j], v[j+interval] = by(b) < by(a) ? (b, a) : (a, b) + end + interval = (3 * interval) >> 2 + end + end + + @inbounds for i in 2:length(v) # Insertion sort + x = v[i] + y = by(x) + while i > 1 && y < by(v[i-1]) + v[i] = v[i-1] + i -= 1 + end + v[i] = x + end + + v +end diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index b2a0852a08302..91143d719e909 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -534,34 +534,6 @@ end insert_node!(ir::IRCode, pos::Int, inst::NewInstruction, attach_after::Bool=false) = insert_node!(ir, SSAValue(pos), inst, attach_after) -# For bootstrapping -function my_sort!(v::Vector; by::Function) - isempty(v) && return v # This branch is hit 95% of the time - - if length(v) > 30 # Comb pass avoids quadratic runtime - interval = (3 * length(v)) >> 2 - while interval > 1 - @inbounds for j in 1:length(v)-interval - a, b = v[j], v[j+interval] - v[j], v[j+interval] = by(b) < by(a) ? (b, a) : (a, b) - end - interval = (3 * interval) >> 2 - end - end - - @inbounds for i in 2:length(v) # Insertion sort - x = v[i] - y = by(x) - while i > 1 && y < by(v[i-1]) - v[i] = v[i-1] - i -= 1 - end - v[i] = x - end - - v -end - mutable struct IncrementalCompact ir::IRCode result::InstructionStream @@ -592,7 +564,7 @@ mutable struct IncrementalCompact function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=false) # Sort by position with attach after nodes after regular ones info = code.new_nodes.info - perm = my_sort!(collect(eachindex(info)); by=i->(2info[i].pos+info[i].attach_after, i)) + perm = sort!(collect(eachindex(info)); by=i->(2info[i].pos+info[i].attach_after, i)) new_len = length(code.stmts) + length(info) result = InstructionStream(new_len) used_ssas = fill(0, new_len) @@ -646,7 +618,7 @@ mutable struct IncrementalCompact # For inlining function IncrementalCompact(parent::IncrementalCompact, code::IRCode, result_offset) info = code.new_nodes.info - perm = my_sort!(collect(eachindex(info)); by=i->(info[i].pos, i)) + perm = sort!(collect(eachindex(info)); by=i->(info[i].pos, i)) new_len = length(code.stmts) + length(info) ssa_rename = Any[SSAValue(i) for i = 1:new_len] bb_rename = Vector{Int}() diff --git a/base/cpuid.jl b/base/cpuid.jl index 48930d8064ba9..8831a33799fb0 100644 --- a/base/cpuid.jl +++ b/base/cpuid.jl @@ -95,7 +95,7 @@ let arch = normalize_arch(String(Sys.ARCH)) if arch in keys(ISAs_by_family) for isa in ISAs_by_family[arch] - unique!(append!(FEATURES, last(isa).features)) + Base._unique!(append!(FEATURES, last(isa).features)) end end diff --git a/base/range.jl b/base/range.jl index 4765503668dd4..8236a50e175ae 100644 --- a/base/range.jl +++ b/base/range.jl @@ -1375,19 +1375,6 @@ function _reverse(r::StepRangeLen, ::Colon) end _reverse(r::LinRange{T}, ::Colon) where {T} = typeof(r)(r.stop, r.start, length(r)) -## sorting ## - -issorted(r::AbstractUnitRange) = true -issorted(r::AbstractRange) = length(r) <= 1 || step(r) >= zero(step(r)) - -sort(r::AbstractUnitRange) = r -sort!(r::AbstractUnitRange) = r - -sort(r::AbstractRange) = issorted(r) ? r : reverse(r) - -sortperm(r::AbstractUnitRange) = 1:length(r) -sortperm(r::AbstractRange) = issorted(r) ? (1:1:length(r)) : (length(r):-1:1) - function sum(r::AbstractRange{<:Real}) l = length(r) # note that a little care is required to avoid overflow in l*(l-1)/2 diff --git a/base/reflection.jl b/base/reflection.jl index 0ed8cfdb0caf4..7c4c319e3f9d9 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -100,7 +100,9 @@ since it is not idiomatic to explicitly export names from `Main`. See also: [`@locals`](@ref Base.@locals), [`@__MODULE__`](@ref). """ names(m::Module; all::Bool = false, imported::Bool = false) = - sort!(ccall(:jl_module_names, Array{Symbol,1}, (Any, Cint, Cint), m, all, imported)) + sort!(unsorted_names(m; all, imported)) +unsorted_names(m::Module; all::Bool = false, imported::Bool = false) = + ccall(:jl_module_names, Array{Symbol,1}, (Any, Cint, Cint), m, all, imported) isexported(m::Module, s::Symbol) = ccall(:jl_module_exports_p, Cint, (Any, Any), m, s) != 0 isdeprecated(m::Module, s::Symbol) = ccall(:jl_is_binding_deprecated, Cint, (Any, Any), m, s) != 0 diff --git a/base/sysimg.jl b/base/sysimg.jl index 5a14bf5bfd3b9..fe3ebfd22f99e 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -49,14 +49,15 @@ let :Distributed, :Future, :InteractiveUtils, - :LibGit2, :Profile, - :SparseArrays, + :Sort, :UUIDs, # 3-depth packages + :LibGit2, :REPL, :SharedArrays, + :SparseArrays, :TOML, :Test, diff --git a/base/util.jl b/base/util.jl index f26ed0717a1fd..91a3672b9512d 100644 --- a/base/util.jl +++ b/base/util.jl @@ -48,10 +48,9 @@ const disable_text_style = Dict{Symbol,String}( # of colors. let color_syms = collect(Iterators.filter(x -> !isa(x, Integer), keys(text_colors))), formatting_syms = [:normal, :bold, :default] - global const available_text_colors = cat( - sort!(intersect(color_syms, formatting_syms), rev=true), - sort!(setdiff( color_syms, formatting_syms)); - dims=1) + global const available_text_colors = vcat( + Core.Compiler.sort!(intersect(color_syms, formatting_syms); < = >), #reverse + Core.Compiler.sort!( setdiff(color_syms, formatting_syms); <)) end const available_text_colors_docstring = diff --git a/contrib/print_sorted_stdlibs.jl b/contrib/print_sorted_stdlibs.jl index bbf890328cb4e..c9c24f921879c 100644 --- a/contrib/print_sorted_stdlibs.jl +++ b/contrib/print_sorted_stdlibs.jl @@ -27,8 +27,8 @@ end project_deps = Dict{String,Set{String}}() for project_dir in readdir(STDLIB_DIR, join=true) - files = readdir(project_dir) - if "Project.toml" in files + project_file = joinpath(project_dir, "Project.toml") + if isfile(project_file) project = TOML.parsefile(joinpath(project_dir, "Project.toml")) if !haskey(project, "name") diff --git a/stdlib/LibGit2/Project.toml b/stdlib/LibGit2/Project.toml index da78f70fa1005..3398d93f7d42d 100644 --- a/stdlib/LibGit2/Project.toml +++ b/stdlib/LibGit2/Project.toml @@ -5,6 +5,7 @@ uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" NetworkOptions = "ca575930-c2e3-43a9-ace4-1e988b2c1908" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +Sort = "a93dcad4-a5b3-87f1-1599-dc5d35b4bf41" SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" [extras] diff --git a/stdlib/Sort/Project.toml b/stdlib/Sort/Project.toml new file mode 100644 index 0000000000000..4b211cc603527 --- /dev/null +++ b/stdlib/Sort/Project.toml @@ -0,0 +1,12 @@ +name = "Sort" +uuid = "7744cb9a-8a56-1d63-a5da-e2fdf8a12fa2" + +[deps] +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[extras] +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" + +[targets] +test = ["Test", "OffsetArrays"] diff --git a/stdlib/Sort/docs/src/index.md b/stdlib/Sort/docs/src/index.md new file mode 100644 index 0000000000000..b28a7d9b642f7 --- /dev/null +++ b/stdlib/Sort/docs/src/index.md @@ -0,0 +1,3 @@ +# Sorting + +documentation diff --git a/base/sort.jl b/stdlib/Sort/src/Sort.jl similarity index 99% rename from base/sort.jl rename to stdlib/Sort/src/Sort.jl index d668424a641b0..5656a9a9ee317 100644 --- a/base/sort.jl +++ b/stdlib/Sort/src/Sort.jl @@ -2,10 +2,8 @@ module Sort -import ..@__MODULE__, ..parentmodule -const Base = parentmodule(@__MODULE__) -using .Base.Order -using .Base: copymutable, LinearIndices, length, (:), iterate, OneTo, +using Base.Order +using Base: copymutable, LinearIndices, length, (:), iterate, OneTo, eachindex, axes, first, last, similar, zip, OrdinalRange, firstindex, lastindex, AbstractVector, @inbounds, AbstractRange, @eval, @inline, Vector, @noinline, AbstractMatrix, AbstractUnitRange, isless, identity, eltype, >, <, <=, >=, |, +, -, *, !, @@ -14,14 +12,7 @@ using .Base: copymutable, LinearIndices, length, (:), iterate, OneTo, min, max, reinterpret, signed, unsigned, Signed, Unsigned, typemin, xor, Type, BitSigned, Val, midpoint, @boundscheck, checkbounds -using .Base: >>>, !==, != - -import .Base: - sort, - sort!, - issorted, - sortperm, - to_indices +using Base: >>>, !==, != export # also exported by Base # order-only: @@ -43,16 +34,14 @@ export # also exported by Base InsertionSort, QuickSort, MergeSort, - PartialQuickSort - -export # not exported by Base + PartialQuickSort, + # not exported by Base Algorithm, DEFAULT_UNSTABLE, DEFAULT_STABLE, SMALL_ALGORITHM, SMALL_THRESHOLD - ## functions requiring only ordering ## function issorted(itr, order::Ordering) @@ -94,6 +83,8 @@ issorted(itr; lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) = issorted(itr, ord(lt,by,rev,order)) +issorted(::BitSet) = true + function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering) sort!(v, firstindex(v), lastindex(v), PartialQuickSort(k), o) maybeview(v, k) @@ -1461,12 +1452,10 @@ end module Float using ..Sort using ...Order -using ..Base: @inbounds, AbstractVector, Vector, last, firstindex, lastindex, Missing, Type, reinterpret - +using Base: @inbounds, AbstractVector, Vector, last, firstindex, lastindex, Missing, Type, reinterpret import Core.Intrinsics: slt_int import ..Sort: sort!, UIntMappable, uint_map, uint_unmap import ...Order: lt, DirectOrdering - # IEEEFloat is not available in Core.Compiler const Floats = Union{Float16, Float32, Float64} # fpsort is not safe for vectors of mixed bitwidth such as Vector{Union{Float32, Float64}}. @@ -1589,7 +1578,6 @@ specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ReverseOrder issignleft(o::ForwardOrdering, x::Floats) = lt(o, x, zero(x)) issignleft(o::ReverseOrdering, x::Floats) = lt(o, x, -zero(x)) issignleft(o::Perm, i::Integer) = issignleft(o.order, o.data[i]) - function fpsort!(v::AbstractVector{T}, a::Algorithm, o::Ordering, t::Union{AbstractVector{T}, Nothing}=nothing) where T # fpsort!'s optimizations speed up comparisons, of which there are O(nlogn). @@ -1609,7 +1597,6 @@ function fpsort!(v::AbstractVector{T}, a::Algorithm, o::Ordering, return v end - fpsort!(v::AbstractVector, a::Sort.PartialQuickSort, o::Ordering) = sort!(v, firstindex(v), lastindex(v), a, o) @@ -1624,4 +1611,10 @@ end end # module Sort.Float +include("ranges.jl") + +for sym in Base.unsorted_names(Sort) + @eval Base const $sym = $(eval(sym)) +end + end # module Sort diff --git a/stdlib/Sort/src/ranges.jl b/stdlib/Sort/src/ranges.jl new file mode 100644 index 0000000000000..8812e09944888 --- /dev/null +++ b/stdlib/Sort/src/ranges.jl @@ -0,0 +1,10 @@ +issorted(r::AbstractUnitRange) = true +issorted(r::AbstractRange) = length(r) <= 1 || step(r) >= zero(step(r)) + +sort(r::AbstractUnitRange) = r +sort!(r::AbstractUnitRange) = r + +sort(r::AbstractRange) = issorted(r) ? r : reverse(r) + +sortperm(r::AbstractUnitRange) = 1:length(r) +sortperm(r::AbstractRange) = issorted(r) ? (1:1:length(r)) : (length(r):-1:1) diff --git a/test/sorting.jl b/stdlib/Sort/test/runtests.jl similarity index 99% rename from test/sorting.jl rename to stdlib/Sort/test/runtests.jl index 9766ee99ce751..48297ce77325d 100644 --- a/test/sorting.jl +++ b/stdlib/Sort/test/runtests.jl @@ -5,9 +5,7 @@ module SortingTests using Base.Order using Random using Test - -isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl") -using .Main.OffsetArrays +using OffsetArrays @testset "Order" begin @test Forward == ForwardOrdering() From bbcbdc483442b5a681bf3d832b7db118fc28314a Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 10 Sep 2022 15:21:41 +0600 Subject: [PATCH 05/24] remove no longer needed using statements --- stdlib/Sort/src/Sort.jl | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/stdlib/Sort/src/Sort.jl b/stdlib/Sort/src/Sort.jl index 5656a9a9ee317..c9c8a45d3a249 100644 --- a/stdlib/Sort/src/Sort.jl +++ b/stdlib/Sort/src/Sort.jl @@ -3,16 +3,8 @@ module Sort using Base.Order -using Base: copymutable, LinearIndices, length, (:), iterate, OneTo, - eachindex, axes, first, last, similar, zip, OrdinalRange, firstindex, lastindex, - AbstractVector, @inbounds, AbstractRange, @eval, @inline, Vector, @noinline, - AbstractMatrix, AbstractUnitRange, isless, identity, eltype, >, <, <=, >=, |, +, -, *, !, - extrema, sub_with_overflow, add_with_overflow, oneunit, div, getindex, setindex!, - length, resize!, fill, Missing, require_one_based_indexing, keytype, UnitRange, - min, max, reinterpret, signed, unsigned, Signed, Unsigned, typemin, xor, Type, BitSigned, Val, - midpoint, @boundscheck, checkbounds - -using Base: >>>, !==, != +using Base: copymutable, OneTo, sub_with_overflow, add_with_overflow, + require_one_based_indexing, BitSigned, midpoint export # also exported by Base # order-only: @@ -1452,7 +1444,6 @@ end module Float using ..Sort using ...Order -using Base: @inbounds, AbstractVector, Vector, last, firstindex, lastindex, Missing, Type, reinterpret import Core.Intrinsics: slt_int import ..Sort: sort!, UIntMappable, uint_map, uint_unmap import ...Order: lt, DirectOrdering From 18debc351a08b32afb73038c2e0862a5f132b8e2 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 10 Sep 2022 15:28:56 +0600 Subject: [PATCH 06/24] switch from Floats to IEEEFloat --- stdlib/Sort/src/Sort.jl | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/stdlib/Sort/src/Sort.jl b/stdlib/Sort/src/Sort.jl index c9c8a45d3a249..705c63534c13d 100644 --- a/stdlib/Sort/src/Sort.jl +++ b/stdlib/Sort/src/Sort.jl @@ -1444,11 +1444,10 @@ end module Float using ..Sort using ...Order +using Base: IEEEFloat import Core.Intrinsics: slt_int import ..Sort: sort!, UIntMappable, uint_map, uint_unmap import ...Order: lt, DirectOrdering -# IEEEFloat is not available in Core.Compiler -const Floats = Union{Float16, Float32, Float64} # fpsort is not safe for vectors of mixed bitwidth such as Vector{Union{Float32, Float64}}. # This type allows us to dispatch only when it is safe to do so. See #42739 for more info. const FPSortable = Union{ @@ -1469,8 +1468,8 @@ right(::DirectOrdering) = Right() left(o::Perm) = Perm(left(o.order), o.data) right(o::Perm) = Perm(right(o.order), o.data) -lt(::Left, x::T, y::T) where {T<:Floats} = slt_int(y, x) -lt(::Right, x::T, y::T) where {T<:Floats} = slt_int(x, y) +lt(::Left, x::T, y::T) where {T<:IEEEFloat} = slt_int(y, x) +lt(::Right, x::T, y::T) where {T<:IEEEFloat} = slt_int(x, y) uint_map(x::Float16, ::Left) = ~reinterpret(UInt16, x) uint_unmap(::Type{Float16}, u::UInt16, ::Left) = reinterpret(Float16, ~u) @@ -1490,11 +1489,11 @@ uint_map(x::Float64, ::Right) = reinterpret(UInt64, x) uint_unmap(::Type{Float64}, u::UInt64, ::Right) = reinterpret(Float64, u) UIntMappable(::Type{Float64}, ::Union{Left, Right}) = UInt64 -isnan(o::DirectOrdering, x::Floats) = (x!=x) +isnan(o::DirectOrdering, x::IEEEFloat) = (x!=x) isnan(o::DirectOrdering, x::Missing) = false isnan(o::Perm, i::Integer) = isnan(o.order,o.data[i]) -ismissing(o::DirectOrdering, x::Floats) = false +ismissing(o::DirectOrdering, x::IEEEFloat) = false ismissing(o::DirectOrdering, x::Missing) = true ismissing(o::Perm, i::Integer) = ismissing(o.order,o.data[i]) @@ -1566,8 +1565,8 @@ specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ForwardOrder specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ReverseOrdering}) = specials2left!(v, a, o) -issignleft(o::ForwardOrdering, x::Floats) = lt(o, x, zero(x)) -issignleft(o::ReverseOrdering, x::Floats) = lt(o, x, -zero(x)) +issignleft(o::ForwardOrdering, x::IEEEFloat) = lt(o, x, zero(x)) +issignleft(o::ReverseOrdering, x::IEEEFloat) = lt(o, x, -zero(x)) issignleft(o::Perm, i::Integer) = issignleft(o.order, o.data[i]) function fpsort!(v::AbstractVector{T}, a::Algorithm, o::Ordering, t::Union{AbstractVector{T}, Nothing}=nothing) where T From 906c3d903b4953822dde407ebc1a25bf258fb462 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 10 Sep 2022 15:32:37 +0600 Subject: [PATCH 07/24] replace metaprogramming with `unsigned` --- stdlib/Sort/src/Sort.jl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/stdlib/Sort/src/Sort.jl b/stdlib/Sort/src/Sort.jl index 705c63534c13d..b637b1fb08880 100644 --- a/stdlib/Sort/src/Sort.jl +++ b/stdlib/Sort/src/Sort.jl @@ -1399,10 +1399,7 @@ uint_map(x::Signed, ::ForwardOrdering) = uint_unmap(::Type{T}, u::Unsigned, ::ForwardOrdering) where T <: Signed = xor(signed(u), typemin(T)) -# unsigned(Int) is not available during bootstrapping. -for (U, S) in [(UInt8, Int8), (UInt16, Int16), (UInt32, Int32), (UInt64, Int64), (UInt128, Int128)] - @eval UIntMappable(::Union{Type{$U}, Type{$S}}, ::ForwardOrdering) = $U -end +UIntMappable(T::Base.BitIntegerType, ::ForwardOrdering) = unsigned(T) # Floats are not UIntMappable under regular orderings because they fail on NaN edge cases. # uint mappings for floats are defined in Float, where the Left and Right orderings From dece402e841115d13642ea58c1b0d84606b9f0ed Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 10 Sep 2022 16:32:30 +0600 Subject: [PATCH 08/24] revert negligible whitespace changes --- stdlib/Sort/src/Sort.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/stdlib/Sort/src/Sort.jl b/stdlib/Sort/src/Sort.jl index b637b1fb08880..673febb85701f 100644 --- a/stdlib/Sort/src/Sort.jl +++ b/stdlib/Sort/src/Sort.jl @@ -34,6 +34,7 @@ export # also exported by Base SMALL_ALGORITHM, SMALL_THRESHOLD + ## functions requiring only ordering ## function issorted(itr, order::Ordering) @@ -1442,9 +1443,11 @@ module Float using ..Sort using ...Order using Base: IEEEFloat + import Core.Intrinsics: slt_int import ..Sort: sort!, UIntMappable, uint_map, uint_unmap import ...Order: lt, DirectOrdering + # fpsort is not safe for vectors of mixed bitwidth such as Vector{Union{Float32, Float64}}. # This type allows us to dispatch only when it is safe to do so. See #42739 for more info. const FPSortable = Union{ @@ -1584,6 +1587,7 @@ function fpsort!(v::AbstractVector{T}, a::Algorithm, o::Ordering, return v end + fpsort!(v::AbstractVector, a::Sort.PartialQuickSort, o::Ordering) = sort!(v, firstindex(v), lastindex(v), a, o) From 9366364cc0fcaee193dc1ea9718357c6bcb5199a Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 10 Sep 2022 17:09:24 +0600 Subject: [PATCH 09/24] add copyright notice to top of new files --- base/compiler/sort.jl | 2 ++ stdlib/Sort/src/ranges.jl | 2 ++ 2 files changed, 4 insertions(+) diff --git a/base/compiler/sort.jl b/base/compiler/sort.jl index 5c5e2016f26ad..db8ec063f3562 100644 --- a/base/compiler/sort.jl +++ b/base/compiler/sort.jl @@ -1,3 +1,5 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + # reference on sorted binary search: # http://www.tbray.org/ongoing/When/200x/2003/03/22/Binary diff --git a/stdlib/Sort/src/ranges.jl b/stdlib/Sort/src/ranges.jl index 8812e09944888..27f118df11443 100644 --- a/stdlib/Sort/src/ranges.jl +++ b/stdlib/Sort/src/ranges.jl @@ -1,3 +1,5 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + issorted(r::AbstractUnitRange) = true issorted(r::AbstractRange) = length(r) <= 1 || step(r) >= zero(step(r)) From 635075947b8e78e429bfb22a57d6676a7debb03d Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 10 Sep 2022 17:14:32 +0600 Subject: [PATCH 10/24] update makefile so that `make cleanall && make` passes --- stdlib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/Makefile b/stdlib/Makefile index d45be468626cd..c4d12dd55d416 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -44,7 +44,7 @@ $(foreach jll,$(JLLS),$(eval $(call download-artifacts-toml,$(jll)))) STDLIBS = Artifacts Base64 CRC32c Dates Distributed FileWatching \ Future InteractiveUtils LazyArtifacts Libdl LibGit2 LinearAlgebra Logging \ Markdown Mmap Printf Profile Random REPL Serialization SHA \ - SharedArrays Sockets SparseArrays SuiteSparse Test TOML Unicode UUIDs \ + SharedArrays Sockets Sort SparseArrays SuiteSparse Test TOML Unicode UUIDs \ $(JLL_NAMES) STDLIBS_EXT = Pkg Statistics LibCURL Downloads ArgTools Tar NetworkOptions SuiteSparse SparseArrays SHA From d52cbe7b4dd3a732db2652c2a25ee5a4f9020846 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 10 Sep 2022 17:31:25 +0600 Subject: [PATCH 11/24] move docs --- doc/src/base/sort.md | 219 --------------------------------- stdlib/Sort/docs/src/index.md | 220 +++++++++++++++++++++++++++++++++- 2 files changed, 218 insertions(+), 221 deletions(-) delete mode 100644 doc/src/base/sort.md diff --git a/doc/src/base/sort.md b/doc/src/base/sort.md deleted file mode 100644 index 9f00381ab892c..0000000000000 --- a/doc/src/base/sort.md +++ /dev/null @@ -1,219 +0,0 @@ -# Sorting and Related Functions - -Julia has an extensive, flexible API for sorting and interacting with already-sorted arrays of -values. By default, Julia picks reasonable algorithms and sorts in standard ascending order: - -```jldoctest -julia> sort([2,3,1]) -3-element Vector{Int64}: - 1 - 2 - 3 -``` - -You can easily sort in reverse order as well: - -```jldoctest -julia> sort([2,3,1], rev=true) -3-element Vector{Int64}: - 3 - 2 - 1 -``` - -To sort an array in-place, use the "bang" version of the sort function: - -```jldoctest -julia> a = [2,3,1]; - -julia> sort!(a); - -julia> a -3-element Vector{Int64}: - 1 - 2 - 3 -``` - -Instead of directly sorting an array, you can compute a permutation of the array's indices that -puts the array into sorted order: - -```julia-repl -julia> v = randn(5) -5-element Array{Float64,1}: - 0.297288 - 0.382396 - -0.597634 - -0.0104452 - -0.839027 - -julia> p = sortperm(v) -5-element Array{Int64,1}: - 5 - 3 - 4 - 1 - 2 - -julia> v[p] -5-element Array{Float64,1}: - -0.839027 - -0.597634 - -0.0104452 - 0.297288 - 0.382396 -``` - -Arrays can easily be sorted according to an arbitrary transformation of their values: - -```julia-repl -julia> sort(v, by=abs) -5-element Array{Float64,1}: - -0.0104452 - 0.297288 - 0.382396 - -0.597634 - -0.839027 -``` - -Or in reverse order by a transformation: - -```julia-repl -julia> sort(v, by=abs, rev=true) -5-element Array{Float64,1}: - -0.839027 - -0.597634 - 0.382396 - 0.297288 - -0.0104452 -``` - -If needed, the sorting algorithm can be chosen: - -```julia-repl -julia> sort(v, alg=InsertionSort) -5-element Array{Float64,1}: - -0.839027 - -0.597634 - -0.0104452 - 0.297288 - 0.382396 -``` - -All the sorting and order related functions rely on a "less than" relation defining a total order -on the values to be manipulated. The `isless` function is invoked by default, but the relation -can be specified via the `lt` keyword. - -## Sorting Functions - -```@docs -Base.sort! -Base.sort -Base.sortperm -Base.InsertionSort -Base.MergeSort -Base.QuickSort -Base.PartialQuickSort -Base.Sort.sortperm! -Base.Sort.sortslices -``` - -## Order-Related Functions - -```@docs -Base.issorted -Base.Sort.searchsorted -Base.Sort.searchsortedfirst -Base.Sort.searchsortedlast -Base.Sort.insorted -Base.Sort.partialsort! -Base.Sort.partialsort -Base.Sort.partialsortperm -Base.Sort.partialsortperm! -``` - -## Sorting Algorithms - -There are currently four sorting algorithms available in base Julia: - - * [`InsertionSort`](@ref) - * [`QuickSort`](@ref) - * [`PartialQuickSort(k)`](@ref) - * [`MergeSort`](@ref) - -`InsertionSort` is an O(n^2) stable sorting algorithm. It is efficient for very small `n`, and -is used internally by `QuickSort`. - -`QuickSort` is an O(n log n) sorting algorithm which is in-place, very fast, but not stable – -i.e. elements which are considered equal will not remain in the same order in which they originally -appeared in the array to be sorted. `QuickSort` is the default algorithm for numeric values, including -integers and floats. - -`PartialQuickSort(k)` is similar to `QuickSort`, but the output array is only sorted up to index -`k` if `k` is an integer, or in the range of `k` if `k` is an `OrdinalRange`. For example: - -```julia -x = rand(1:500, 100) -k = 50 -k2 = 50:100 -s = sort(x; alg=QuickSort) -ps = sort(x; alg=PartialQuickSort(k)) -qs = sort(x; alg=PartialQuickSort(k2)) -map(issorted, (s, ps, qs)) # => (true, false, false) -map(x->issorted(x[1:k]), (s, ps, qs)) # => (true, true, false) -map(x->issorted(x[k2]), (s, ps, qs)) # => (true, false, true) -s[1:k] == ps[1:k] # => true -s[k2] == qs[k2] # => true -``` - -`MergeSort` is an O(n log n) stable sorting algorithm but is not in-place – it requires a temporary -array of half the size of the input array – and is typically not quite as fast as `QuickSort`. -It is the default algorithm for non-numeric data. - -The default sorting algorithms are chosen on the basis that they are fast and stable, or *appear* -to be so. For numeric types indeed, `QuickSort` is selected as it is faster and indistinguishable -in this case from a stable sort (unless the array records its mutations in some way). The stability -property comes at a non-negligible cost, so if you don't need it, you may want to explicitly specify -your preferred algorithm, e.g. `sort!(v, alg=QuickSort)`. - -The mechanism by which Julia picks default sorting algorithms is implemented via the `Base.Sort.defalg` -function. It allows a particular algorithm to be registered as the default in all sorting functions -for specific arrays. For example, here are the two default methods from [`sort.jl`](https://github.com/JuliaLang/julia/blob/master/base/sort.jl): - -```julia -defalg(v::AbstractArray) = MergeSort -defalg(v::AbstractArray{<:Number}) = QuickSort -``` - -As for numeric arrays, choosing a non-stable default algorithm for array types for which the notion -of a stable sort is meaningless (i.e. when two values comparing equal can not be distinguished) -may make sense. - -## Alternate orderings - -By default, `sort` and related functions use [`isless`](@ref) to compare two -elements in order to determine which should come first. The -[`Base.Order.Ordering`](@ref) abstract type provides a mechanism for defining -alternate orderings on the same set of elements. Instances of `Ordering` define -a [total order](https://en.wikipedia.org/wiki/Total_order) on a set of elements, -so that for any elements `a`, `b`, `c` the following hold: - -* Exactly one of the following is true: `a` is less than `b`, `b` is less than - `a`, or `a` and `b` are equal (according to [`isequal`](@ref)). -* The relation is transitive - if `a` is less than `b` and `b` is less than `c` - then `a` is less than `c`. - -The [`Base.Order.lt`](@ref) function works as a generalization of `isless` to -test whether `a` is less than `b` according to a given order. - -```@docs -Base.Order.Ordering -Base.Order.lt -Base.Order.ord -Base.Order.Forward -Base.Order.ReverseOrdering -Base.Order.Reverse -Base.Order.By -Base.Order.Lt -Base.Order.Perm -``` diff --git a/stdlib/Sort/docs/src/index.md b/stdlib/Sort/docs/src/index.md index b28a7d9b642f7..9f00381ab892c 100644 --- a/stdlib/Sort/docs/src/index.md +++ b/stdlib/Sort/docs/src/index.md @@ -1,3 +1,219 @@ -# Sorting +# Sorting and Related Functions -documentation +Julia has an extensive, flexible API for sorting and interacting with already-sorted arrays of +values. By default, Julia picks reasonable algorithms and sorts in standard ascending order: + +```jldoctest +julia> sort([2,3,1]) +3-element Vector{Int64}: + 1 + 2 + 3 +``` + +You can easily sort in reverse order as well: + +```jldoctest +julia> sort([2,3,1], rev=true) +3-element Vector{Int64}: + 3 + 2 + 1 +``` + +To sort an array in-place, use the "bang" version of the sort function: + +```jldoctest +julia> a = [2,3,1]; + +julia> sort!(a); + +julia> a +3-element Vector{Int64}: + 1 + 2 + 3 +``` + +Instead of directly sorting an array, you can compute a permutation of the array's indices that +puts the array into sorted order: + +```julia-repl +julia> v = randn(5) +5-element Array{Float64,1}: + 0.297288 + 0.382396 + -0.597634 + -0.0104452 + -0.839027 + +julia> p = sortperm(v) +5-element Array{Int64,1}: + 5 + 3 + 4 + 1 + 2 + +julia> v[p] +5-element Array{Float64,1}: + -0.839027 + -0.597634 + -0.0104452 + 0.297288 + 0.382396 +``` + +Arrays can easily be sorted according to an arbitrary transformation of their values: + +```julia-repl +julia> sort(v, by=abs) +5-element Array{Float64,1}: + -0.0104452 + 0.297288 + 0.382396 + -0.597634 + -0.839027 +``` + +Or in reverse order by a transformation: + +```julia-repl +julia> sort(v, by=abs, rev=true) +5-element Array{Float64,1}: + -0.839027 + -0.597634 + 0.382396 + 0.297288 + -0.0104452 +``` + +If needed, the sorting algorithm can be chosen: + +```julia-repl +julia> sort(v, alg=InsertionSort) +5-element Array{Float64,1}: + -0.839027 + -0.597634 + -0.0104452 + 0.297288 + 0.382396 +``` + +All the sorting and order related functions rely on a "less than" relation defining a total order +on the values to be manipulated. The `isless` function is invoked by default, but the relation +can be specified via the `lt` keyword. + +## Sorting Functions + +```@docs +Base.sort! +Base.sort +Base.sortperm +Base.InsertionSort +Base.MergeSort +Base.QuickSort +Base.PartialQuickSort +Base.Sort.sortperm! +Base.Sort.sortslices +``` + +## Order-Related Functions + +```@docs +Base.issorted +Base.Sort.searchsorted +Base.Sort.searchsortedfirst +Base.Sort.searchsortedlast +Base.Sort.insorted +Base.Sort.partialsort! +Base.Sort.partialsort +Base.Sort.partialsortperm +Base.Sort.partialsortperm! +``` + +## Sorting Algorithms + +There are currently four sorting algorithms available in base Julia: + + * [`InsertionSort`](@ref) + * [`QuickSort`](@ref) + * [`PartialQuickSort(k)`](@ref) + * [`MergeSort`](@ref) + +`InsertionSort` is an O(n^2) stable sorting algorithm. It is efficient for very small `n`, and +is used internally by `QuickSort`. + +`QuickSort` is an O(n log n) sorting algorithm which is in-place, very fast, but not stable – +i.e. elements which are considered equal will not remain in the same order in which they originally +appeared in the array to be sorted. `QuickSort` is the default algorithm for numeric values, including +integers and floats. + +`PartialQuickSort(k)` is similar to `QuickSort`, but the output array is only sorted up to index +`k` if `k` is an integer, or in the range of `k` if `k` is an `OrdinalRange`. For example: + +```julia +x = rand(1:500, 100) +k = 50 +k2 = 50:100 +s = sort(x; alg=QuickSort) +ps = sort(x; alg=PartialQuickSort(k)) +qs = sort(x; alg=PartialQuickSort(k2)) +map(issorted, (s, ps, qs)) # => (true, false, false) +map(x->issorted(x[1:k]), (s, ps, qs)) # => (true, true, false) +map(x->issorted(x[k2]), (s, ps, qs)) # => (true, false, true) +s[1:k] == ps[1:k] # => true +s[k2] == qs[k2] # => true +``` + +`MergeSort` is an O(n log n) stable sorting algorithm but is not in-place – it requires a temporary +array of half the size of the input array – and is typically not quite as fast as `QuickSort`. +It is the default algorithm for non-numeric data. + +The default sorting algorithms are chosen on the basis that they are fast and stable, or *appear* +to be so. For numeric types indeed, `QuickSort` is selected as it is faster and indistinguishable +in this case from a stable sort (unless the array records its mutations in some way). The stability +property comes at a non-negligible cost, so if you don't need it, you may want to explicitly specify +your preferred algorithm, e.g. `sort!(v, alg=QuickSort)`. + +The mechanism by which Julia picks default sorting algorithms is implemented via the `Base.Sort.defalg` +function. It allows a particular algorithm to be registered as the default in all sorting functions +for specific arrays. For example, here are the two default methods from [`sort.jl`](https://github.com/JuliaLang/julia/blob/master/base/sort.jl): + +```julia +defalg(v::AbstractArray) = MergeSort +defalg(v::AbstractArray{<:Number}) = QuickSort +``` + +As for numeric arrays, choosing a non-stable default algorithm for array types for which the notion +of a stable sort is meaningless (i.e. when two values comparing equal can not be distinguished) +may make sense. + +## Alternate orderings + +By default, `sort` and related functions use [`isless`](@ref) to compare two +elements in order to determine which should come first. The +[`Base.Order.Ordering`](@ref) abstract type provides a mechanism for defining +alternate orderings on the same set of elements. Instances of `Ordering` define +a [total order](https://en.wikipedia.org/wiki/Total_order) on a set of elements, +so that for any elements `a`, `b`, `c` the following hold: + +* Exactly one of the following is true: `a` is less than `b`, `b` is less than + `a`, or `a` and `b` are equal (according to [`isequal`](@ref)). +* The relation is transitive - if `a` is less than `b` and `b` is less than `c` + then `a` is less than `c`. + +The [`Base.Order.lt`](@ref) function works as a generalization of `isless` to +test whether `a` is less than `b` according to a given order. + +```@docs +Base.Order.Ordering +Base.Order.lt +Base.Order.ord +Base.Order.Forward +Base.Order.ReverseOrdering +Base.Order.Reverse +Base.Order.By +Base.Order.Lt +Base.Order.Perm +``` From 4591ce8c63a885ca4927e9a0a85b55ab8eeaff92 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Mon, 12 Sep 2022 20:50:41 +0600 Subject: [PATCH 12/24] Switch from combsort to heapsort --- base/compiler/sort.jl | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/base/compiler/sort.jl b/base/compiler/sort.jl index db8ec063f3562..4437f769d76a9 100644 --- a/base/compiler/sort.jl +++ b/base/compiler/sort.jl @@ -74,15 +74,16 @@ end function sort!(v::Vector; by::Function=identity, (<)::Function=<) isempty(v) && return v # This branch is hit 95% of the time - if length(v) > 30 # Comb pass avoids quadratic runtime - interval = (3 * length(v)) >> 2 - while interval > 1 - @inbounds for j in 1:length(v)-interval - a, b = v[j], v[j+interval] - v[j], v[j+interval] = by(b) < by(a) ? (b, a) : (a, b) - end - interval = (3 * interval) >> 2 - end + # Of the remaining 5%, this branch is hit less than 1% of the time + if length(v) > 200 # Heap sort prevents quadratic runtime + o = ord(<, by, true) + heapify!(v, o) + for i in lastindex(v):-1:2 + y = v[i] + v[i] = v[1] + percolate_down!(v, 1, y, o, i-1) + end + return v end @inbounds for i in 2:length(v) # Insertion sort From fe406a695636166081a03a38d3b3042947d1a725 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Tue, 13 Sep 2022 10:56:03 +0600 Subject: [PATCH 13/24] make UUID consistent --- stdlib/Sort/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/Sort/Project.toml b/stdlib/Sort/Project.toml index 4b211cc603527..b2283ddc65e6d 100644 --- a/stdlib/Sort/Project.toml +++ b/stdlib/Sort/Project.toml @@ -1,5 +1,5 @@ name = "Sort" -uuid = "7744cb9a-8a56-1d63-a5da-e2fdf8a12fa2" +uuid = "a93dcad4-a5b3-87f1-1599-dc5d35b4bf41" [deps] Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" From 673904e037ff656f4e74429e32964fc1c2806184 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Tue, 13 Sep 2022 17:19:26 +0600 Subject: [PATCH 14/24] fix whitespace --- base/compiler/sort.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/compiler/sort.jl b/base/compiler/sort.jl index 4437f769d76a9..1998e0a8d9742 100644 --- a/base/compiler/sort.jl +++ b/base/compiler/sort.jl @@ -82,7 +82,7 @@ function sort!(v::Vector; by::Function=identity, (<)::Function=<) y = v[i] v[i] = v[1] percolate_down!(v, 1, y, o, i-1) - end + end return v end From ff231373e151386a8f7d2aa99ecebaa75b836ed1 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Tue, 13 Sep 2022 17:32:28 +0600 Subject: [PATCH 15/24] try to fix docs --- doc/make.jl | 1 - stdlib/Sort/docs/src/index.md | 36 +++++++++++++++++------------------ 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/doc/make.jl b/doc/make.jl index 61adf2ec603fa..0ecc73d1fcc03 100644 --- a/doc/make.jl +++ b/doc/make.jl @@ -116,7 +116,6 @@ BaseDocs = [ "base/file.md", "base/io-network.md", "base/punctuation.md", - "base/sort.md", "base/iterators.md", "base/c.md", "base/libc.md", diff --git a/stdlib/Sort/docs/src/index.md b/stdlib/Sort/docs/src/index.md index 9f00381ab892c..c3a1f77f7fe7a 100644 --- a/stdlib/Sort/docs/src/index.md +++ b/stdlib/Sort/docs/src/index.md @@ -107,29 +107,29 @@ can be specified via the `lt` keyword. ## Sorting Functions ```@docs -Base.sort! -Base.sort -Base.sortperm -Base.InsertionSort -Base.MergeSort -Base.QuickSort -Base.PartialQuickSort -Base.Sort.sortperm! -Base.Sort.sortslices +sort! +sort +sortperm +Base.Sort.InsertionSort +Base.Sort.MergeSort +Base.Sort.QuickSort +Base.Sort.PartialQuickSort +sortperm! +sortslices ``` ## Order-Related Functions ```@docs -Base.issorted -Base.Sort.searchsorted -Base.Sort.searchsortedfirst -Base.Sort.searchsortedlast -Base.Sort.insorted -Base.Sort.partialsort! -Base.Sort.partialsort -Base.Sort.partialsortperm -Base.Sort.partialsortperm! +issorted +searchsorted +searchsortedfirst +searchsortedlast +insorted +partialsort! +partialsort +partialsortperm +partialsortperm! ``` ## Sorting Algorithms From e0a44ad03df0bc94edfd182c4b2f0da869443096 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 1 Oct 2022 07:58:27 +0600 Subject: [PATCH 16/24] remove cross references in sorting docs (TODO: Revert this) --- stdlib/Sort/docs/src/index.md | 8 ++++---- stdlib/Sort/src/Sort.jl | 28 ++++++++++++++-------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/stdlib/Sort/docs/src/index.md b/stdlib/Sort/docs/src/index.md index c3a1f77f7fe7a..35f3dd93cc5d0 100644 --- a/stdlib/Sort/docs/src/index.md +++ b/stdlib/Sort/docs/src/index.md @@ -136,10 +136,10 @@ partialsortperm! There are currently four sorting algorithms available in base Julia: - * [`InsertionSort`](@ref) - * [`QuickSort`](@ref) - * [`PartialQuickSort(k)`](@ref) - * [`MergeSort`](@ref) + * `InsertionSort` + * `QuickSort` + * `PartialQuickSort(k)` + * `MergeSort` `InsertionSort` is an O(n^2) stable sorting algorithm. It is efficient for very small `n`, and is used internally by `QuickSort`. diff --git a/stdlib/Sort/src/Sort.jl b/stdlib/Sort/src/Sort.jl index 673febb85701f..fb6f4c27d7398 100644 --- a/stdlib/Sort/src/Sort.jl +++ b/stdlib/Sort/src/Sort.jl @@ -55,7 +55,7 @@ end issorted(v, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) Test whether a vector is in sorted order. The `lt`, `by` and `rev` keywords modify what -order is considered to be sorted just as they do for [`sort`](@ref). +order is considered to be sorted just as they do for `sort`. # Examples ```jldoctest @@ -144,7 +144,7 @@ partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}; """ partialsort(v, k, by=, lt=, rev=false) -Variant of [`partialsort!`](@ref) which copies `v` before partially sorting it, thereby returning the +Variant of `partialsort!` which copies `v` before partially sorting it, thereby returning the same thing as `partialsort!` but leaving `v` unmodified. """ partialsort(v::AbstractVector, k::Union{Integer,OrdinalRange}; kws...) = @@ -289,7 +289,7 @@ according to the order specified by the `by`, `lt` and `rev` keywords, assuming is already sorted in that order. Return an empty range located at the insertion point if `a` does not contain values equal to `x`. -See also: [`insorted`](@ref), [`searchsortedfirst`](@ref), [`sort`](@ref), [`findall`](@ref). +See also: `insorted`, `searchsortedfirst`, `sort`, [`findall`](@ref). # Examples ```jldoctest @@ -319,7 +319,7 @@ specified order. Return `lastindex(a) + 1` if `x` is greater than all values in `insert!`ing `x` at this index will maintain sorted order. -See also: [`searchsortedlast`](@ref), [`searchsorted`](@ref), [`findfirst`](@ref). +See also: `searchsortedlast`, `searchsorted`, [`findfirst`](@ref). # Examples ```jldoctest @@ -372,7 +372,7 @@ julia> searchsortedlast([1, 2, 4, 5, 5, 7], 0) # no match, insert at start Determine whether an item `x` is in the sorted collection `a`, in the sense that it is [`==`](@ref) to one of the values of the collection according to the order specified by the `by`, `lt` and `rev` keywords, assuming that `a` is already -sorted in that order, see [`sort`](@ref) for the keywords. +sorted in that order, see `sort` for the keywords. See also [`in`](@ref). @@ -430,14 +430,14 @@ end Indicate that a sorting function should use the partial quick sort algorithm. Partial quick sort returns the smallest `k` elements sorted from smallest -to largest, finding them and sorting them using [`QuickSort`](@ref). +to largest, finding them and sorting them using `QuickSort`. Characteristics: * *not stable*: does not preserve the ordering of elements which compare equal (e.g. "a" and "A" in a sort of letters which ignores case). * *in-place* in memory. - * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). + * *divide-and-conquer*: sort strategy similar to `MergeSort`. """ struct PartialQuickSort{T <: Union{Integer,OrdinalRange}} <: Algorithm k::T @@ -960,7 +960,7 @@ end """ sort(v; alg::Algorithm=defalg(v), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) -Variant of [`sort!`](@ref) that returns a sorted copy of `v` leaving `v` itself unmodified. +Variant of `sort!` that returns a sorted copy of `v` leaving `v` itself unmodified. # Examples ```jldoctest @@ -1020,7 +1020,7 @@ partialsortperm(v::AbstractVector, k::Union{Integer,OrdinalRange}; kwargs...) = """ partialsortperm!(ix, v, k; by=, lt=, rev=false, initialized=false) -Like [`partialsortperm`](@ref), but accepts a preallocated index vector `ix` the same size as +Like `partialsortperm`, but accepts a preallocated index vector `ix` the same size as `v`, which is used to store (a permutation of) the indices of `v`. If the index vector `ix` is initialized with the indices of `v` (or a permutation thereof), `initialized` should be set to @@ -1092,11 +1092,11 @@ end Return a permutation vector or array `I` that puts `A[I]` in sorted order along the given dimension. If `A` has more than one dimension, then the `dims` keyword argument must be specified. The order is specified -using the same keywords as [`sort!`](@ref). The permutation is guaranteed to be stable even +using the same keywords as `sort!`. The permutation is guaranteed to be stable even if the sorting algorithm is unstable, meaning that indices of equal elements appear in ascending order. -See also [`sortperm!`](@ref), [`partialsortperm`](@ref), [`invperm`](@ref), [`indexin`](@ref). +See also `sortperm!`, `partialsortperm`, [`invperm`](@ref), [`indexin`](@ref). To sort slices of an array, refer to [`sortslices`](@ref). # Examples @@ -1159,7 +1159,7 @@ end """ sortperm!(ix, A; alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward, initialized::Bool=false, [dims::Integer]) -Like [`sortperm`](@ref), but accepts a preallocated index vector or array `ix` with the same `axes` as `A`. If `initialized` is `false` +Like `sortperm`, but accepts a preallocated index vector or array `ix` with the same `axes` as `A`. If `initialized` is `false` (the default), `ix` is initialized to contain the values `LinearIndices(A)`. # Examples @@ -1241,7 +1241,7 @@ end sort(A; dims::Integer, alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) Sort a multidimensional array `A` along the given dimension. -See [`sort!`](@ref) for a description of possible +See `sort!` for a description of possible keyword arguments. To sort slices of an array, refer to [`sortslices`](@ref). @@ -1300,7 +1300,7 @@ end sort!(A; dims::Integer, alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) Sort the multidimensional array `A` along dimension `dims`. -See [`sort!`](@ref) for a description of possible keyword arguments. +See `sort!` for a description of possible keyword arguments. To sort slices of an array, refer to [`sortslices`](@ref). From 6225a185d586b5774794be285e3abbbed6ed463b Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 1 Oct 2022 08:29:04 +0600 Subject: [PATCH 17/24] add NEWS.md entry --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index 183920f122477..e6e37ccec8ab6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -132,6 +132,9 @@ Standard library changes * An "IPython mode" which mimics the behaviour of the prompts and storing the evaluated result in `Out` can be activated with `REPL.ipython_mode!()`. See the manual for how to enable this at startup. +#### Sort +* `Sort` has moved out of base and is now a standard library. ([#46679]) + #### SparseArrays #### Test From 2d43a0915b1c9b77f39f47cc9160ee615cb8e552 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 1 Oct 2022 12:27:09 +0600 Subject: [PATCH 18/24] add compiler sort tests (needs fixup) --- test/choosetests.jl | 2 +- test/compiler/sort.jl | 46 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 test/compiler/sort.jl diff --git a/test/choosetests.jl b/test/choosetests.jl index 95ca708b1d142..04a35555e0086 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -142,7 +142,7 @@ function choosetests(choices = []) filtertests!(tests, "subarray") filtertests!(tests, "compiler", [ "compiler/datastructures", "compiler/inference", "compiler/effects", - "compiler/validation", "compiler/ssair", "compiler/irpasses", + "compiler/validation", "compiler/sort", "compiler/ssair", "compiler/irpasses", "compiler/codegen", "compiler/inline", "compiler/contextual", "compiler/AbstractInterpreter", "compiler/EscapeAnalysis/local", "compiler/EscapeAnalysis/interprocedural"]) diff --git a/test/compiler/sort.jl b/test/compiler/sort.jl new file mode 100644 index 0000000000000..c4adbd646f9d1 --- /dev/null +++ b/test/compiler/sort.jl @@ -0,0 +1,46 @@ +@testset "searchsorted" begin + @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 0) === Core.Compiler.UnitRange(1, 0) + @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 1) === Core.Compiler.UnitRange(1, 2) + @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 2) === Core.Compiler.UnitRange(3, 4) + @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 4) === Core.Compiler.UnitRange(7, 6) + @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 2.5; lt=<) === Core.Compiler.UnitRange(5, 4) + + @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 0) === Core.Compiler.UnitRange(1, 0) + @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 1) === Core.Compiler.UnitRange(1, 1) + @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 2) === Core.Compiler.UnitRange(2, 2) + @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 4) === Core.Compiler.UnitRange(4, 3) + + @test Core.Compiler.searchsorted([1:10;], 1, by=(x -> x >= 5)) === Core.Compiler.UnitRange(1, 4) + @test Core.Compiler.searchsorted([1:10;], 10, by=(x -> x >= 5)) === Core.Compiler.UnitRange(5, 10) + @test Core.Compiler.searchsorted([1:5; 1:5; 1:5], 1, 6, 10, Core.Compiler.Forward) === Core.Compiler.UnitRange(6, 6) + @test Core.Compiler.searchsorted(fill(1, 15), 1, 6, 10, Core.Compiler.Forward) === Core.Compiler.UnitRange(6, 10) + + for (rg,I) in Any[(Core.Compiler.UnitRange(49, 57), 47:59), + (Core.Compiler.StepRange(1, 2, 17), -1:19)] + rg_r = Core.Compiler.reverse(rg) + rgv, rgv_r = Core.Compiler.collect(rg), Core.Compiler.collect(rg_r) + for i = I + @test Core.Compiler.searchsorted(rg,i) === Core.Compiler.searchsorted(rgv,i) + @test Core.Compiler.searchsorted(rg_r,i,rev=true) === Core.Compiler.searchsorted(rgv_r,i,rev=true) + end + end +end + +@testset "basic sort" begin + v = [3,1,2] + @test v == [3,1,2] + @test Core.Compiler.sort!(v) === v == [1,2,3] + @test Core.Compiler.sort!(v, by = x -> -x) === v == [3,2,1] + @test Core.Compiler.sort!(v, by = x -> -x, < = >) === v == [1,2,3] +end + +@testset "randomized sorting tests" begin + for n in [0, 1, 3, 10, 30, 100, 300], k in [0, 30, 2n] + v = rand(-1:k, n) + for by in [identity, x -> -x, x -> x^2 + .1x], lt in [<, >] + @test sort(v; by, lt) == Core.Compiler.sort!(copy(v); by, < = lt) + end + end + + println("THE TESTS ACTUALLY RAN") # TODO: deleteme +end From 701daeb81c29ad24decb1a2c7b2e1c4f5937ded4 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 1 Oct 2022 12:29:18 +0600 Subject: [PATCH 19/24] stop running base/sorting tests (moved to stdlib/sort) (needs fixup) --- stdlib/Sort/test/runtests.jl | 2 ++ test/choosetests.jl | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/stdlib/Sort/test/runtests.jl b/stdlib/Sort/test/runtests.jl index 48297ce77325d..55aa11851d355 100644 --- a/stdlib/Sort/test/runtests.jl +++ b/stdlib/Sort/test/runtests.jl @@ -870,6 +870,8 @@ end @test searchsorted(v, 0.1, rev=true) === 4:3 end end + + println("TESTS ACTUALLY RAN 2") # TODO: deleteme end # The "searchsorted" testset is at the end of the file because it is slow. diff --git a/test/choosetests.jl b/test/choosetests.jl index 04a35555e0086..e9a3ad6c3d0b4 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -14,7 +14,7 @@ const TESTNAMES = [ "intfuncs", "simdloop", "vecelement", "rational", "bitarray", "copy", "math", "fastmath", "functional", "iterators", "operators", "ordering", "path", "ccall", "parse", "loading", "gmp", - "sorting", "spawn", "backtrace", "exceptions", + "spawn", "backtrace", "exceptions", "file", "read", "version", "namedtuple", "mpfr", "broadcast", "complex", "floatapprox", "stdlib", "reflection", "regex", "float16", From 5f000e6c6d620bfe6d4746b16f97d25053a9e4a1 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 1 Oct 2022 14:55:45 +0600 Subject: [PATCH 20/24] fix test usage of OffsetArrays (following pattern of Random) --- stdlib/Sort/Project.toml | 3 +-- stdlib/Sort/test/runtests.jl | 9 ++++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/stdlib/Sort/Project.toml b/stdlib/Sort/Project.toml index b2283ddc65e6d..69d4c49fe58f6 100644 --- a/stdlib/Sort/Project.toml +++ b/stdlib/Sort/Project.toml @@ -6,7 +6,6 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" [targets] -test = ["Test", "OffsetArrays"] +test = ["Test"] diff --git a/stdlib/Sort/test/runtests.jl b/stdlib/Sort/test/runtests.jl index 55aa11851d355..aa74ef1b51af2 100644 --- a/stdlib/Sort/test/runtests.jl +++ b/stdlib/Sort/test/runtests.jl @@ -1,11 +1,12 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -module SortingTests - using Base.Order using Random using Test -using OffsetArrays + +const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") +isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl")) +using .Main.OffsetArrays @testset "Order" begin @test Forward == ForwardOrdering() @@ -874,5 +875,3 @@ end println("TESTS ACTUALLY RAN 2") # TODO: deleteme end # The "searchsorted" testset is at the end of the file because it is slow. - -end From 0ec2d027d454b9766424a59f04c76e283a23f7b5 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 1 Oct 2022 15:16:13 +0600 Subject: [PATCH 21/24] fixups for tests (remove debug print statements) --- stdlib/Sort/test/runtests.jl | 2 -- test/compiler/sort.jl | 2 -- 2 files changed, 4 deletions(-) diff --git a/stdlib/Sort/test/runtests.jl b/stdlib/Sort/test/runtests.jl index aa74ef1b51af2..48eeb6c9d24ad 100644 --- a/stdlib/Sort/test/runtests.jl +++ b/stdlib/Sort/test/runtests.jl @@ -871,7 +871,5 @@ end @test searchsorted(v, 0.1, rev=true) === 4:3 end end - - println("TESTS ACTUALLY RAN 2") # TODO: deleteme end # The "searchsorted" testset is at the end of the file because it is slow. diff --git a/test/compiler/sort.jl b/test/compiler/sort.jl index c4adbd646f9d1..beba0f833df5a 100644 --- a/test/compiler/sort.jl +++ b/test/compiler/sort.jl @@ -41,6 +41,4 @@ end @test sort(v; by, lt) == Core.Compiler.sort!(copy(v); by, < = lt) end end - - println("THE TESTS ACTUALLY RAN") # TODO: deleteme end From d0d50784ae1b3154c97b39cb799c3c754e085f85 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sat, 1 Oct 2022 16:48:10 +0600 Subject: [PATCH 22/24] try to fix precompile test --- test/precompile.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/precompile.jl b/test/precompile.jl index fc73231a3e308..ba078d7d8db59 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -375,7 +375,7 @@ precompile_test_harness(false) do dir :Distributed, :Downloads, :FileWatching, :Future, :InteractiveUtils, :libblastrampoline_jll, :LazyArtifacts, :LibCURL, :LibCURL_jll, :LibGit2, :Libdl, :LinearAlgebra, :Logging, :Markdown, :Mmap, :MozillaCACerts_jll, :NetworkOptions, :OpenBLAS_jll, :Pkg, :Printf, - :Profile, :p7zip_jll, :REPL, :Random, :SHA, :Serialization, :SharedArrays, :Sockets, + :Profile, :p7zip_jll, :REPL, :Random, :SHA, :Serialization, :SharedArrays, :Sockets, :Sort, :SparseArrays, :TOML, :Tar, :Test, :UUIDs, :Unicode, :nghttp2_jll] ), From f26ee614f965deb3884932d2378878d9caabd3d3 Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sun, 2 Oct 2022 07:02:10 +0600 Subject: [PATCH 23/24] Revert e0a44ad0 and add explicit @ref targets Not an ideal solution because we no longer use automatic @ref, but produces the same result. --- stdlib/Sort/docs/src/index.md | 8 ++++---- stdlib/Sort/src/Sort.jl | 28 ++++++++++++++-------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/stdlib/Sort/docs/src/index.md b/stdlib/Sort/docs/src/index.md index 35f3dd93cc5d0..93bd45e2002d3 100644 --- a/stdlib/Sort/docs/src/index.md +++ b/stdlib/Sort/docs/src/index.md @@ -136,10 +136,10 @@ partialsortperm! There are currently four sorting algorithms available in base Julia: - * `InsertionSort` - * `QuickSort` - * `PartialQuickSort(k)` - * `MergeSort` + * [`InsertionSort`](@ref Sort.InsertionSort) + * [`QuickSort`](@ref Sort.QuickSort) + * [`PartialQuickSort(k)`](@ref Sort.PartialQuickSort) + * [`MergeSort`](@ref Sort.MergeSort) `InsertionSort` is an O(n^2) stable sorting algorithm. It is efficient for very small `n`, and is used internally by `QuickSort`. diff --git a/stdlib/Sort/src/Sort.jl b/stdlib/Sort/src/Sort.jl index fb6f4c27d7398..8f375c80626a0 100644 --- a/stdlib/Sort/src/Sort.jl +++ b/stdlib/Sort/src/Sort.jl @@ -55,7 +55,7 @@ end issorted(v, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) Test whether a vector is in sorted order. The `lt`, `by` and `rev` keywords modify what -order is considered to be sorted just as they do for `sort`. +order is considered to be sorted just as they do for [`sort`](@ref Base.sort). # Examples ```jldoctest @@ -144,7 +144,7 @@ partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}; """ partialsort(v, k, by=, lt=, rev=false) -Variant of `partialsort!` which copies `v` before partially sorting it, thereby returning the +Variant of [`partialsort!`](@ref Base.partialsort!) which copies `v` before partially sorting it, thereby returning the same thing as `partialsort!` but leaving `v` unmodified. """ partialsort(v::AbstractVector, k::Union{Integer,OrdinalRange}; kws...) = @@ -289,7 +289,7 @@ according to the order specified by the `by`, `lt` and `rev` keywords, assuming is already sorted in that order. Return an empty range located at the insertion point if `a` does not contain values equal to `x`. -See also: `insorted`, `searchsortedfirst`, `sort`, [`findall`](@ref). +See also: [`insorted`](@ref Base.insorted), [`searchsortedfirst`](@ref Base.searchsortedfirst), [`sort`](@ref Base.sort), [`findall`](@ref). # Examples ```jldoctest @@ -319,7 +319,7 @@ specified order. Return `lastindex(a) + 1` if `x` is greater than all values in `insert!`ing `x` at this index will maintain sorted order. -See also: `searchsortedlast`, `searchsorted`, [`findfirst`](@ref). +See also: [`searchsortedlast`](@ref Base.searchsortedlast), [`searchsorted`](@ref Base.searchsorted), [`findfirst`](@ref). # Examples ```jldoctest @@ -372,7 +372,7 @@ julia> searchsortedlast([1, 2, 4, 5, 5, 7], 0) # no match, insert at start Determine whether an item `x` is in the sorted collection `a`, in the sense that it is [`==`](@ref) to one of the values of the collection according to the order specified by the `by`, `lt` and `rev` keywords, assuming that `a` is already -sorted in that order, see `sort` for the keywords. +sorted in that order, see [`sort`](@ref Base.sort) for the keywords. See also [`in`](@ref). @@ -430,14 +430,14 @@ end Indicate that a sorting function should use the partial quick sort algorithm. Partial quick sort returns the smallest `k` elements sorted from smallest -to largest, finding them and sorting them using `QuickSort`. +to largest, finding them and sorting them using [`QuickSort`](@ref Sort.QuickSort). Characteristics: * *not stable*: does not preserve the ordering of elements which compare equal (e.g. "a" and "A" in a sort of letters which ignores case). * *in-place* in memory. - * *divide-and-conquer*: sort strategy similar to `MergeSort`. + * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref Sort.MergeSort). """ struct PartialQuickSort{T <: Union{Integer,OrdinalRange}} <: Algorithm k::T @@ -960,7 +960,7 @@ end """ sort(v; alg::Algorithm=defalg(v), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) -Variant of `sort!` that returns a sorted copy of `v` leaving `v` itself unmodified. +Variant of [`sort!`](@ref Base.sort!) that returns a sorted copy of `v` leaving `v` itself unmodified. # Examples ```jldoctest @@ -1020,7 +1020,7 @@ partialsortperm(v::AbstractVector, k::Union{Integer,OrdinalRange}; kwargs...) = """ partialsortperm!(ix, v, k; by=, lt=, rev=false, initialized=false) -Like `partialsortperm`, but accepts a preallocated index vector `ix` the same size as +Like [`partialsortperm`](@ref Base.partialsortperm), but accepts a preallocated index vector `ix` the same size as `v`, which is used to store (a permutation of) the indices of `v`. If the index vector `ix` is initialized with the indices of `v` (or a permutation thereof), `initialized` should be set to @@ -1092,11 +1092,11 @@ end Return a permutation vector or array `I` that puts `A[I]` in sorted order along the given dimension. If `A` has more than one dimension, then the `dims` keyword argument must be specified. The order is specified -using the same keywords as `sort!`. The permutation is guaranteed to be stable even +using the same keywords as [`sort!`](@ref Base.sort!). The permutation is guaranteed to be stable even if the sorting algorithm is unstable, meaning that indices of equal elements appear in ascending order. -See also `sortperm!`, `partialsortperm`, [`invperm`](@ref), [`indexin`](@ref). +See also [`sortperm!`](@ref Base.sortperm!), [`partialsortperm`](@ref Base.partialsortperm), [`invperm`](@ref), [`indexin`](@ref). To sort slices of an array, refer to [`sortslices`](@ref). # Examples @@ -1159,7 +1159,7 @@ end """ sortperm!(ix, A; alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward, initialized::Bool=false, [dims::Integer]) -Like `sortperm`, but accepts a preallocated index vector or array `ix` with the same `axes` as `A`. If `initialized` is `false` +Like [`sortperm`](@ref Base.sortperm), but accepts a preallocated index vector or array `ix` with the same `axes` as `A`. If `initialized` is `false` (the default), `ix` is initialized to contain the values `LinearIndices(A)`. # Examples @@ -1241,7 +1241,7 @@ end sort(A; dims::Integer, alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) Sort a multidimensional array `A` along the given dimension. -See `sort!` for a description of possible +See [`sort!`](@ref Base.sort!) for a description of possible keyword arguments. To sort slices of an array, refer to [`sortslices`](@ref). @@ -1300,7 +1300,7 @@ end sort!(A; dims::Integer, alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) Sort the multidimensional array `A` along dimension `dims`. -See `sort!` for a description of possible keyword arguments. +See [`sort!`](@ref Base.sort!) for a description of possible keyword arguments. To sort slices of an array, refer to [`sortslices`](@ref). From 0c5c6a802ad4fd3e9372ffd518024fbfc032b56d Mon Sep 17 00:00:00 2001 From: Lilith Hafner Date: Sun, 2 Oct 2022 07:07:55 +0600 Subject: [PATCH 24/24] fix outdated comment (cf #46877) --- base/compiler/sort.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/compiler/sort.jl b/base/compiler/sort.jl index 1998e0a8d9742..71d2f8a51cd59 100644 --- a/base/compiler/sort.jl +++ b/base/compiler/sort.jl @@ -70,7 +70,7 @@ for s in [:searchsortedfirst, :searchsortedlast, :searchsorted] end end -# An unstable sorting algorithm for constructing IncrementalCompact. +# An unstable sorting algorithm for internal use function sort!(v::Vector; by::Function=identity, (<)::Function=<) isempty(v) && return v # This branch is hit 95% of the time