|
1 | | -# These functions define a heap |
| 1 | +# heap.jl |
| 2 | +# |
| 3 | +# This file defines Heap objects and a variety of utility functions. |
| 4 | +# |
2 | 5 | # Example usage of the Heap object: |
3 | | -# You can create an empty heap this way: |
4 | | -# h = Heap(Float64) |
5 | | -# This defaults to using < for comparison, so is a min-heap. |
| 6 | +# You can create an empty min-heap this way: |
| 7 | +# h = MinHeap(Float64) |
6 | 8 | # |
7 | | -# A more complex and complete example, yielding a max-heap: |
8 | | -# h = Heap(>,rand(3)) # initialize heap with 3 random points |
| 9 | +# A more complex and complete example with a max-heap: |
| 10 | +# h = MaxHeap(rand(3)) # initialize heap with 3 random points |
9 | 11 | # for i = 1:10 |
10 | 12 | # push!(h,rand()) # add more points |
11 | 13 | # end |
12 | 14 | # length(h) |
13 | 15 | # max_val = pop!(h) |
14 | | -# |
15 | | -# Example using pure vectors (avoids making any copies): |
| 16 | +# |
| 17 | +# You can also work with indexed heaps: |
| 18 | +# z = rand(8) |
| 19 | +# h = MinHeapIndirect(eltype(z)) |
| 20 | +# for i = 1:length(z) |
| 21 | +# push!(h,z[i]) |
| 22 | +# end |
| 23 | +# min_index, min_val = pop!(h) |
| 24 | +# |
| 25 | +# Finally, you can do min-heaps using pure vectors. This avoids the |
| 26 | +# need to make a copy of the data: |
16 | 27 | # z = rand(8) |
17 | 28 | # vector2heap!(z) |
18 | 29 | # isheap(z) |
19 | 30 | # min_val = heap_pop!(z) |
20 | 31 | # heap2rsorted!(z) |
| 32 | +# You can also do indirect min-heaps this way, by supplying an index |
| 33 | +# vector as the first argument. |
| 34 | + |
| 35 | +# Timothy E. Holy, 2012 |
21 | 36 |
|
22 | 37 | ## Function definitions for operating on vectors ## |
23 | 38 |
|
| 39 | +# A "direct heap" is represented as a vector, each entry containing |
| 40 | +# the "value" (key) of a particular item. An "indirect heap" is |
| 41 | +# stored as a vector of "integer pointers" (denoted by the variable |
| 42 | +# iptr), and the key of node i is value[iptr[i]]. Representing the |
| 43 | +# heap indirectly (requiring lookup of the value) has some cost, but |
| 44 | +# is useful when the item index is the more fundamentally-interesting |
| 45 | +# quantity. |
| 46 | + |
| 47 | +# For an indirect heap, note that value stores the entire set of |
| 48 | +# values ever added to the heap, whereas iptr reflects the current |
| 49 | +# state of the heap. Hence, value may be a longer vector than iptr, |
| 50 | +# if items have been popped off the heap. |
| 51 | + |
| 52 | +# These functions implement a min-heap. You can get a max-heap using |
| 53 | +# the Heap objects below. |
| 54 | + |
24 | 55 |
|
# "Percolate down" helper shared by the other min-heap routines
# ("percolate up" is implemented in heap_push!).
# Direct version: the vector holds the keys themselves.
#   value : heap storage, reordered in place
#   i     : node index at which sifting starts
#   len   : number of leading entries of value that belong to the heap
function _heapify!{T}(value::Vector{T},i::Int,len::Int)
    left = 2*i   # node index of the left child of i
    while left <= len
        # Find the smallest key among node i and its children
        smallest = i
        if isless(value[left],value[i])
            smallest = left
        end
        right = left+1
        # The right child exists only when it is still inside the heap
        if right <= len && isless(value[right],value[smallest])
            smallest = right
        end
        if smallest == i
            return   # heap property already holds below this node
        end
        # Move the smallest key up to i by swapping the two values
        tmp = value[i]
        value[i] = value[smallest]
        value[smallest] = tmp
        # Continue from the child that received the larger key
        i = smallest
        left = 2*i
    end
end
# "Percolate down", indirect version: the heap is the vector of integer
# pointers iptr, and the key of node k is value[iptr[k]]. Only iptr is
# reordered; value is read but never modified.
#   i   : node index at which sifting starts
#   len : number of leading entries of iptr that belong to the heap
function _heapify!{T}(iptr::Vector{Int},value::Vector{T},i::Int,len::Int)
    left = 2*i   # node index of the left child of i
    while left <= len
        # Find the node with the smallest key among i and its children
        smallest = i
        if isless(value[iptr[left]],value[iptr[i]])
            smallest = left
        end
        right = left+1
        if right <= len && isless(value[iptr[right]],value[iptr[smallest]])
            smallest = right
        end
        if smallest == i
            return   # heap property already holds below this node
        end
        # Swap the pointers, leaving the values where they are
        tmp = iptr[i]
        iptr[i] = iptr[smallest]
        iptr[smallest] = tmp
        i = smallest
        left = 2*i
    end
end
48 | 93 |
|
49 | 94 |
|
# Rearrange an arbitrary vector, in place, into min-heap storage order
# (direct version: the vector holds the keys themselves).
function vector2heap!{T}(value::Vector{T})
    n = length(value)
    # Nodes beyond n/2 have no children, so sift down only the parents,
    # working from the last parent back to the root.
    for node = div(n,2):-1:1
        _heapify!(value,node,n)
    end
end
# Indirect version: reorder the pointer vector iptr, in place, so that the
# keys value[iptr[k]] satisfy the min-heap ordering. value is not modified.
function vector2heap!{T}(iptr::Vector{Int},value::Vector{T})
    n = length(iptr)
    # Sift down every parent node, from the last parent back to the root
    for node = div(n,2):-1:1
        _heapify!(iptr,value,node,n)
    end
end
59 | 106 |
|
# Test whether a vector is a valid min-heap (direct version).
# Returns true when no child key is smaller than its parent's key, and
# false otherwise. Vectors of length 0 or 1 are trivially heaps.
function isheap{T}(value::Vector{T})
    n = length(value)
    # Only nodes 1..n/2 have children
    for i = 1:div(n,2)
        i2 = 2*i   # left child; guaranteed to exist for these i
        if isless(value[i2],value[i])
            return false
        end
        # The right child exists only when i2 < n
        if i2 < n && isless(value[i2+1],value[i])
            return false
        end
    end
    return true
end
# Test whether iptr is a valid indirect min-heap over value, i.e. whether
# the keys value[iptr[k]] satisfy the heap ordering. Only the first
# length(iptr) nodes are examined; value may be longer than iptr.
function isheap{T}(iptr::Vector{Int},value::Vector{T})
    n = length(iptr)
    # Only nodes 1..n/2 have children
    for i = 1:div(n,2)
        i2 = 2*i   # left child; guaranteed to exist for these i
        if isless(value[iptr[i2]],value[iptr[i]])
            return false
        end
        # The right child exists only when i2 < n
        if i2 < n && isless(value[iptr[i2+1]],value[iptr[i]])
            return false
        end
    end
    return true
end
76 | 132 |
|
# Add a new key to a direct min-heap. The key is appended to value and
# then moved up the tree ("percolate up") until its parent is not larger.
function heap_push!{T}(value::Vector{T},newvalue::T)
    # The new key starts out as the last leaf
    push(value,newvalue)
    child = length(value)
    while child > 1
        parent = div(child,2)
        if !isless(value[child],value[parent])
            break   # parent is already no larger: heap property restored
        end
        # Swap the child with its parent and continue one level up
        tmp = value[parent]
        value[parent] = value[child]
        value[child] = tmp
        child = parent
    end
end
# Add a new key to an indirect min-heap. newvalue is appended to value,
# a pointer to it is appended to iptr, and that pointer is then moved up
# the tree until the key of its parent is not larger.
function heap_push!{T}(iptr::Vector{Int},value::Vector{T},newvalue::T)
    push(value,newvalue)
    # The new item's index is its position at the end of value
    push(iptr,length(value))
    child = length(iptr)
    while child > 1
        parent = div(child,2)
        if !isless(value[iptr[child]],value[iptr[parent]])
            break   # parent key is already no larger
        end
        # Swap the pointers, leaving the values where they are
        tmp = iptr[parent]
        iptr[parent] = iptr[child]
        iptr[child] = tmp
        child = parent
    end
end
97 | 159 |
|
# Remove the root (smallest key) from a direct min-heap and return it.
# value becomes one element shorter and is left as a valid heap.
function heap_pop!{T}(value::Vector{T})
    n = length(value)
    root = value[1]
    # Overwrite the root with the last leaf, drop that leaf, and let the
    # relocated key percolate down to its proper place
    value[1] = value[n]
    pop(value)
    _heapify!(value,1,n-1)
    return root
end
# Remove the root from an indirect min-heap and return its pointer, i.e.
# the index into value of the smallest key (not the key itself).
# iptr becomes one element shorter; value is left untouched.
function heap_pop!{T}(iptr::Vector{Int},value::Vector{T})
    n = length(iptr)
    root = iptr[1]
    # Move the last pointer to the root, shrink, and percolate it down
    iptr[1] = iptr[n]
    pop(iptr)
    _heapify!(iptr,value,1,n-1)
    return root
end
114 | 179 |
|
# Convert a direct min-heap, in place, into a sorted vector. Each pass
# moves the current smallest key to the end of the shrinking heap region,
# so the result is in reverse (largest-first) order.
function heap2rsorted!{T}(value::Vector{T})
    k = length(value)   # last position not yet sorted
    while k >= 2
        # Swap the root with position k
        tmp = value[1]
        value[1] = value[k]
        value[k] = tmp
        # The heap now occupies 1..k-1 and needs fixing from the root
        _heapify!(value,1,k-1)
        k = k-1
    end
end
# Indirect version: reorder the pointers of an indirect min-heap, in place,
# so that the keys value[iptr[k]] run from largest to smallest. value is
# not modified.
function heap2rsorted!{T}(iptr::Vector{Int},value::Vector{T})
    k = length(iptr)   # last position not yet sorted
    while k >= 2
        # Swap the root pointer with position k
        tmp = iptr[1]
        iptr[1] = iptr[k]
        iptr[k] = tmp
        # The heap now occupies 1..k-1 and needs fixing from the root
        _heapify!(iptr,value,1,k-1)
        k = k-1
    end
end
131 | 197 |
|
132 | 198 |
|
133 | 199 |
|
134 | | -## Heap object ## |
| 200 | +## Heap objects ## |
135 | 201 |
|
# Type hierarchy for the heap objects defined below:
#   Heap         - common supertype of all heap objects
#   HeapDirect   - heaps whose storage holds the values themselves
#   HeapIndirect - heaps that keep an index vector alongside the values
abstract Heap
abstract HeapDirect <: Heap
abstract HeapIndirect <: Heap
139 | 205 |
|
# MinHeap: a direct min-heap object that owns its storage
type MinHeap{T} <: HeapDirect
    value::Vector{T}   # keys, kept in min-heap order

    # Build a heap from v. The input is copied, so v itself is not reordered.
    function MinHeap(v::Vector{T})
        heap = copy(v)
        vector2heap!(heap)
        new(heap)
    end
end
# Construct from existing data; T is taken from the vector
function MinHeap{T}(v::Vector{T})
    return MinHeap{T}(v)
end
# Construct an empty heap for elements of type T
function MinHeap{T}(::Type{T})
    return MinHeap{T}(zeros(T,0))
end
150 | 218 |
|
# Add item to a MinHeap, keeping the heap ordering.
function push!{T}(h::MinHeap{T},item::T)
    # MinHeap keeps its keys in the field `value`
    heap_push!(h.value,item)
end
154 | 222 |
|
# Remove and return the smallest item of a MinHeap.
function pop!{T}(h::MinHeap{T})
    # MinHeap keeps its keys in the field `value`
    min_item = heap_pop!(h.value)
    return min_item
end
| 227 | + |
# Number of items currently stored in a direct heap.
# (No type parameter is declared: none appears in the signature.)
function length(h::HeapDirect)
    return length(h.value)
end
| 231 | + |
# True when a direct heap holds no items.
# (No type parameter is declared: none appears in the signature.)
function isempty(h::HeapDirect)
    return isempty(h.value)
end
| 235 | + |
# MaxHeap: a direct max-heap object built on the min-heap routines.
# Keys are stored negated, so the smallest stored entry corresponds to the
# largest item. push! and pop! for MaxHeap apply the same negation, which
# means T must support unary minus.
type MaxHeap{T} <: HeapDirect
    value::Vector{T}   # negated keys, kept in min-heap order

    # Build a heap from v. v itself is not modified.
    function MaxHeap(v::Vector{T})
        # Negate on the way in (this also produces a fresh vector), so the
        # initial contents follow the same convention as pushed items
        value = -v
        vector2heap!(value)
        new(value)
    end
end
# Construct from existing data, or empty for a given element type
MaxHeap{T}(v::Vector{T}) = MaxHeap{T}(v)
MaxHeap{T}(::Type{T}) = MaxHeap{T}(zeros(T,0))
| 248 | + |
# Add item to a MaxHeap. The item is negated before it goes into the
# underlying min-heap, which is kept in the field `value`.
function push!{T}(h::MaxHeap{T},item::T)
    heap_push!(h.value,-item)
end
| 252 | + |
# Remove and return the largest item of a MaxHeap. The underlying min-heap
# (field `value`) holds negated items, so the popped entry is negated back.
function pop!{T}(h::MaxHeap{T})
    max_item = -heap_pop!(h.value)
    return max_item
end
| 257 | + |
# MinHeapIndirect: an indexed min-heap. The heap is a vector of indices
# into `value`; the values themselves stay in insertion order.
type MinHeapIndirect{T} <: HeapIndirect
    index::Vector{Int}   # heap-ordered indices into value
    value::Vector{T}     # stored values, in the order they were added

    # Build a heap over a copy of v; initially every position of v is in
    # the heap.
    function MinHeapIndirect(v::Vector{T})
        n = length(v)
        vals = copy(v)
        idx = linspace(1,n,n)
        vector2heap!(idx,vals)
        new(idx,vals)
    end
end
#MinHeapIndirect{T}(i::Vector{Int},v::Vector{T}) = MinHeapIndirect{T}(i,v)
# Construct from existing data; T is taken from the vector
function MinHeapIndirect{T}(v::Vector{T})
    return MinHeapIndirect{T}(v)
end
# Construct an empty heap for values of type T
function MinHeapIndirect{T}(::Type{T})
    return MinHeapIndirect{T}(zeros(T,0))
end
| 273 | + |
# Add newvalue to a MinHeapIndirect: it is appended to h.value and its
# index is inserted into the heap h.index.
push!{T}(h::MinHeapIndirect{T},newvalue::T) = heap_push!(h.index,h.value,newvalue)
| 277 | + |
# Remove the smallest item of a MinHeapIndirect.
# Returns the tuple (index, value), where index is the item's position
# in h.value. h.value itself is not shortened.
function pop!{T}(h::MinHeapIndirect{T})
    i = heap_pop!(h.index,h.value)
    smallest = h.value[i]
    return (i, smallest)
end
| 282 | + |
# Number of items currently in an indirect heap. This is the length of the
# index vector, not of the value vector.
# (No type parameter is declared: none appears in the signature.)
function length(h::HeapIndirect)
    return length(h.index)
end
# True when an indirect heap holds no items, judged by the index vector.
# (No type parameter is declared: none appears in the signature.)
function isempty(h::HeapIndirect)
    return isempty(h.index)
end
| 289 | + |
# MaxHeapIndirect: an indexed max-heap built on the indirect min-heap
# routines. Values are stored negated, so the smallest stored entry
# corresponds to the largest item. push! and pop! for MaxHeapIndirect
# apply the same negation, which means T must support unary minus.
type MaxHeapIndirect{T} <: HeapIndirect
    index::Vector{Int}   # heap-ordered indices into value
    value::Vector{T}     # negated values, in the order they were added

    # The inner constructor must carry the name of the type being defined.
    function MaxHeapIndirect(v::Vector{T})
        # Negate on the way in (this also produces a fresh vector), so the
        # initial contents follow the same convention as pushed items
        value = -v
        index = linspace(1,length(v),length(v))
        vector2heap!(index,value)
        new(index,value)
    end
end
# Construct from existing data, or empty for a given element type
MaxHeapIndirect{T}(v::Vector{T}) = MaxHeapIndirect{T}(v)
MaxHeapIndirect{T}(::Type{T}) = MaxHeapIndirect{T}(zeros(T,0))
| 304 | + |
| 305 | +function push!{T}(h::MaxHeapIndirect{T},newvalue::T) |
| 306 | + heap_push!(h.index,h.value,-newvalue) |
158 | 307 | end |
159 | 308 |
|
# Remove the largest item of a MaxHeapIndirect.
# Returns (index, value), where index is the item's position in h.value.
function pop!{T}(h::MaxHeapIndirect{T})
    max_index = heap_pop!(h.index,h.value)
    # Values are stored negated (push! negates them), so negate the looked-up
    # entry to recover the original value
    return max_index, -h.value[max_index]
end
0 commit comments