Skip to content

Commit f00957b

Browse files
Refactor final-gc-lowering (#50741)
1 parent 8066c29 commit f00957b

File tree

12 files changed

+84
-195
lines changed

12 files changed

+84
-195
lines changed

src/jl_exported_funcs.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -583,7 +583,6 @@
583583
YY(LLVMExtraMPMAddCPUFeaturesPass) \
584584
YY(LLVMExtraMPMAddRemoveNIPass) \
585585
YY(LLVMExtraMPMAddLowerSIMDLoopPass) \
586-
YY(LLVMExtraMPMAddFinalLowerGCPass) \
587586
YY(LLVMExtraMPMAddMultiVersioningPass) \
588587
YY(LLVMExtraMPMAddRemoveJuliaAddrspacesPass) \
589588
YY(LLVMExtraMPMAddRemoveAddrspacesPass) \
@@ -595,6 +594,7 @@
595594
YY(LLVMExtraFPMAddPropagateJuliaAddrspacesPass) \
596595
YY(LLVMExtraFPMAddLowerExcHandlersPass) \
597596
YY(LLVMExtraFPMAddGCInvariantVerifierPass) \
597+
YY(LLVMExtraFPMAddFinalLowerGCPass) \
598598
YY(LLVMExtraLPMAddJuliaLICMPass) \
599599
YY(JLJITGetLLVMOrcExecutionSession) \
600600
YY(JLJITGetJuliaOJIT) \

src/llvm-final-gc-lowering.cpp

Lines changed: 58 additions & 171 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,6 @@ using namespace llvm;
4141

4242
struct FinalLowerGC: private JuliaPassContext {
4343
bool runOnFunction(Function &F);
44-
bool doInitialization(Module &M);
45-
bool doFinalization(Module &M);
4644

4745
private:
4846
Function *queueRootFunc;
@@ -53,7 +51,7 @@ struct FinalLowerGC: private JuliaPassContext {
5351
Type *T_size;
5452

5553
// Lowers a `julia.new_gc_frame` intrinsic.
56-
Value *lowerNewGCFrame(CallInst *target, Function &F);
54+
void lowerNewGCFrame(CallInst *target, Function &F);
5755

5856
// Lowers a `julia.push_gc_frame` intrinsic.
5957
void lowerPushGCFrame(CallInst *target, Function &F);
@@ -62,26 +60,26 @@ struct FinalLowerGC: private JuliaPassContext {
6260
void lowerPopGCFrame(CallInst *target, Function &F);
6361

6462
// Lowers a `julia.get_gc_frame_slot` intrinsic.
65-
Value *lowerGetGCFrameSlot(CallInst *target, Function &F);
63+
void lowerGetGCFrameSlot(CallInst *target, Function &F);
6664

6765
// Lowers a `julia.gc_alloc_bytes` intrinsic.
68-
Value *lowerGCAllocBytes(CallInst *target, Function &F);
66+
void lowerGCAllocBytes(CallInst *target, Function &F);
6967

7068
// Lowers a `julia.queue_gc_root` intrinsic.
71-
Value *lowerQueueGCRoot(CallInst *target, Function &F);
69+
void lowerQueueGCRoot(CallInst *target, Function &F);
7270

7371
// Lowers a `julia.safepoint` intrinsic.
74-
Value *lowerSafepoint(CallInst *target, Function &F);
72+
void lowerSafepoint(CallInst *target, Function &F);
7573
};
7674

77-
Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
75+
void FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
7876
{
7977
++NewGCFrameCount;
8078
assert(target->arg_size() == 1);
8179
unsigned nRoots = cast<ConstantInt>(target->getArgOperand(0))->getLimitedValue(INT_MAX);
8280

8381
// Create the GC frame.
84-
IRBuilder<> builder(target->getNextNode());
82+
IRBuilder<> builder(target);
8583
auto gcframe_alloca = builder.CreateAlloca(T_prjlvalue, ConstantInt::get(Type::getInt32Ty(F.getContext()), nRoots + 2));
8684
gcframe_alloca->setAlignment(Align(16));
8785
// addrspacecast as needed for non-0 alloca addrspace
@@ -92,7 +90,8 @@ Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
9290
auto ptrsize = F.getParent()->getDataLayout().getPointerSize();
9391
builder.CreateMemSet(gcframe, Constant::getNullValue(Type::getInt8Ty(F.getContext())), ptrsize * (nRoots + 2), Align(16), tbaa_gcframe);
9492

95-
return gcframe;
93+
target->replaceAllUsesWith(gcframe);
94+
target->eraseFromParent();
9695
}
9796

9897
void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
@@ -102,8 +101,7 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
102101
auto gcframe = target->getArgOperand(0);
103102
unsigned nRoots = cast<ConstantInt>(target->getArgOperand(1))->getLimitedValue(INT_MAX);
104103

105-
IRBuilder<> builder(target->getContext());
106-
builder.SetInsertPoint(&*(++BasicBlock::iterator(target)));
104+
IRBuilder<> builder(target);
107105
StoreInst *inst = builder.CreateAlignedStore(
108106
ConstantInt::get(T_size, JL_GC_ENCODE_PUSHARGS(nRoots)),
109107
builder.CreateBitCast(
@@ -123,6 +121,7 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
123121
gcframe,
124122
builder.CreateBitCast(pgcstack, PointerType::get(PointerType::get(T_prjlvalue, 0), 0)),
125123
Align(sizeof(void*)));
124+
target->eraseFromParent();
126125
}
127126

128127
void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
@@ -131,8 +130,7 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
131130
assert(target->arg_size() == 1);
132131
auto gcframe = target->getArgOperand(0);
133132

134-
IRBuilder<> builder(target->getContext());
135-
builder.SetInsertPoint(target);
133+
IRBuilder<> builder(target);
136134
Instruction *gcpop =
137135
cast<Instruction>(builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 1));
138136
Instruction *inst = builder.CreateAlignedLoad(T_prjlvalue, gcpop, Align(sizeof(void*)), "frame.prev");
@@ -143,55 +141,53 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
143141
PointerType::get(T_prjlvalue, 0)),
144142
Align(sizeof(void*)));
145143
inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
144+
target->eraseFromParent();
146145
}
147146

148-
Value *FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
147+
void FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
149148
{
150149
++GetGCFrameSlotCount;
151150
assert(target->arg_size() == 2);
152151
auto gcframe = target->getArgOperand(0);
153152
auto index = target->getArgOperand(1);
154153

155154
// Initialize an IR builder.
156-
IRBuilder<> builder(target->getContext());
157-
builder.SetInsertPoint(target);
155+
IRBuilder<> builder(target);
158156

159157
// The first two slots are reserved, so we'll add two to the index.
160158
index = builder.CreateAdd(index, ConstantInt::get(Type::getInt32Ty(F.getContext()), 2));
161159

162160
// Lower the intrinsic as a GEP.
163161
auto gep = builder.CreateInBoundsGEP(T_prjlvalue, gcframe, index);
164162
gep->takeName(target);
165-
return gep;
163+
target->replaceAllUsesWith(gep);
164+
target->eraseFromParent();
166165
}
167166

168-
Value *FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F)
167+
void FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F)
169168
{
170169
++QueueGCRootCount;
171170
assert(target->arg_size() == 1);
172171
target->setCalledFunction(queueRootFunc);
173-
return target;
174172
}
175173

176-
Value *FinalLowerGC::lowerSafepoint(CallInst *target, Function &F)
174+
void FinalLowerGC::lowerSafepoint(CallInst *target, Function &F)
177175
{
178176
++SafepointCount;
179177
assert(target->arg_size() == 1);
180-
IRBuilder<> builder(target->getContext());
181-
builder.SetInsertPoint(target);
178+
IRBuilder<> builder(target);
182179
Value* signal_page = target->getOperand(0);
183-
Value* load = builder.CreateLoad(T_size, signal_page, true);
184-
return load;
180+
builder.CreateLoad(T_size, signal_page, true);
181+
target->eraseFromParent();
185182
}
186183

187-
Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
184+
void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
188185
{
189186
++GCAllocBytesCount;
190187
assert(target->arg_size() == 3);
191188
CallInst *newI;
192189

193190
IRBuilder<> builder(target);
194-
builder.SetCurrentDebugLocation(target->getDebugLoc());
195191
auto ptls = target->getArgOperand(0);
196192
auto type = target->getArgOperand(2);
197193
Attribute derefAttr;
@@ -222,86 +218,13 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
222218
newI->setAttributes(newI->getCalledFunction()->getAttributes());
223219
newI->addRetAttr(derefAttr);
224220
newI->takeName(target);
225-
return newI;
226-
}
227-
228-
bool FinalLowerGC::doInitialization(Module &M) {
229-
// Initialize platform-agnostic references.
230-
initAll(M);
231-
232-
// Initialize platform-specific references.
233-
queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
234-
poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc);
235-
bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc);
236-
allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped);
237-
T_size = M.getDataLayout().getIntPtrType(M.getContext());
238-
239-
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
240-
unsigned j = 0;
241-
for (unsigned i = 0; i < sizeof(functionList) / sizeof(void*); i++) {
242-
if (!functionList[i])
243-
continue;
244-
if (i != j)
245-
functionList[j] = functionList[i];
246-
j++;
247-
}
248-
if (j != 0)
249-
appendToCompilerUsed(M, ArrayRef<GlobalValue*>(functionList, j));
250-
return true;
251-
}
252-
253-
bool FinalLowerGC::doFinalization(Module &M)
254-
{
255-
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
256-
queueRootFunc = poolAllocFunc = bigAllocFunc = allocTypedFunc = nullptr;
257-
auto used = M.getGlobalVariable("llvm.compiler.used");
258-
if (!used)
259-
return false;
260-
SmallPtrSet<Constant*, 16> InitAsSet(
261-
functionList,
262-
functionList + sizeof(functionList) / sizeof(void*));
263-
bool changed = false;
264-
SmallVector<Constant*, 16> init;
265-
ConstantArray *CA = cast<ConstantArray>(used->getInitializer());
266-
for (auto &Op : CA->operands()) {
267-
Constant *C = cast_or_null<Constant>(Op);
268-
if (InitAsSet.count(C->stripPointerCasts())) {
269-
changed = true;
270-
continue;
271-
}
272-
init.push_back(C);
273-
}
274-
if (!changed)
275-
return false;
276-
used->eraseFromParent();
277-
if (init.empty())
278-
return true;
279-
ArrayType *ATy = ArrayType::get(Type::getInt8PtrTy(M.getContext()), init.size());
280-
used = new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
281-
ConstantArray::get(ATy, init), "llvm.compiler.used");
282-
used->setSection("llvm.metadata");
283-
return true;
284-
}
285-
286-
template<typename TIterator>
287-
static void replaceInstruction(
288-
Instruction *oldInstruction,
289-
Value *newInstruction,
290-
TIterator &it)
291-
{
292-
if (newInstruction != oldInstruction) {
293-
oldInstruction->replaceAllUsesWith(newInstruction);
294-
it = oldInstruction->eraseFromParent();
295-
}
296-
else {
297-
++it;
298-
}
221+
target->replaceAllUsesWith(newI);
222+
target->eraseFromParent();
299223
}
300224

301225
bool FinalLowerGC::runOnFunction(Function &F)
302226
{
303-
// Check availability of functions again since they might have been deleted.
304-
initFunctions(*F.getParent());
227+
initAll(*F.getParent());
305228
if (!pgcstack_getter && !adoptthread_func) {
306229
LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << "\n");
307230
return false;
@@ -314,55 +237,39 @@ bool FinalLowerGC::runOnFunction(Function &F)
314237
return false;
315238
}
316239
LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");
317-
318-
// Acquire intrinsic functions.
319-
auto newGCFrameFunc = getOrNull(jl_intrinsics::newGCFrame);
320-
auto pushGCFrameFunc = getOrNull(jl_intrinsics::pushGCFrame);
321-
auto popGCFrameFunc = getOrNull(jl_intrinsics::popGCFrame);
322-
auto getGCFrameSlotFunc = getOrNull(jl_intrinsics::getGCFrameSlot);
323-
auto GCAllocBytesFunc = getOrNull(jl_intrinsics::GCAllocBytes);
324-
auto queueGCRootFunc = getOrNull(jl_intrinsics::queueGCRoot);
325-
auto safepointFunc = getOrNull(jl_intrinsics::safepoint);
240+
queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
241+
poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc);
242+
bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc);
243+
allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped);
244+
T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
326245

327246
// Lower all calls to supported intrinsics.
328-
for (BasicBlock &BB : F) {
329-
for (auto it = BB.begin(); it != BB.end();) {
330-
auto *CI = dyn_cast<CallInst>(&*it);
331-
if (!CI) {
332-
++it;
247+
for (auto &BB : F) {
248+
for (auto &I : make_early_inc_range(BB)) {
249+
auto *CI = dyn_cast<CallInst>(&I);
250+
if (!CI)
333251
continue;
334-
}
335252

336253
Value *callee = CI->getCalledOperand();
337254
assert(callee);
338255

339-
if (callee == newGCFrameFunc) {
340-
replaceInstruction(CI, lowerNewGCFrame(CI, F), it);
341-
}
342-
else if (callee == pushGCFrameFunc) {
343-
lowerPushGCFrame(CI, F);
344-
it = CI->eraseFromParent();
345-
}
346-
else if (callee == popGCFrameFunc) {
347-
lowerPopGCFrame(CI, F);
348-
it = CI->eraseFromParent();
349-
}
350-
else if (callee == getGCFrameSlotFunc) {
351-
replaceInstruction(CI, lowerGetGCFrameSlot(CI, F), it);
352-
}
353-
else if (callee == GCAllocBytesFunc) {
354-
replaceInstruction(CI, lowerGCAllocBytes(CI, F), it);
355-
}
356-
else if (callee == queueGCRootFunc) {
357-
replaceInstruction(CI, lowerQueueGCRoot(CI, F), it);
358-
}
359-
else if (callee == safepointFunc) {
360-
lowerSafepoint(CI, F);
361-
it = CI->eraseFromParent();
362-
}
363-
else {
364-
++it;
365-
}
256+
#define LOWER_INTRINSIC(INTRINSIC, LOWER_INTRINSIC_FUNC) \
257+
do { \
258+
auto intrinsic = getOrNull(jl_intrinsics::INTRINSIC); \
259+
if (intrinsic == callee) { \
260+
LOWER_INTRINSIC_FUNC(CI, F); \
261+
} \
262+
} while (0)
263+
264+
LOWER_INTRINSIC(newGCFrame, lowerNewGCFrame);
265+
LOWER_INTRINSIC(pushGCFrame, lowerPushGCFrame);
266+
LOWER_INTRINSIC(popGCFrame, lowerPopGCFrame);
267+
LOWER_INTRINSIC(getGCFrameSlot, lowerGetGCFrameSlot);
268+
LOWER_INTRINSIC(GCAllocBytes, lowerGCAllocBytes);
269+
LOWER_INTRINSIC(queueGCRoot, lowerQueueGCRoot);
270+
LOWER_INTRINSIC(safepoint, lowerSafepoint);
271+
272+
#undef LOWER_INTRINSIC
366273
}
367274
}
368275

@@ -380,44 +287,24 @@ struct FinalLowerGCLegacy: public FunctionPass {
380287

381288
private:
382289
bool runOnFunction(Function &F) override;
383-
bool doInitialization(Module &M) override;
384-
bool doFinalization(Module &M) override;
385290

386291
FinalLowerGC finalLowerGC;
387292
};
388293

389294
bool FinalLowerGCLegacy::runOnFunction(Function &F) {
390-
return finalLowerGC.runOnFunction(F);
391-
}
392-
393-
bool FinalLowerGCLegacy::doInitialization(Module &M) {
394-
return finalLowerGC.doInitialization(M);
395-
}
396-
397-
bool FinalLowerGCLegacy::doFinalization(Module &M) {
398-
auto ret = finalLowerGC.doFinalization(M);
295+
auto modified = finalLowerGC.runOnFunction(F);
399296
#ifdef JL_VERIFY_PASSES
400-
assert(!verifyLLVMIR(M));
297+
assert(!verifyLLVMIR(F));
401298
#endif
402-
return ret;
299+
return modified;
403300
}
404301

405-
406-
PreservedAnalyses FinalLowerGCPass::run(Module &M, ModuleAnalysisManager &AM)
302+
PreservedAnalyses FinalLowerGCPass::run(Function &F, FunctionAnalysisManager &AM)
407303
{
408-
auto finalLowerGC = FinalLowerGC();
409-
bool modified = false;
410-
modified |= finalLowerGC.doInitialization(M);
411-
for (auto &F : M.functions()) {
412-
if (F.isDeclaration())
413-
continue;
414-
modified |= finalLowerGC.runOnFunction(F);
415-
}
416-
modified |= finalLowerGC.doFinalization(M);
304+
if (FinalLowerGC().runOnFunction(F)) {
417305
#ifdef JL_VERIFY_PASSES
418-
assert(!verifyLLVMIR(M));
306+
assert(!verifyLLVMIR(F));
419307
#endif
420-
if (modified) {
421308
return PreservedAnalyses::allInSet<CFGAnalyses>();
422309
}
423310
return PreservedAnalyses::all();

0 commit comments

Comments
 (0)