From 83b9de8419d52c3578a36babcb89f7ce0a848aa9 Mon Sep 17 00:00:00 2001 From: Felix Daas Date: Mon, 26 May 2025 16:08:47 +0200 Subject: [PATCH 1/5] pass nonTemporal Attribute in cir.store to memref.store lowering --- clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp index 2428a76b4157..27af5db6b1f9 100644 --- a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp +++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp @@ -265,12 +265,12 @@ class CIRStoreOpLowering : public mlir::OpConversionPattern { mlir::Value value = emitToMemory(rewriter, op, adaptor.getValue()); if (findBaseAndIndices(adaptor.getAddr(), base, indices, eraseList, rewriter)) { - rewriter.replaceOpWithNewOp(op, value, base, - indices); + rewriter.replaceOpWithNewOp( + op, value, base, indices, op.getIsNontemporal()); eraseIfSafe(op.getAddr(), base, eraseList, rewriter); } else - rewriter.replaceOpWithNewOp(op, value, - adaptor.getAddr()); + rewriter.replaceOpWithNewOp( + op, value, adaptor.getAddr(), mlir::ValueRange{}, op.getIsNontemporal()); return mlir::LogicalResult::success(); } }; From 975893e291c796097ce5bce09216c24212f7794f Mon Sep 17 00:00:00 2001 From: Felix Daas Date: Wed, 28 May 2025 11:24:43 +0200 Subject: [PATCH 2/5] pass nonTemporal Attribute in cir.load to memref.load lowering --- .../Lowering/ThroughMLIR/LowerCIRToMLIR.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp index 27af5db6b1f9..0ec37d0da46c 100644 --- a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp +++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp @@ -235,13 +235,14 @@ class CIRLoadOpLowering : public mlir::OpConversionPattern { mlir::memref::LoadOp newLoad; if 
(findBaseAndIndices(adaptor.getAddr(), base, indices, eraseList, rewriter)) { - newLoad = - rewriter.create(op.getLoc(), base, indices); + newLoad = rewriter.create( + op.getLoc(), base, indices, op.getIsNontemporal()); // rewriter.replaceOpWithNewOp(op, base, indices); eraseIfSafe(op.getAddr(), base, eraseList, rewriter); } else - newLoad = - rewriter.create(op.getLoc(), adaptor.getAddr()); + newLoad = rewriter.create( + op.getLoc(), adaptor.getAddr(), mlir::ValueRange{}, + op.getIsNontemporal()); // Convert adapted result to its original type if needed. mlir::Value result = emitFromMemory(rewriter, op, newLoad.getResult()); @@ -270,7 +271,8 @@ class CIRStoreOpLowering : public mlir::OpConversionPattern { eraseIfSafe(op.getAddr(), base, eraseList, rewriter); } else rewriter.replaceOpWithNewOp( - op, value, adaptor.getAddr(), mlir::ValueRange{}, op.getIsNontemporal()); + op, value, adaptor.getAddr(), mlir::ValueRange{}, + op.getIsNontemporal()); return mlir::LogicalResult::success(); } }; @@ -1448,10 +1450,13 @@ mlir::ModuleOp lowerFromCIRToMLIR(mlir::ModuleOp theModule, pm.addPass(createConvertCIRToMLIRPass()); auto result = !mlir::failed(pm.run(theModule)); - if (!result) + if (!result) { + //just for debugging purposes + //TODO: remove before creating a PR + theModule->dump(); report_fatal_error( "The pass manager failed to lower CIR to MLIR standard dialects!"); - + } // Now that we ran all the lowering passes, verify the final output. 
if (theModule.verify().failed()) report_fatal_error( From a81b1e9054f3afe09fd4f5d748399aea254764ab Mon Sep 17 00:00:00 2001 From: Felix Daas Date: Fri, 30 May 2025 12:26:22 +0200 Subject: [PATCH 3/5] first version of fix for eraseifsafe + small improvements to canonical forOp lowering (TODO: cleanup + write tests) --- .../ThroughMLIR/LowerCIRLoopToSCF.cpp | 24 +++++++--- .../Lowering/ThroughMLIR/LowerCIRToMLIR.cpp | 46 ++++++++++++++++--- clang/test/CIR/Lowering/ThroughMLIR/for.cpp | 24 +++------- 3 files changed, 63 insertions(+), 31 deletions(-) diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp index 148038877649..06a2b1c120ee 100644 --- a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp +++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp @@ -303,16 +303,26 @@ void SCFLoop::transferToSCFForOp() { "Not support lowering loop with break, continue or if yet"); // Replace the IV usage to scf loop induction variable. if (isIVLoad(op, ivAddr)) { - // Replace CIR IV load with arith.addi scf.IV, 0. - // The replacement makes the SCF IV can be automatically propogated - // by OpAdaptor for individual IV user lowering. - // The redundant arith.addi can be removed by later MLIR passes. - rewriter->setInsertionPoint(op); - auto newIV = plusConstant(scfForOp.getInductionVar(), loc, 0); - rewriter->replaceOp(op, newIV.getDefiningOp()); + // Replace CIR IV load with scf.IV + // (i.e. remove the load op and replace the uses of the result of the CIR + // IV load with the scf.IV) + rewriter->replaceOp(op, scfForOp.getInductionVar()); } return mlir::WalkResult::advance(); }); + // If the IV was declared in the for op all uses have been replaced by the + // scf.IV and we can remove the alloca + initial store + + // The operations before the loop have been transferred to MLIR. + // So we need to go through getRemappedValue to find the value. 
+ auto remapAddr = rewriter->getRemappedValue(ivAddr); + // If IV has more uses than the use in the initial store op keep it + if (!remapAddr || !remapAddr.hasOneUse()) + return; + + // otherwise remove the alloca + initial store op + rewriter->eraseOp(remapAddr.getDefiningOp()); + rewriter->eraseOp(*remapAddr.user_begin()); } void SCFLoop::transformToSCFWhileOp() { diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp index 0ec37d0da46c..3c0f01b8e847 100644 --- a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp +++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp @@ -28,6 +28,7 @@ #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/IR/BuiltinDialect.h" +#include "mlir/IR/BuiltinOps.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Operation.h" #include "mlir/IR/Region.h" @@ -36,6 +37,7 @@ #include "mlir/IR/ValueRange.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" +#include "mlir/Support/LLVM.h" #include "mlir/Support/LogicalResult.h" #include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" @@ -51,6 +53,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/IR/Value.h" #include "llvm/Support/TimeProfiler.h" using namespace cir; @@ -209,16 +212,45 @@ static bool findBaseAndIndices(mlir::Value addr, mlir::Value &base, static void eraseIfSafe(mlir::Value oldAddr, mlir::Value newAddr, SmallVector &eraseList, mlir::ConversionPatternRewriter &rewriter) { + newAddr.getDefiningOp()->getParentOfType()->dump(); + oldAddr.dump(); + newAddr.dump(); + unsigned oldUsedNum = std::distance(oldAddr.getUses().begin(), oldAddr.getUses().end()); unsigned newUsedNum = 0; for (auto *user : newAddr.getUsers()) { - if (isa(*user) || isa(*user)) - ++newUsedNum; + user->dump(); + if (auto 
loadOpUser = mlir::dyn_cast_or_null(*user)) { + if (auto strideVal = loadOpUser.getIndices()[0]) { + strideVal.dump(); + mlir::dyn_cast(eraseList.back()) + .getOffsets()[0] + .dump(); + if (strideVal == + mlir::dyn_cast(eraseList.back()) + .getOffsets()[0]) + ++newUsedNum; + } + } else if (auto storeOpUser = + mlir::dyn_cast_or_null(*user)) { + if (auto strideVal = storeOpUser.getIndices()[0]) { + strideVal.dump(); + mlir::dyn_cast(eraseList.back()) + .getOffsets()[0] + .dump(); + if (strideVal == + mlir::dyn_cast(eraseList.back()) + .getOffsets()[0]) + ++newUsedNum; + } + } } if (oldUsedNum == newUsedNum) { - for (auto op : eraseList) + for (auto op : eraseList) { + op->dump(); rewriter.eraseOp(op); + } } } @@ -237,7 +269,7 @@ class CIRLoadOpLowering : public mlir::OpConversionPattern { rewriter)) { newLoad = rewriter.create( op.getLoc(), base, indices, op.getIsNontemporal()); - // rewriter.replaceOpWithNewOp(op, base, indices); + newLoad->dump(); eraseIfSafe(op.getAddr(), base, eraseList, rewriter); } else newLoad = rewriter.create( @@ -756,6 +788,8 @@ class CIRScopeOpLowering : public mlir::OpConversionPattern { return mlir::success(); } + // TODO: evaluate if a different mlir core dialect op is better suited for + // this for (auto &block : scopeOp.getScopeRegion()) { rewriter.setInsertionPointToEnd(&block); auto *terminator = block.getTerminator(); @@ -1451,8 +1485,8 @@ mlir::ModuleOp lowerFromCIRToMLIR(mlir::ModuleOp theModule, auto result = !mlir::failed(pm.run(theModule)); if (!result) { - //just for debugging purposes - //TODO: remove before creating a PR + // just for debugging purposes + // TODO: remove before creating a PR theModule->dump(); report_fatal_error( "The pass manager failed to lower CIR to MLIR standard dialects!"); diff --git a/clang/test/CIR/Lowering/ThroughMLIR/for.cpp b/clang/test/CIR/Lowering/ThroughMLIR/for.cpp index 2a6137a0cdc3..71a7669ee5b3 100644 --- a/clang/test/CIR/Lowering/ThroughMLIR/for.cpp +++ 
b/clang/test/CIR/Lowering/ThroughMLIR/for.cpp @@ -14,9 +14,7 @@ void constantLoopBound() { // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C100]] step %[[C1]] : i32 { // CHECK: %[[C3:.*]] = arith.constant 3 : i32 // CHECK: %[[BASE:.*]] = memref.get_global @a : memref<101xi32> -// CHECK: %[[C0_i32:.*]] = arith.constant 0 : i32 -// CHECK: %[[IV:.*]] = arith.addi %[[I]], %[[C0_i32]] : i32 -// CHECK: %[[INDEX:.*]] = arith.index_cast %[[IV]] : i32 to index +// CHECK: %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index // CHECK: memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32> // CHECK: } @@ -33,9 +31,7 @@ void constantLoopBound_LE() { // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C101]] step %[[C1_STEP]] : i32 { // CHECK: %[[C3:.*]] = arith.constant 3 : i32 // CHECK: %[[BASE:.*]] = memref.get_global @a : memref<101xi32> -// CHECK: %[[C0_i32:.*]] = arith.constant 0 : i32 -// CHECK: %[[IV:.*]] = arith.addi %[[I]], %[[C0_i32]] : i32 -// CHECK: %[[INDEX:.*]] = arith.index_cast %[[IV]] : i32 to index +// CHECK: %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index // CHECK: memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32> // CHECK: } @@ -52,9 +48,7 @@ void variableLoopBound(int l, int u) { // CHECK: scf.for %[[I:.*]] = %[[LOWER]] to %[[UPPER]] step %[[C1]] : i32 { // CHECK: %[[C3:.*]] = arith.constant 3 : i32 // CHECK: %[[BASE:.*]] = memref.get_global @a : memref<101xi32> -// CHECK: %[[C0:.*]] = arith.constant 0 : i32 -// CHECK: %[[IV:.*]] = arith.addi %[[I]], %[[C0]] : i32 -// CHECK: %[[INDEX:.*]] = arith.index_cast %[[IV]] : i32 to index +// CHECK: %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index // CHECK: memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32> // CHECK: } @@ -73,9 +67,7 @@ void ariableLoopBound_LE(int l, int u) { // CHECK: scf.for %[[I:.*]] = %[[LOWER]] to %[[UPPER]] step %[[C4]] : i32 { // CHECK: %[[C3:.*]] = arith.constant 3 : i32 // CHECK: %[[BASE:.*]] = memref.get_global @a : memref<101xi32> -// CHECK: %[[C0:.*]] = 
arith.constant 0 : i32 -// CHECK: %[[IV:.*]] = arith.addi %[[I]], %[[C0]] : i32 -// CHECK: %[[INDEX:.*]] = arith.index_cast %[[IV]] : i32 to index +// CHECK: %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index // CHECK: memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32> // CHECK: } @@ -89,14 +81,10 @@ void incArray() { // CHECK: %[[C1:.*]] = arith.constant 1 : i32 // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C100]] step %[[C1]] : i32 { // CHECK: %[[B:.*]] = memref.get_global @b : memref<101xi32> -// CHECK: %[[C0_2:.*]] = arith.constant 0 : i32 -// CHECK: %[[IV2:.*]] = arith.addi %[[I]], %[[C0_2]] : i32 -// CHECK: %[[INDEX_2:.*]] = arith.index_cast %[[IV2]] : i32 to index +// CHECK: %[[INDEX_2:.*]] = arith.index_cast %[[I]] : i32 to index // CHECK: %[[B_VALUE:.*]] = memref.load %[[B]][%[[INDEX_2]]] : memref<101xi32> // CHECK: %[[A:.*]] = memref.get_global @a : memref<101xi32> -// CHECK: %[[C0_1:.*]] = arith.constant 0 : i32 -// CHECK: %[[IV1:.*]] = arith.addi %[[I]], %[[C0_1]] : i32 -// CHECK: %[[INDEX_1:.*]] = arith.index_cast %[[IV1]] : i32 to index +// CHECK: %[[INDEX_1:.*]] = arith.index_cast %[[I]] : i32 to index // CHECK: %[[A_VALUE:.*]] = memref.load %[[A]][%[[INDEX_1]]] : memref<101xi32> // CHECK: %[[SUM:.*]] = arith.addi %[[A_VALUE]], %[[B_VALUE]] : i32 // CHECK: memref.store %[[SUM]], %[[A]][%[[INDEX_1]]] : memref<101xi32> From 78a81f882ee7a43ca6bfca7074a3bde6c959eca4 Mon Sep 17 00:00:00 2001 From: Felix Daas Date: Mon, 2 Jun 2025 12:15:57 +0200 Subject: [PATCH 4/5] improvement to canonical forOp lowering + added/improved new array/for tests --- .../ThroughMLIR/LowerCIRLoopToSCF.cpp | 24 ++++-- .../Lowering/ThroughMLIR/LowerCIRToMLIR.cpp | 41 ++++------ clang/test/CIR/Lowering/ThroughMLIR/array.c | 29 ++++++++ .../CIR/Lowering/ThroughMLIR/for-reject-1.cpp | 24 ------ .../CIR/Lowering/ThroughMLIR/for-reject-2.cpp | 25 ------- .../CIR/Lowering/ThroughMLIR/for-reject.cpp | 74 +++++++++++++++++++ clang/test/CIR/Lowering/ThroughMLIR/for.cpp | 24 
+++++- 7 files changed, 155 insertions(+), 86 deletions(-) delete mode 100644 clang/test/CIR/Lowering/ThroughMLIR/for-reject-1.cpp delete mode 100644 clang/test/CIR/Lowering/ThroughMLIR/for-reject-2.cpp create mode 100644 clang/test/CIR/Lowering/ThroughMLIR/for-reject.cpp diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp index 06a2b1c120ee..16b67ddef33b 100644 --- a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp +++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp @@ -14,15 +14,18 @@ #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Location.h" #include "mlir/IR/ValueRange.h" #include "mlir/Pass/PassManager.h" +#include "mlir/Support/LLVM.h" #include "mlir/Support/LogicalResult.h" #include "mlir/Transforms/DialectConversion.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/LowerToMLIR.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/IR/Module.h" using namespace cir; using namespace llvm; @@ -252,6 +255,14 @@ void SCFLoop::analysis() { if (!canonical) return; + // If the IV is defined before the forOp (i.e. outside the surrounding + // cir.scope) this is not a canonical loop as the IV would not have the + // correct value after the forOp + if (ivAddr.getDefiningOp()->getBlock() != forOp->getBlock()) { + canonical = false; + return; + } + cmpOp = findCmpOp(); if (!cmpOp) { canonical = false; @@ -310,17 +321,14 @@ void SCFLoop::transferToSCFForOp() { } return mlir::WalkResult::advance(); }); - // If the IV was declared in the for op all uses have been replaced by the - // scf.IV and we can remove the alloca + initial store + + // All uses have been replaced by the scf.IV and we can remove the alloca + initial store operations // The operations before the loop have been transferred to MLIR. 
- // So we need to go through getRemappedValue to find the value. + // So we need to go through getRemappedValue to find the operations. auto remapAddr = rewriter->getRemappedValue(ivAddr); - // If IV has more uses than the use in the initial store op keep it - if (!remapAddr || !remapAddr.hasOneUse()) - return; - - // otherwise remove the alloca + initial store op + + // Since this is a canonical loop we can remove the alloca + initial store op rewriter->eraseOp(remapAddr.getDefiningOp()); rewriter->eraseOp(*remapAddr.user_begin()); } diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp index 3c0f01b8e847..1bd6ac0a9e71 100644 --- a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp +++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp @@ -207,26 +207,23 @@ static bool findBaseAndIndices(mlir::Value addr, mlir::Value &base, return true; } -// For memref.reinterpret_cast has multiple users, erasing the operation -// after the last load or store been generated. +// If the memref.reinterpret_cast has multiple users (i.e the original +// cir.ptr_stride op has multiple users), only erase the operation after the +// last load or store has been generated. 
static void eraseIfSafe(mlir::Value oldAddr, mlir::Value newAddr, SmallVector &eraseList, mlir::ConversionPatternRewriter &rewriter) { - newAddr.getDefiningOp()->getParentOfType()->dump(); - oldAddr.dump(); - newAddr.dump(); unsigned oldUsedNum = std::distance(oldAddr.getUses().begin(), oldAddr.getUses().end()); unsigned newUsedNum = 0; + // Count the uses of the newAddr (the result of the original base alloca) in + // load/store ops using a forwarded offset from the current + // memref.reinterpret_cast op for (auto *user : newAddr.getUsers()) { - user->dump(); if (auto loadOpUser = mlir::dyn_cast_or_null(*user)) { - if (auto strideVal = loadOpUser.getIndices()[0]) { - strideVal.dump(); - mlir::dyn_cast(eraseList.back()) - .getOffsets()[0] - .dump(); + if (!loadOpUser.getIndices().empty()) { + auto strideVal = loadOpUser.getIndices()[0]; if (strideVal == mlir::dyn_cast(eraseList.back()) .getOffsets()[0]) @@ -234,11 +231,8 @@ static void eraseIfSafe(mlir::Value oldAddr, mlir::Value newAddr, } } else if (auto storeOpUser = mlir::dyn_cast_or_null(*user)) { - if (auto strideVal = storeOpUser.getIndices()[0]) { - strideVal.dump(); - mlir::dyn_cast(eraseList.back()) - .getOffsets()[0] - .dump(); + if (!storeOpUser.getIndices().empty()) { + auto strideVal = storeOpUser.getIndices()[0]; if (strideVal == mlir::dyn_cast(eraseList.back()) .getOffsets()[0]) @@ -246,11 +240,11 @@ static void eraseIfSafe(mlir::Value oldAddr, mlir::Value newAddr, } } } + // Once every use of oldAddr has a matching load/store op that uses the + // forwarded offset, erase the collected memref.reinterpret_cast ops if (oldUsedNum == newUsedNum) { - for (auto op : eraseList) { - op->dump(); + for (auto op : eraseList) rewriter.eraseOp(op); - } } } @@ -269,7 +263,6 @@ class CIRLoadOpLowering : public mlir::OpConversionPattern { rewriter)) { newLoad = rewriter.create( op.getLoc(), base, indices, op.getIsNontemporal()); - newLoad->dump(); eraseIfSafe(op.getAddr(), base, eraseList, rewriter); } else newLoad = 
rewriter.create( @@ -788,8 +781,6 @@ class CIRScopeOpLowering : public mlir::OpConversionPattern { return mlir::success(); } - // TODO: evaluate if a different mlir core dialect op is better suited for - // this for (auto &block : scopeOp.getScopeRegion()) { rewriter.setInsertionPointToEnd(&block); auto *terminator = block.getTerminator(); @@ -1484,13 +1475,9 @@ mlir::ModuleOp lowerFromCIRToMLIR(mlir::ModuleOp theModule, pm.addPass(createConvertCIRToMLIRPass()); auto result = !mlir::failed(pm.run(theModule)); - if (!result) { - // just for debugging purposes - // TODO: remove before creating a PR - theModule->dump(); + if (!result) report_fatal_error( "The pass manager failed to lower CIR to MLIR standard dialects!"); - } // Now that we ran all the lowering passes, verify the final output. if (theModule.verify().failed()) report_fatal_error( diff --git a/clang/test/CIR/Lowering/ThroughMLIR/array.c b/clang/test/CIR/Lowering/ThroughMLIR/array.c index 0504f4a61694..b02b07c9cfd8 100644 --- a/clang/test/CIR/Lowering/ThroughMLIR/array.c +++ b/clang/test/CIR/Lowering/ThroughMLIR/array.c @@ -29,3 +29,32 @@ int test_array2() { int a[3][4]; return a[1][2]; } + +int test_array3() { + // CIR-LABEL: cir.func {{.*}} @test_array3() + // CIR: %[[ARRAY:.*]] = cir.alloca !cir.array, !cir.ptr>, ["a"] {alignment = 4 : i64} + // CIR: %[[PTRDECAY1:.*]] = cir.cast(array_to_ptrdecay, %[[ARRAY]] : !cir.ptr>), !cir.ptr + // CIR: %[[PTRSTRIDE1:.*]] = cir.ptr_stride(%[[PTRDECAY1]] : !cir.ptr, {{.*}} : !s32i), !cir.ptr + // CIR: {{.*}} = cir.load align(4) %[[PTRSTRIDE1]] : !cir.ptr, !s32i + // CIR: %[[PTRDECAY2:.*]] = cir.cast(array_to_ptrdecay, %[[ARRAY]] : !cir.ptr>), !cir.ptr + // CIR: %[[PTRSTRIDE2:.*]] = cir.ptr_stride(%[[PTRDECAY2]] : !cir.ptr, {{.*}} : !s32i), !cir.ptr + // CIR: %{{.*}} = cir.load align(4) %[[PTRSTRIDE2]] : !cir.ptr, !s32i + // CIR: cir.store align(4) {{.*}}, %[[PTRSTRIDE2]] : !s32i, !cir.ptr + // CIR: %[[PTRDECAY3:.*]] = cir.cast(array_to_ptrdecay, %[[ARRAY]] : 
!cir.ptr>), !cir.ptr + // CIR: %[[PTRSTRIDE3:.*]] = cir.ptr_stride(%[[PTRDECAY3]] : !cir.ptr, {{.*}} : !s32i), !cir.ptr + // CIR: %{{.*}} = cir.load align(4) %[[PTRSTRIDE3]] : !cir.ptr, !s32i + + // MLIR-LABEL: func @test_array3 + // MLIR: %{{.*}} = memref.alloca() {alignment = 4 : i64} : memref + // MLIR: %[[ARRAY:.*]] = memref.alloca() {alignment = 4 : i64} : memref<3xi32> + // MLIR: %[[IDX1:.*]] = arith.index_cast %{{.*}} : i32 to index + // MLIR: %{{.*}} = memref.load %[[ARRAY]][%[[IDX1]]] : memref<3xi32> + // MLIR: %[[IDX2:.*]] = arith.index_cast %{{.*}} : i32 to index + // MLIR: %{{.*}} = memref.load %[[ARRAY]][%[[IDX2]]] : memref<3xi32> + // MLIR: memref.store %{{.*}}, %[[ARRAY]][%[[IDX2]]] : memref<3xi32> + // MLIR: %[[IDX3:.*]] = arith.index_cast %{{.*}} : i32 to index + // MLIR: %{{.*}} = memref.load %[[ARRAY]][%[[IDX3]]] : memref<3xi32> + int a[3]; + a[0] += a[2]; + return a[1]; +} diff --git a/clang/test/CIR/Lowering/ThroughMLIR/for-reject-1.cpp b/clang/test/CIR/Lowering/ThroughMLIR/for-reject-1.cpp deleted file mode 100644 index 60267bfbb953..000000000000 --- a/clang/test/CIR/Lowering/ThroughMLIR/for-reject-1.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir -// RUN: FileCheck --input-file=%t.mlir %s - -void f() {} - -void reject() { - for (int i = 0; i < 100; i++, f()); - // CHECK: %[[ALLOCA:.+]] = memref.alloca - // CHECK: %[[ZERO:.+]] = arith.constant 0 - // CHECK: memref.store %[[ZERO]], %[[ALLOCA]] - // CHECK: %[[HUNDRED:.+]] = arith.constant 100 - // CHECK: scf.while : () -> () { - // CHECK: %[[TMP:.+]] = memref.load %[[ALLOCA]] - // CHECK: %[[TMP1:.+]] = arith.cmpi slt, %0, %[[HUNDRED]] - // CHECK: scf.condition(%[[TMP1]]) - // CHECK: } do { - // CHECK: %[[TMP2:.+]] = memref.load %[[ALLOCA]] - // CHECK: %[[ONE:.+]] = arith.constant 1 - // CHECK: %[[TMP3:.+]] = arith.addi %[[TMP2]], %[[ONE]] - // CHECK: memref.store %[[TMP3]], %[[ALLOCA]] - 
// CHECK: func.call @_Z1fv() - // CHECK: scf.yield - // CHECK: } -} diff --git a/clang/test/CIR/Lowering/ThroughMLIR/for-reject-2.cpp b/clang/test/CIR/Lowering/ThroughMLIR/for-reject-2.cpp deleted file mode 100644 index c58d0675ccc6..000000000000 --- a/clang/test/CIR/Lowering/ThroughMLIR/for-reject-2.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir -// RUN: FileCheck --input-file=%t.mlir %s - -void reject() { - for (int i = 0; i < 100; i++, i++); - // CHECK: %[[ALLOCA:.+]] = memref.alloca - // CHECK: %[[ZERO:.+]] = arith.constant 0 - // CHECK: memref.store %[[ZERO]], %[[ALLOCA]] - // CHECK: %[[HUNDRED:.+]] = arith.constant 100 - // CHECK: scf.while : () -> () { - // CHECK: %[[TMP:.+]] = memref.load %[[ALLOCA]] - // CHECK: %[[TMP2:.+]] = arith.cmpi slt, %[[TMP]], %[[HUNDRED]] - // CHECK: scf.condition(%[[TMP2]]) - // CHECK: } do { - // CHECK: %[[TMP3:.+]] = memref.load %[[ALLOCA]] - // CHECK: %[[ONE:.+]] = arith.constant 1 - // CHECK: %[[ADD:.+]] = arith.addi %[[TMP3]], %[[ONE]] - // CHECK: memref.store %[[ADD]], %[[ALLOCA]] - // CHECK: %[[LOAD:.+]] = memref.load %[[ALLOCA]] - // CHECK: %[[ONE2:.+]] = arith.constant 1 - // CHECK: %[[ADD2:.+]] = arith.addi %[[LOAD]], %[[ONE2]] - // CHECK: memref.store %[[ADD2]], %[[ALLOCA]] - // CHECK: scf.yield - // CHECK: } -} diff --git a/clang/test/CIR/Lowering/ThroughMLIR/for-reject.cpp b/clang/test/CIR/Lowering/ThroughMLIR/for-reject.cpp new file mode 100644 index 000000000000..a0c80d9f8a16 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/for-reject.cpp @@ -0,0 +1,74 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +void f() {} + +void reject_test1() { + for (int i = 0; i < 100; i++, f()); + // CHECK: %[[ALLOCA:.+]] = memref.alloca + // CHECK: %[[ZERO:.+]] = arith.constant 0 + // CHECK: 
memref.store %[[ZERO]], %[[ALLOCA]] + // CHECK: %[[HUNDRED:.+]] = arith.constant 100 + // CHECK: scf.while : () -> () { + // CHECK: %[[TMP:.+]] = memref.load %[[ALLOCA]] + // CHECK: %[[TMP1:.+]] = arith.cmpi slt, %0, %[[HUNDRED]] + // CHECK: scf.condition(%[[TMP1]]) + // CHECK: } do { + // CHECK: %[[TMP2:.+]] = memref.load %[[ALLOCA]] + // CHECK: %[[ONE:.+]] = arith.constant 1 + // CHECK: %[[TMP3:.+]] = arith.addi %[[TMP2]], %[[ONE]] + // CHECK: memref.store %[[TMP3]], %[[ALLOCA]] + // CHECK: func.call @_Z1fv() + // CHECK: scf.yield + // CHECK: } +} + +void reject_test2() { + for (int i = 0; i < 100; i++, i++); + // CHECK: %[[ALLOCA:.+]] = memref.alloca + // CHECK: %[[ZERO:.+]] = arith.constant 0 + // CHECK: memref.store %[[ZERO]], %[[ALLOCA]] + // CHECK: %[[HUNDRED:.+]] = arith.constant 100 + // CHECK: scf.while : () -> () { + // CHECK: %[[TMP:.+]] = memref.load %[[ALLOCA]] + // CHECK: %[[TMP2:.+]] = arith.cmpi slt, %[[TMP]], %[[HUNDRED]] + // CHECK: scf.condition(%[[TMP2]]) + // CHECK: } do { + // CHECK: %[[TMP3:.+]] = memref.load %[[ALLOCA]] + // CHECK: %[[ONE:.+]] = arith.constant 1 + // CHECK: %[[ADD:.+]] = arith.addi %[[TMP3]], %[[ONE]] + // CHECK: memref.store %[[ADD]], %[[ALLOCA]] + // CHECK: %[[LOAD:.+]] = memref.load %[[ALLOCA]] + // CHECK: %[[ONE2:.+]] = arith.constant 1 + // CHECK: %[[ADD2:.+]] = arith.addi %[[LOAD]], %[[ONE2]] + // CHECK: memref.store %[[ADD2]], %[[ALLOCA]] + // CHECK: scf.yield + // CHECK: } +} + +void reject_test3() { + int i; + for (i = 0; i < 100; i++); + i += 10; + // CHECK: %[[ALLOCA:.+]] = memref.alloca() + // CHECK: memref.alloca_scope { + // CHECK: %[[ZERO:.+]] = arith.constant 0 + // CHECK: memref.store %[[ZERO]], %[[ALLOCA]] + // CHECK: %[[HUNDRED:.+]] = arith.constant 100 + // CHECK: scf.while : () -> () { + // CHECK: %[[TMP:.+]] = memref.load %[[ALLOCA]] + // CHECK: %[[TMP2:.+]] = arith.cmpi slt, %[[TMP]], %[[HUNDRED]] + // CHECK: scf.condition(%[[TMP2]]) + // CHECK: } do { + // CHECK: %[[TMP3:.+]] = memref.load 
%[[ALLOCA]] + // CHECK: %[[ONE:.+]] = arith.constant 1 + // CHECK: %[[ADD:.+]] = arith.addi %[[TMP3]], %[[ONE]] + // CHECK: memref.store %[[ADD]], %[[ALLOCA]] + // CHECK: scf.yield + // CHECK: } + // CHECK: } + // CHECK: %[[TEN:.+]] = arith.constant 10 + // CHECK: %[[TMP4:.+]] = memref.load %[[ALLOCA]] + // CHECK: %[[TMP5:.+]] = arith.addi %[[TMP4]], %[[TEN]] + // CHECK: memref.store %[[TMP5]], %[[ALLOCA]] +} diff --git a/clang/test/CIR/Lowering/ThroughMLIR/for.cpp b/clang/test/CIR/Lowering/ThroughMLIR/for.cpp index 71a7669ee5b3..60922d18fbca 100644 --- a/clang/test/CIR/Lowering/ThroughMLIR/for.cpp +++ b/clang/test/CIR/Lowering/ThroughMLIR/for.cpp @@ -8,7 +8,10 @@ void constantLoopBound() { a[i] = 3; } // CHECK-LABEL: func.func @_Z17constantLoopBoundv() { +// CHECK: memref.alloca_scope { +// CHECK-NOT: {{.*}} = memref.alloca() {alignment = 4 : i64} : memref // CHECK: %[[C0:.*]] = arith.constant 0 : i32 +// CHECK-NOT: memref.store %[[C0]], {{.*}}[] : memref // CHECK: %[[C100:.*]] = arith.constant 100 : i32 // CHECK: %[[C1:.*]] = arith.constant 1 : i32 // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C100]] step %[[C1]] : i32 { @@ -17,13 +20,17 @@ void constantLoopBound() { // CHECK: %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index // CHECK: memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32> // CHECK: } +// CHECK: } void constantLoopBound_LE() { for (int i = 0; i <= 100; ++i) a[i] = 3; } // CHECK-LABEL: func.func @_Z20constantLoopBound_LEv() { +// CHECK: memref.alloca_scope { +// CHECK-NOT: {{.*}} = memref.alloca() {alignment = 4 : i64} : memref // CHECK: %[[C0:.*]] = arith.constant 0 : i32 +// CHECK-NOT: memref.store %[[C0]], {{.*}}[] : memref // CHECK: %[[C100:.*]] = arith.constant 100 : i32 // CHECK: %[[C1:.*]] = arith.constant 1 : i32 // CHECK: %[[C101:.*]] = arith.addi %c100_i32, %c1_i32 : i32 @@ -34,6 +41,7 @@ void constantLoopBound_LE() { // CHECK: %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index // CHECK: memref.store %[[C3]], 
%[[BASE]][%[[INDEX]]] : memref<101xi32> // CHECK: } +// CHECK: } void variableLoopBound(int l, int u) { for (int i = l; i < u; ++i) @@ -42,7 +50,10 @@ void variableLoopBound(int l, int u) { // CHECK-LABEL: func.func @_Z17variableLoopBoundii // CHECK: memref.store %arg0, %alloca[] : memref // CHECK: memref.store %arg1, %alloca_0[] : memref +// CHECK: memref.alloca_scope { +// CHECK-NOT: {{.*}} = memref.alloca() {alignment = 4 : i64} : memref // CHECK: %[[LOWER:.*]] = memref.load %alloca[] : memref +// CHECK-NOT: memref.store %[[LOWER]], {{.*}}[] : memref // CHECK: %[[UPPER:.*]] = memref.load %alloca_0[] : memref // CHECK: %[[C1:.*]] = arith.constant 1 : i32 // CHECK: scf.for %[[I:.*]] = %[[LOWER]] to %[[UPPER]] step %[[C1]] : i32 { @@ -51,15 +62,19 @@ void variableLoopBound(int l, int u) { // CHECK: %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index // CHECK: memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32> // CHECK: } +// CHECK: } -void ariableLoopBound_LE(int l, int u) { +void variableLoopBound_LE(int l, int u) { for (int i = l; i <= u; i+=4) a[i] = 3; } -// CHECK-LABEL: func.func @_Z19ariableLoopBound_LEii +// CHECK-LABEL: func.func @_Z20variableLoopBound_LEii // CHECK: memref.store %arg0, %alloca[] : memref // CHECK: memref.store %arg1, %alloca_0[] : memref +// CHECK: memref.alloca_scope { +// CHECK-NOT: {{.*}} = memref.alloca() {alignment = 4 : i64} : memref // CHECK: %[[LOWER:.*]] = memref.load %alloca[] : memref +// CHECK-NOT: memref.store %[[LOWER]], {{.*}}[] : memref // CHECK: %[[UPPER_DEC_1:.*]] = memref.load %alloca_0[] : memref // CHECK: %[[C1:.*]] = arith.constant 1 : i32 // CHECK: %[[UPPER:.*]] = arith.addi %[[UPPER_DEC_1]], %[[C1]] : i32 @@ -70,13 +85,17 @@ void ariableLoopBound_LE(int l, int u) { // CHECK: %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index // CHECK: memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32> // CHECK: } +// CHECK: } void incArray() { for (int i = 0; i < 100; ++i) a[i] += b[i]; } // CHECK-LABEL: 
func.func @_Z8incArrayv() { +// CHECK: memref.alloca_scope { +// CHECK-NOT: {{.*}} = memref.alloca() {alignment = 4 : i64} : memref // CHECK: %[[C0:.*]] = arith.constant 0 : i32 +// CHECK-NOT: memref.store %[[C0]], {{.*}}[] : memref // CHECK: %[[C100:.*]] = arith.constant 100 : i32 // CHECK: %[[C1:.*]] = arith.constant 1 : i32 // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C100]] step %[[C1]] : i32 { @@ -89,3 +108,4 @@ void incArray() { // CHECK: %[[SUM:.*]] = arith.addi %[[A_VALUE]], %[[B_VALUE]] : i32 // CHECK: memref.store %[[SUM]], %[[A]][%[[INDEX_1]]] : memref<101xi32> // CHECK: } +// CHECK: } From 396326513d22fd52888234a4d310b797508ce080 Mon Sep 17 00:00:00 2001 From: Felix Daas Date: Tue, 3 Jun 2025 15:25:02 +0200 Subject: [PATCH 5/5] run SCFPreparePass cir pass always when lowering throughMLIR --- clang/include/clang/CIR/CIRToCIRPasses.h | 2 +- clang/lib/CIR/CodeGen/CIRPasses.cpp | 4 ++-- clang/lib/CIR/FrontendAction/CIRGenAction.cpp | 8 +++----- clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp | 5 +++-- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/clang/include/clang/CIR/CIRToCIRPasses.h b/clang/include/clang/CIR/CIRToCIRPasses.h index 4ad4aeebb22e..5f6e25c8c148 100644 --- a/clang/include/clang/CIR/CIRToCIRPasses.h +++ b/clang/include/clang/CIR/CIRToCIRPasses.h @@ -34,7 +34,7 @@ mlir::LogicalResult runCIRToCIRPasses( llvm::StringRef lifetimeOpts, bool enableIdiomRecognizer, llvm::StringRef idiomRecognizerOpts, bool enableLibOpt, llvm::StringRef libOptOpts, std::string &passOptParsingFailure, - bool enableCIRSimplify, bool flattenCIR, bool emitMLIR, + bool enableCIRSimplify, bool flattenCIR, bool throughMLIR, bool enableCallConvLowering, bool enableMem2reg); } // namespace cir diff --git a/clang/lib/CIR/CodeGen/CIRPasses.cpp b/clang/lib/CIR/CodeGen/CIRPasses.cpp index 59d2445cb16e..9fc51e29bd64 100644 --- a/clang/lib/CIR/CodeGen/CIRPasses.cpp +++ b/clang/lib/CIR/CodeGen/CIRPasses.cpp @@ -28,7 +28,7 @@ mlir::LogicalResult 
runCIRToCIRPasses( llvm::StringRef lifetimeOpts, bool enableIdiomRecognizer, llvm::StringRef idiomRecognizerOpts, bool enableLibOpt, llvm::StringRef libOptOpts, std::string &passOptParsingFailure, - bool enableCIRSimplify, bool flattenCIR, bool emitCore, + bool enableCIRSimplify, bool flattenCIR, bool throughMLIR, bool enableCallConvLowering, bool enableMem2Reg) { llvm::TimeTraceScope scope("CIR To CIR Passes"); @@ -81,7 +81,7 @@ mlir::LogicalResult runCIRToCIRPasses( if (enableMem2Reg) pm.addPass(mlir::createMem2Reg()); - if (emitCore) + if (throughMLIR) pm.addPass(mlir::createSCFPreparePass()); // FIXME: once CIRCodenAction fixes emission other than CIR we diff --git a/clang/lib/CIR/FrontendAction/CIRGenAction.cpp b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp index 970afd03a471..b6a31032de4b 100644 --- a/clang/lib/CIR/FrontendAction/CIRGenAction.cpp +++ b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp @@ -210,9 +210,6 @@ class CIRGenConsumer : public clang::ASTConsumer { action == CIRGenAction::OutputType::EmitMLIR && feOptions.MLIRTargetDialect == clang::frontend::MLIR_CIR_FLAT; - bool emitCore = action == CIRGenAction::OutputType::EmitMLIR && - feOptions.MLIRTargetDialect == clang::frontend::MLIR_CORE; - // Setup and run CIR pipeline. 
std::string passOptParsingFailure; if (runCIRToCIRPasses( @@ -220,8 +217,9 @@ class CIRGenConsumer : public clang::ASTConsumer { feOptions.ClangIRLifetimeCheck, lifetimeOpts, feOptions.ClangIRIdiomRecognizer, idiomRecognizerOpts, feOptions.ClangIRLibOpt, libOptOpts, passOptParsingFailure, - codeGenOptions.OptimizationLevel > 0, flattenCIR, emitCore, - enableCCLowering, feOptions.ClangIREnableMem2Reg) + codeGenOptions.OptimizationLevel > 0, flattenCIR, + !feOptions.ClangIRDirectLowering, enableCCLowering, + feOptions.ClangIREnableMem2Reg) .failed()) { if (!passOptParsingFailure.empty()) diagnosticsEngine.Report(diag::err_drv_cir_pass_opt_parsing) diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp index 16b67ddef33b..ea1d1ddbc37b 100644 --- a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp +++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp @@ -321,8 +321,9 @@ void SCFLoop::transferToSCFForOp() { } return mlir::WalkResult::advance(); }); - - // All uses have been replaced by the scf.IV and we can remove the alloca + initial store operations + + // All uses have been replaced by the scf.IV and we can remove the alloca + + // initial store operations // The operations before the loop have been transferred to MLIR. // So we need to go through getRemappedValue to find the operations.