From 83b9de8419d52c3578a36babcb89f7ce0a848aa9 Mon Sep 17 00:00:00 2001
From: Felix Daas <f1298@t-online.de>
Date: Mon, 26 May 2025 16:08:47 +0200
Subject: [PATCH 1/5] pass nonTemporal Attribute in cir.store to memref.store
 lowering

---
 clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp
index 2428a76b4157..27af5db6b1f9 100644
--- a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp
+++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp
@@ -265,12 +265,12 @@ class CIRStoreOpLowering : public mlir::OpConversionPattern<cir::StoreOp> {
     mlir::Value value = emitToMemory(rewriter, op, adaptor.getValue());
     if (findBaseAndIndices(adaptor.getAddr(), base, indices, eraseList,
                            rewriter)) {
-      rewriter.replaceOpWithNewOp<mlir::memref::StoreOp>(op, value, base,
-                                                         indices);
+      rewriter.replaceOpWithNewOp<mlir::memref::StoreOp>(
+          op, value, base, indices, op.getIsNontemporal());
       eraseIfSafe(op.getAddr(), base, eraseList, rewriter);
     } else
-      rewriter.replaceOpWithNewOp<mlir::memref::StoreOp>(op, value,
-                                                         adaptor.getAddr());
+      rewriter.replaceOpWithNewOp<mlir::memref::StoreOp>(
+          op, value, adaptor.getAddr(), mlir::ValueRange{}, op.getIsNontemporal());
     return mlir::LogicalResult::success();
   }
 };

From 975893e291c796097ce5bce09216c24212f7794f Mon Sep 17 00:00:00 2001
From: Felix Daas <f1298@t-online.de>
Date: Wed, 28 May 2025 11:24:43 +0200
Subject: [PATCH 2/5] pass nonTemporal Attribute in cir.load to memref.load
 lowering

---
 .../Lowering/ThroughMLIR/LowerCIRToMLIR.cpp   | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp
index 27af5db6b1f9..0ec37d0da46c 100644
--- a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp
+++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp
@@ -235,13 +235,14 @@ class CIRLoadOpLowering : public mlir::OpConversionPattern<cir::LoadOp> {
     mlir::memref::LoadOp newLoad;
     if (findBaseAndIndices(adaptor.getAddr(), base, indices, eraseList,
                            rewriter)) {
-      newLoad =
-          rewriter.create<mlir::memref::LoadOp>(op.getLoc(), base, indices);
+      newLoad = rewriter.create<mlir::memref::LoadOp>(
+          op.getLoc(), base, indices, op.getIsNontemporal());
       // rewriter.replaceOpWithNewOp<mlir::memref::LoadOp>(op, base, indices);
       eraseIfSafe(op.getAddr(), base, eraseList, rewriter);
     } else
-      newLoad =
-          rewriter.create<mlir::memref::LoadOp>(op.getLoc(), adaptor.getAddr());
+      newLoad = rewriter.create<mlir::memref::LoadOp>(
+          op.getLoc(), adaptor.getAddr(), mlir::ValueRange{},
+          op.getIsNontemporal());
 
     // Convert adapted result to its original type if needed.
     mlir::Value result = emitFromMemory(rewriter, op, newLoad.getResult());
@@ -270,7 +271,8 @@ class CIRStoreOpLowering : public mlir::OpConversionPattern<cir::StoreOp> {
       eraseIfSafe(op.getAddr(), base, eraseList, rewriter);
     } else
       rewriter.replaceOpWithNewOp<mlir::memref::StoreOp>(
-          op, value, adaptor.getAddr(), mlir::ValueRange{}, op.getIsNontemporal());
+          op, value, adaptor.getAddr(), mlir::ValueRange{},
+          op.getIsNontemporal());
     return mlir::LogicalResult::success();
   }
 };
@@ -1448,10 +1450,13 @@ mlir::ModuleOp lowerFromCIRToMLIR(mlir::ModuleOp theModule,
   pm.addPass(createConvertCIRToMLIRPass());
 
   auto result = !mlir::failed(pm.run(theModule));
-  if (!result)
+  if (!result) {
+    //just for debugging purposes
+    //TODO: remove before creating a PR
+    theModule->dump();
     report_fatal_error(
         "The pass manager failed to lower CIR to MLIR standard dialects!");
-
+  }
   // Now that we ran all the lowering passes, verify the final output.
   if (theModule.verify().failed())
     report_fatal_error(

From a81b1e9054f3afe09fd4f5d748399aea254764ab Mon Sep 17 00:00:00 2001
From: Felix Daas <f1298@t-online.de>
Date: Fri, 30 May 2025 12:26:22 +0200
Subject: [PATCH 3/5] first version of fix for eraseIfSafe + small improvements
 to canonical forOp lowering (TODO: cleanup + write tests)

---
 .../ThroughMLIR/LowerCIRLoopToSCF.cpp         | 24 +++++++---
 .../Lowering/ThroughMLIR/LowerCIRToMLIR.cpp   | 46 ++++++++++++++++---
 clang/test/CIR/Lowering/ThroughMLIR/for.cpp   | 24 +++-------
 3 files changed, 63 insertions(+), 31 deletions(-)

diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp
index 148038877649..06a2b1c120ee 100644
--- a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp
+++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp
@@ -303,16 +303,26 @@ void SCFLoop::transferToSCFForOp() {
           "Not support lowering loop with break, continue or if yet");
     // Replace the IV usage to scf loop induction variable.
     if (isIVLoad(op, ivAddr)) {
-      // Replace CIR IV load with arith.addi scf.IV, 0.
-      // The replacement makes the SCF IV can be automatically propogated
-      // by OpAdaptor for individual IV user lowering.
-      // The redundant arith.addi can be removed by later MLIR passes.
-      rewriter->setInsertionPoint(op);
-      auto newIV = plusConstant(scfForOp.getInductionVar(), loc, 0);
-      rewriter->replaceOp(op, newIV.getDefiningOp());
+      // Replace CIR IV load with scf.IV
+      // (i.e. remove the load op and replace the uses of the result of the CIR
+      // IV load with the scf.IV)
+      rewriter->replaceOp(op, scfForOp.getInductionVar());
     }
     return mlir::WalkResult::advance();
   });
+  // If the IV was declared in the for op all uses have been replaced by the
+  // scf.IV and we can remove the alloca + initial store
+
+  // The operations before the loop have been transferred to MLIR.
+  // So we need to go through getRemappedValue to find the value.
+  auto remapAddr = rewriter->getRemappedValue(ivAddr);
+  // If IV has more uses than the use in the initial store op keep it
+  if (!remapAddr || !remapAddr.hasOneUse())
+    return;
+  
+  // otherwise remove the alloca + initial store op
+  rewriter->eraseOp(remapAddr.getDefiningOp());
+  rewriter->eraseOp(*remapAddr.user_begin());
 }
 
 void SCFLoop::transformToSCFWhileOp() {
diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp
index 0ec37d0da46c..3c0f01b8e847 100644
--- a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp
+++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp
@@ -28,6 +28,7 @@
 #include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include "mlir/IR/BuiltinDialect.h"
+#include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Operation.h"
 #include "mlir/IR/Region.h"
@@ -36,6 +37,7 @@
 #include "mlir/IR/ValueRange.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"
+#include "mlir/Support/LLVM.h"
 #include "mlir/Support/LogicalResult.h"
 #include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
@@ -51,6 +53,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include "llvm/IR/Value.h"
 #include "llvm/Support/TimeProfiler.h"
 
 using namespace cir;
@@ -209,16 +212,45 @@ static bool findBaseAndIndices(mlir::Value addr, mlir::Value &base,
 static void eraseIfSafe(mlir::Value oldAddr, mlir::Value newAddr,
                         SmallVector<mlir::Operation *> &eraseList,
                         mlir::ConversionPatternRewriter &rewriter) {
+  newAddr.getDefiningOp()->getParentOfType<mlir::ModuleOp>()->dump();
+  oldAddr.dump();
+  newAddr.dump();
+
   unsigned oldUsedNum =
       std::distance(oldAddr.getUses().begin(), oldAddr.getUses().end());
   unsigned newUsedNum = 0;
   for (auto *user : newAddr.getUsers()) {
-    if (isa<mlir::memref::LoadOp>(*user) || isa<mlir::memref::StoreOp>(*user))
-      ++newUsedNum;
+    user->dump();
+    if (auto loadOpUser = mlir::dyn_cast_or_null<mlir::memref::LoadOp>(*user)) {
+      if (auto strideVal = loadOpUser.getIndices()[0]) {
+        strideVal.dump();
+        mlir::dyn_cast<mlir::memref::ReinterpretCastOp>(eraseList.back())
+            .getOffsets()[0]
+            .dump();
+        if (strideVal ==
+            mlir::dyn_cast<mlir::memref::ReinterpretCastOp>(eraseList.back())
+                .getOffsets()[0])
+          ++newUsedNum;
+      }
+    } else if (auto storeOpUser =
+                   mlir::dyn_cast_or_null<mlir::memref::StoreOp>(*user)) {
+      if (auto strideVal = storeOpUser.getIndices()[0]) {
+        strideVal.dump();
+        mlir::dyn_cast<mlir::memref::ReinterpretCastOp>(eraseList.back())
+            .getOffsets()[0]
+            .dump();
+        if (strideVal ==
+            mlir::dyn_cast<mlir::memref::ReinterpretCastOp>(eraseList.back())
+                .getOffsets()[0])
+          ++newUsedNum;
+      }
+    }
   }
   if (oldUsedNum == newUsedNum) {
-    for (auto op : eraseList)
+    for (auto op : eraseList) {
+      op->dump();
       rewriter.eraseOp(op);
+    }
   }
 }
 
@@ -237,7 +269,7 @@ class CIRLoadOpLowering : public mlir::OpConversionPattern<cir::LoadOp> {
                            rewriter)) {
       newLoad = rewriter.create<mlir::memref::LoadOp>(
           op.getLoc(), base, indices, op.getIsNontemporal());
-      // rewriter.replaceOpWithNewOp<mlir::memref::LoadOp>(op, base, indices);
+      newLoad->dump();
       eraseIfSafe(op.getAddr(), base, eraseList, rewriter);
     } else
       newLoad = rewriter.create<mlir::memref::LoadOp>(
@@ -756,6 +788,8 @@ class CIRScopeOpLowering : public mlir::OpConversionPattern<cir::ScopeOp> {
       return mlir::success();
     }
 
+    // TODO: evaluate if a different mlir core dialect op is better suited for
+    // this
     for (auto &block : scopeOp.getScopeRegion()) {
       rewriter.setInsertionPointToEnd(&block);
       auto *terminator = block.getTerminator();
@@ -1451,8 +1485,8 @@ mlir::ModuleOp lowerFromCIRToMLIR(mlir::ModuleOp theModule,
 
   auto result = !mlir::failed(pm.run(theModule));
   if (!result) {
-    //just for debugging purposes
-    //TODO: remove before creating a PR
+    // just for debugging purposes
+    // TODO: remove before creating a PR
     theModule->dump();
     report_fatal_error(
         "The pass manager failed to lower CIR to MLIR standard dialects!");
diff --git a/clang/test/CIR/Lowering/ThroughMLIR/for.cpp b/clang/test/CIR/Lowering/ThroughMLIR/for.cpp
index 2a6137a0cdc3..71a7669ee5b3 100644
--- a/clang/test/CIR/Lowering/ThroughMLIR/for.cpp
+++ b/clang/test/CIR/Lowering/ThroughMLIR/for.cpp
@@ -14,9 +14,7 @@ void constantLoopBound() {
 // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C100]] step %[[C1]] : i32 {
 // CHECK:   %[[C3:.*]] = arith.constant 3 : i32
 // CHECK:   %[[BASE:.*]] = memref.get_global @a : memref<101xi32>
-// CHECK:   %[[C0_i32:.*]] = arith.constant 0 : i32
-// CHECK:   %[[IV:.*]] = arith.addi %[[I]], %[[C0_i32]] : i32
-// CHECK:   %[[INDEX:.*]] = arith.index_cast %[[IV]] : i32 to index
+// CHECK:   %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index
 // CHECK:   memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32>
 // CHECK: }
 
@@ -33,9 +31,7 @@ void constantLoopBound_LE() {
 // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C101]] step %[[C1_STEP]] : i32 {
 // CHECK:   %[[C3:.*]] = arith.constant 3 : i32
 // CHECK:   %[[BASE:.*]] = memref.get_global @a : memref<101xi32>
-// CHECK:   %[[C0_i32:.*]] = arith.constant 0 : i32
-// CHECK:   %[[IV:.*]] = arith.addi %[[I]], %[[C0_i32]] : i32
-// CHECK:   %[[INDEX:.*]] = arith.index_cast %[[IV]] : i32 to index
+// CHECK:   %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index
 // CHECK:   memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32>
 // CHECK: }
 
@@ -52,9 +48,7 @@ void variableLoopBound(int l, int u) {
 // CHECK: scf.for %[[I:.*]] = %[[LOWER]] to %[[UPPER]] step %[[C1]] : i32 {
 // CHECK:   %[[C3:.*]] = arith.constant 3 : i32
 // CHECK:   %[[BASE:.*]] = memref.get_global @a : memref<101xi32>
-// CHECK:   %[[C0:.*]] = arith.constant 0 : i32
-// CHECK:   %[[IV:.*]] = arith.addi %[[I]], %[[C0]] : i32
-// CHECK:   %[[INDEX:.*]] = arith.index_cast %[[IV]] : i32 to index
+// CHECK:   %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index
 // CHECK:   memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32>
 // CHECK: }
 
@@ -73,9 +67,7 @@ void ariableLoopBound_LE(int l, int u) {
 // CHECK: scf.for %[[I:.*]] = %[[LOWER]] to %[[UPPER]] step %[[C4]] : i32 {
 // CHECK:   %[[C3:.*]] = arith.constant 3 : i32
 // CHECK:   %[[BASE:.*]] = memref.get_global @a : memref<101xi32>
-// CHECK:   %[[C0:.*]] = arith.constant 0 : i32
-// CHECK:   %[[IV:.*]] = arith.addi %[[I]], %[[C0]] : i32
-// CHECK:   %[[INDEX:.*]] = arith.index_cast %[[IV]] : i32 to index
+// CHECK:   %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index
 // CHECK:   memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32>
 // CHECK: }
 
@@ -89,14 +81,10 @@ void incArray() {
 // CHECK: %[[C1:.*]] = arith.constant 1 : i32
 // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C100]] step %[[C1]] : i32 {
 // CHECK:   %[[B:.*]] = memref.get_global @b : memref<101xi32>
-// CHECK:   %[[C0_2:.*]] = arith.constant 0 : i32
-// CHECK:   %[[IV2:.*]] = arith.addi %[[I]], %[[C0_2]] : i32
-// CHECK:   %[[INDEX_2:.*]] = arith.index_cast %[[IV2]] : i32 to index
+// CHECK:   %[[INDEX_2:.*]] = arith.index_cast %[[I]] : i32 to index
 // CHECK:   %[[B_VALUE:.*]] = memref.load %[[B]][%[[INDEX_2]]] : memref<101xi32>
 // CHECK:   %[[A:.*]] = memref.get_global @a : memref<101xi32>
-// CHECK:   %[[C0_1:.*]] = arith.constant 0 : i32
-// CHECK:   %[[IV1:.*]] = arith.addi %[[I]], %[[C0_1]] : i32
-// CHECK:   %[[INDEX_1:.*]] = arith.index_cast %[[IV1]] : i32 to index
+// CHECK:   %[[INDEX_1:.*]] = arith.index_cast %[[I]] : i32 to index
 // CHECK:   %[[A_VALUE:.*]] = memref.load %[[A]][%[[INDEX_1]]] : memref<101xi32>
 // CHECK:   %[[SUM:.*]] = arith.addi %[[A_VALUE]], %[[B_VALUE]] : i32
 // CHECK:   memref.store %[[SUM]], %[[A]][%[[INDEX_1]]] : memref<101xi32>

From 78a81f882ee7a43ca6bfca7074a3bde6c959eca4 Mon Sep 17 00:00:00 2001
From: Felix Daas <f1298@t-online.de>
Date: Mon, 2 Jun 2025 12:15:57 +0200
Subject: [PATCH 4/5] improvement to canonical forOp lowering + added/improved
 new array/for tests

---
 .../ThroughMLIR/LowerCIRLoopToSCF.cpp         | 24 ++++--
 .../Lowering/ThroughMLIR/LowerCIRToMLIR.cpp   | 41 ++++------
 clang/test/CIR/Lowering/ThroughMLIR/array.c   | 29 ++++++++
 .../CIR/Lowering/ThroughMLIR/for-reject-1.cpp | 24 ------
 .../CIR/Lowering/ThroughMLIR/for-reject-2.cpp | 25 -------
 .../CIR/Lowering/ThroughMLIR/for-reject.cpp   | 74 +++++++++++++++++++
 clang/test/CIR/Lowering/ThroughMLIR/for.cpp   | 24 +++++-
 7 files changed, 155 insertions(+), 86 deletions(-)
 delete mode 100644 clang/test/CIR/Lowering/ThroughMLIR/for-reject-1.cpp
 delete mode 100644 clang/test/CIR/Lowering/ThroughMLIR/for-reject-2.cpp
 create mode 100644 clang/test/CIR/Lowering/ThroughMLIR/for-reject.cpp

diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp
index 06a2b1c120ee..16b67ddef33b 100644
--- a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp
+++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp
@@ -14,15 +14,18 @@
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Location.h"
 #include "mlir/IR/ValueRange.h"
 #include "mlir/Pass/PassManager.h"
+#include "mlir/Support/LLVM.h"
 #include "mlir/Support/LogicalResult.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
 #include "clang/CIR/LowerToMLIR.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include "llvm/IR/Module.h"
 
 using namespace cir;
 using namespace llvm;
@@ -252,6 +255,14 @@ void SCFLoop::analysis() {
   if (!canonical)
     return;
 
+  // If the IV is defined before the forOp (i.e. outside the surrounding
+  // cir.scope) this is not a canonical loop as the IV would not have the
+  // correct value after the forOp
+  if (ivAddr.getDefiningOp()->getBlock() != forOp->getBlock()) {
+    canonical = false;
+    return;
+  }
+
   cmpOp = findCmpOp();
   if (!cmpOp) {
     canonical = false;
@@ -310,17 +321,14 @@ void SCFLoop::transferToSCFForOp() {
     }
     return mlir::WalkResult::advance();
   });
-  // If the IV was declared in the for op all uses have been replaced by the
-  // scf.IV and we can remove the alloca + initial store
+  
+  // All uses were replaced by the scf.IV; remove the alloca + initial store.
 
   // The operations before the loop have been transferred to MLIR.
-  // So we need to go through getRemappedValue to find the value.
+  // So we need to go through getRemappedValue to find the operations.
   auto remapAddr = rewriter->getRemappedValue(ivAddr);
-  // If IV has more uses than the use in the initial store op keep it
-  if (!remapAddr || !remapAddr.hasOneUse())
-    return;
-  
-  // otherwise remove the alloca + initial store op
+
+  // Since this is a canonical loop we can remove the alloca + initial store op
   rewriter->eraseOp(remapAddr.getDefiningOp());
   rewriter->eraseOp(*remapAddr.user_begin());
 }
diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp
index 3c0f01b8e847..1bd6ac0a9e71 100644
--- a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp
+++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp
@@ -207,26 +207,23 @@ static bool findBaseAndIndices(mlir::Value addr, mlir::Value &base,
   return true;
 }
 
-// For memref.reinterpret_cast has multiple users, erasing the operation
-// after the last load or store been generated.
+// If the memref.reinterpret_cast has multiple users (i.e. the original
+// cir.ptr_stride op has multiple users), only erase the operation after the
+// last load or store has been generated.
 static void eraseIfSafe(mlir::Value oldAddr, mlir::Value newAddr,
                         SmallVector<mlir::Operation *> &eraseList,
                         mlir::ConversionPatternRewriter &rewriter) {
-  newAddr.getDefiningOp()->getParentOfType<mlir::ModuleOp>()->dump();
-  oldAddr.dump();
-  newAddr.dump();
 
   unsigned oldUsedNum =
       std::distance(oldAddr.getUses().begin(), oldAddr.getUses().end());
   unsigned newUsedNum = 0;
+  // Count the uses of the newAddr (the result of the original base alloca) in
+  // load/store ops using a forwarded offset from the current
+  // memref.reinterpret_cast op.
   for (auto *user : newAddr.getUsers()) {
-    user->dump();
     if (auto loadOpUser = mlir::dyn_cast_or_null<mlir::memref::LoadOp>(*user)) {
-      if (auto strideVal = loadOpUser.getIndices()[0]) {
-        strideVal.dump();
-        mlir::dyn_cast<mlir::memref::ReinterpretCastOp>(eraseList.back())
-            .getOffsets()[0]
-            .dump();
+      if (!loadOpUser.getIndices().empty()) {
+        auto strideVal = loadOpUser.getIndices()[0];
         if (strideVal ==
             mlir::dyn_cast<mlir::memref::ReinterpretCastOp>(eraseList.back())
                 .getOffsets()[0])
@@ -234,11 +231,8 @@ static void eraseIfSafe(mlir::Value oldAddr, mlir::Value newAddr,
       }
     } else if (auto storeOpUser =
                    mlir::dyn_cast_or_null<mlir::memref::StoreOp>(*user)) {
-      if (auto strideVal = storeOpUser.getIndices()[0]) {
-        strideVal.dump();
-        mlir::dyn_cast<mlir::memref::ReinterpretCastOp>(eraseList.back())
-            .getOffsets()[0]
-            .dump();
+      if (!storeOpUser.getIndices().empty()) {
+        auto strideVal = storeOpUser.getIndices()[0];
         if (strideVal ==
             mlir::dyn_cast<mlir::memref::ReinterpretCastOp>(eraseList.back())
                 .getOffsets()[0])
@@ -246,11 +240,11 @@ static void eraseIfSafe(mlir::Value oldAddr, mlir::Value newAddr,
       }
     }
   }
+  // Erase the memref.reinterpret_cast ops once every use of the old address
+  // has a matching load/store op that uses the forwarded offset
   if (oldUsedNum == newUsedNum) {
-    for (auto op : eraseList) {
-      op->dump();
+    for (auto op : eraseList)
       rewriter.eraseOp(op);
-    }
   }
 }
 
@@ -269,7 +263,6 @@ class CIRLoadOpLowering : public mlir::OpConversionPattern<cir::LoadOp> {
                            rewriter)) {
       newLoad = rewriter.create<mlir::memref::LoadOp>(
           op.getLoc(), base, indices, op.getIsNontemporal());
-      newLoad->dump();
       eraseIfSafe(op.getAddr(), base, eraseList, rewriter);
     } else
       newLoad = rewriter.create<mlir::memref::LoadOp>(
@@ -788,8 +781,6 @@ class CIRScopeOpLowering : public mlir::OpConversionPattern<cir::ScopeOp> {
       return mlir::success();
     }
 
-    // TODO: evaluate if a different mlir core dialect op is better suited for
-    // this
     for (auto &block : scopeOp.getScopeRegion()) {
       rewriter.setInsertionPointToEnd(&block);
       auto *terminator = block.getTerminator();
@@ -1484,13 +1475,9 @@ mlir::ModuleOp lowerFromCIRToMLIR(mlir::ModuleOp theModule,
   pm.addPass(createConvertCIRToMLIRPass());
 
   auto result = !mlir::failed(pm.run(theModule));
-  if (!result) {
-    // just for debugging purposes
-    // TODO: remove before creating a PR
-    theModule->dump();
+  if (!result)
     report_fatal_error(
         "The pass manager failed to lower CIR to MLIR standard dialects!");
-  }
   // Now that we ran all the lowering passes, verify the final output.
   if (theModule.verify().failed())
     report_fatal_error(
diff --git a/clang/test/CIR/Lowering/ThroughMLIR/array.c b/clang/test/CIR/Lowering/ThroughMLIR/array.c
index 0504f4a61694..b02b07c9cfd8 100644
--- a/clang/test/CIR/Lowering/ThroughMLIR/array.c
+++ b/clang/test/CIR/Lowering/ThroughMLIR/array.c
@@ -29,3 +29,32 @@ int test_array2() {
     int a[3][4];
     return a[1][2]; 
 }
+
+int test_array3() {
+    // CIR-LABEL: cir.func {{.*}} @test_array3()
+    // CIR: %[[ARRAY:.*]] = cir.alloca !cir.array<!s32i x 3>, !cir.ptr<!cir.array<!s32i x 3>>, ["a"] {alignment = 4 : i64}
+    // CIR: %[[PTRDECAY1:.*]] = cir.cast(array_to_ptrdecay, %[[ARRAY]] : !cir.ptr<!cir.array<!s32i x 3>>), !cir.ptr<!s32i> 
+    // CIR: %[[PTRSTRIDE1:.*]] = cir.ptr_stride(%[[PTRDECAY1]] : !cir.ptr<!s32i>, {{.*}} : !s32i), !cir.ptr<!s32i>
+    // CIR: {{.*}} = cir.load align(4) %[[PTRSTRIDE1]] : !cir.ptr<!s32i>, !s32i
+    // CIR: %[[PTRDECAY2:.*]] = cir.cast(array_to_ptrdecay, %[[ARRAY]] : !cir.ptr<!cir.array<!s32i x 3>>), !cir.ptr<!s32i>
+    // CIR: %[[PTRSTRIDE2:.*]] = cir.ptr_stride(%[[PTRDECAY2]] : !cir.ptr<!s32i>, {{.*}} : !s32i), !cir.ptr<!s32i>
+    // CIR: %{{.*}} = cir.load align(4) %[[PTRSTRIDE2]] : !cir.ptr<!s32i>, !s32i
+    // CIR: cir.store align(4) {{.*}}, %[[PTRSTRIDE2]] : !s32i, !cir.ptr<!s32i>
+    // CIR: %[[PTRDECAY3:.*]] = cir.cast(array_to_ptrdecay, %[[ARRAY]] : !cir.ptr<!cir.array<!s32i x 3>>), !cir.ptr<!s32i> 
+    // CIR: %[[PTRSTRIDE3:.*]] = cir.ptr_stride(%[[PTRDECAY3]] : !cir.ptr<!s32i>, {{.*}} : !s32i), !cir.ptr<!s32i>
+    // CIR: %{{.*}} = cir.load align(4) %[[PTRSTRIDE3]] : !cir.ptr<!s32i>, !s32i  
+
+    // MLIR-LABEL: func @test_array3
+    // MLIR: %{{.*}} = memref.alloca() {alignment = 4 : i64} : memref<i32>
+    // MLIR: %[[ARRAY:.*]] = memref.alloca() {alignment = 4 : i64} : memref<3xi32>
+    // MLIR: %[[IDX1:.*]] = arith.index_cast %{{.*}} : i32 to index
+    // MLIR: %{{.*}} = memref.load %[[ARRAY]][%[[IDX1]]] : memref<3xi32>
+    // MLIR: %[[IDX2:.*]] = arith.index_cast %{{.*}} : i32 to index
+    // MLIR: %{{.*}} = memref.load %[[ARRAY]][%[[IDX2]]] : memref<3xi32> 
+    // MLIR: memref.store %{{.*}}, %[[ARRAY]][%[[IDX2]]] : memref<3xi32> 
+    // MLIR: %[[IDX3:.*]] = arith.index_cast %{{.*}} : i32 to index
+    // MLIR: %{{.*}} = memref.load %[[ARRAY]][%[[IDX3]]] : memref<3xi32> 
+    int a[3]; // expected to lower to a memref<3xi32> alloca (see MLIR checks)
+    a[0] += a[2]; // checks expect one plain load, then a load + store pair
+    return a[1];  
+}
diff --git a/clang/test/CIR/Lowering/ThroughMLIR/for-reject-1.cpp b/clang/test/CIR/Lowering/ThroughMLIR/for-reject-1.cpp
deleted file mode 100644
index 60267bfbb953..000000000000
--- a/clang/test/CIR/Lowering/ThroughMLIR/for-reject-1.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
-// RUN: FileCheck --input-file=%t.mlir %s
-
-void f() {}
-
-void reject() {
-  for (int i = 0; i < 100; i++, f());
-  // CHECK: %[[ALLOCA:.+]] = memref.alloca
-  // CHECK: %[[ZERO:.+]] = arith.constant 0
-  // CHECK: memref.store %[[ZERO]], %[[ALLOCA]]
-  // CHECK: %[[HUNDRED:.+]] = arith.constant 100
-  // CHECK: scf.while : () -> () {
-  // CHECK:   %[[TMP:.+]] = memref.load %[[ALLOCA]]
-  // CHECK:   %[[TMP1:.+]] = arith.cmpi slt, %0, %[[HUNDRED]]
-  // CHECK:   scf.condition(%[[TMP1]])
-  // CHECK: } do {
-  // CHECK:   %[[TMP2:.+]] = memref.load %[[ALLOCA]]
-  // CHECK:   %[[ONE:.+]] = arith.constant 1
-  // CHECK:   %[[TMP3:.+]] = arith.addi %[[TMP2]], %[[ONE]]
-  // CHECK:   memref.store %[[TMP3]], %[[ALLOCA]]
-  // CHECK:   func.call @_Z1fv()
-  // CHECK:   scf.yield
-  // CHECK: }
-}
diff --git a/clang/test/CIR/Lowering/ThroughMLIR/for-reject-2.cpp b/clang/test/CIR/Lowering/ThroughMLIR/for-reject-2.cpp
deleted file mode 100644
index c58d0675ccc6..000000000000
--- a/clang/test/CIR/Lowering/ThroughMLIR/for-reject-2.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
-// RUN: FileCheck --input-file=%t.mlir %s
-
-void reject() {
-  for (int i = 0; i < 100; i++, i++);
-  // CHECK: %[[ALLOCA:.+]] = memref.alloca
-  // CHECK: %[[ZERO:.+]] = arith.constant 0
-  // CHECK: memref.store %[[ZERO]], %[[ALLOCA]]
-  // CHECK: %[[HUNDRED:.+]] = arith.constant 100
-  // CHECK: scf.while : () -> () {
-  // CHECK:   %[[TMP:.+]] = memref.load %[[ALLOCA]]
-  // CHECK:   %[[TMP2:.+]] = arith.cmpi slt, %[[TMP]], %[[HUNDRED]]
-  // CHECK:   scf.condition(%[[TMP2]])
-  // CHECK: } do {
-  // CHECK:   %[[TMP3:.+]] = memref.load %[[ALLOCA]]
-  // CHECK:   %[[ONE:.+]] = arith.constant 1
-  // CHECK:   %[[ADD:.+]] = arith.addi %[[TMP3]], %[[ONE]]
-  // CHECK:   memref.store %[[ADD]], %[[ALLOCA]]
-  // CHECK:   %[[LOAD:.+]] = memref.load %[[ALLOCA]]
-  // CHECK:   %[[ONE2:.+]] = arith.constant 1
-  // CHECK:   %[[ADD2:.+]] = arith.addi %[[LOAD]], %[[ONE2]]
-  // CHECK:   memref.store %[[ADD2]], %[[ALLOCA]]
-  // CHECK:   scf.yield
-  // CHECK: }
-}
diff --git a/clang/test/CIR/Lowering/ThroughMLIR/for-reject.cpp b/clang/test/CIR/Lowering/ThroughMLIR/for-reject.cpp
new file mode 100644
index 000000000000..a0c80d9f8a16
--- /dev/null
+++ b/clang/test/CIR/Lowering/ThroughMLIR/for-reject.cpp
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+void f() {}
+
+void reject_test1() {
+  for (int i = 0; i < 100; i++, f()); // step also calls f(); expect scf.while
+  // CHECK: %[[ALLOCA:.+]] = memref.alloca
+  // CHECK: %[[ZERO:.+]] = arith.constant 0
+  // CHECK: memref.store %[[ZERO]], %[[ALLOCA]]
+  // CHECK: %[[HUNDRED:.+]] = arith.constant 100
+  // CHECK: scf.while : () -> () {
+  // CHECK:   %[[TMP:.+]] = memref.load %[[ALLOCA]]
+  // CHECK:   %[[TMP1:.+]] = arith.cmpi slt, %[[TMP]], %[[HUNDRED]]
+  // CHECK:   scf.condition(%[[TMP1]])
+  // CHECK: } do {
+  // CHECK:   %[[TMP2:.+]] = memref.load %[[ALLOCA]]
+  // CHECK:   %[[ONE:.+]] = arith.constant 1
+  // CHECK:   %[[TMP3:.+]] = arith.addi %[[TMP2]], %[[ONE]]
+  // CHECK:   memref.store %[[TMP3]], %[[ALLOCA]]
+  // CHECK:   func.call @_Z1fv()
+  // CHECK:   scf.yield
+  // CHECK: }
+}
+
+void reject_test2() {
+  for (int i = 0; i < 100; i++, i++); // two increments; expect scf.while
+  // CHECK: %[[ALLOCA:.+]] = memref.alloca
+  // CHECK: %[[ZERO:.+]] = arith.constant 0
+  // CHECK: memref.store %[[ZERO]], %[[ALLOCA]]
+  // CHECK: %[[HUNDRED:.+]] = arith.constant 100
+  // CHECK: scf.while : () -> () {
+  // CHECK:   %[[TMP:.+]] = memref.load %[[ALLOCA]]
+  // CHECK:   %[[TMP2:.+]] = arith.cmpi slt, %[[TMP]], %[[HUNDRED]]
+  // CHECK:   scf.condition(%[[TMP2]])
+  // CHECK: } do {
+  // CHECK:   %[[TMP3:.+]] = memref.load %[[ALLOCA]]
+  // CHECK:   %[[ONE:.+]] = arith.constant 1
+  // CHECK:   %[[ADD:.+]] = arith.addi %[[TMP3]], %[[ONE]]
+  // CHECK:   memref.store %[[ADD]], %[[ALLOCA]]
+  // CHECK:   %[[LOAD:.+]] = memref.load %[[ALLOCA]]
+  // CHECK:   %[[ONE2:.+]] = arith.constant 1
+  // CHECK:   %[[ADD2:.+]] = arith.addi %[[LOAD]], %[[ONE2]]
+  // CHECK:   memref.store %[[ADD2]], %[[ALLOCA]]
+  // CHECK:   scf.yield
+  // CHECK: }
+}
+
+void reject_test3() {
+  int i; // declared before the loop and still read/written after it
+  for (i = 0; i < 100; i++);
+  i += 10;
+  // CHECK: %[[ALLOCA:.+]] = memref.alloca()
+  // CHECK: memref.alloca_scope  {
+  // CHECK: %[[ZERO:.+]] = arith.constant 0
+  // CHECK: memref.store %[[ZERO]], %[[ALLOCA]]
+  // CHECK: %[[HUNDRED:.+]] = arith.constant 100
+  // CHECK: scf.while : () -> () {
+  // CHECK:   %[[TMP:.+]] = memref.load %[[ALLOCA]]
+  // CHECK:   %[[TMP2:.+]] = arith.cmpi slt, %[[TMP]], %[[HUNDRED]]
+  // CHECK:   scf.condition(%[[TMP2]])
+  // CHECK: } do {
+  // CHECK:   %[[TMP3:.+]] = memref.load %[[ALLOCA]]
+  // CHECK:   %[[ONE:.+]] = arith.constant 1
+  // CHECK:   %[[ADD:.+]] = arith.addi %[[TMP3]], %[[ONE]]
+  // CHECK:   memref.store %[[ADD]], %[[ALLOCA]]
+  // CHECK:   scf.yield
+  // CHECK: }
+  // CHECK: }
+  // CHECK: %[[TEN:.+]] = arith.constant 10
+  // CHECK: %[[TMP4:.+]] = memref.load %[[ALLOCA]]
+  // CHECK: %[[TMP5:.+]] = arith.addi %[[TMP4]], %[[TEN]]
+  // CHECK: memref.store %[[TMP5]], %[[ALLOCA]]
+}
diff --git a/clang/test/CIR/Lowering/ThroughMLIR/for.cpp b/clang/test/CIR/Lowering/ThroughMLIR/for.cpp
index 71a7669ee5b3..60922d18fbca 100644
--- a/clang/test/CIR/Lowering/ThroughMLIR/for.cpp
+++ b/clang/test/CIR/Lowering/ThroughMLIR/for.cpp
@@ -8,7 +8,10 @@ void constantLoopBound() {
     a[i] = 3;
 }
 // CHECK-LABEL: func.func @_Z17constantLoopBoundv() {
+// CHECK: memref.alloca_scope  {
+// CHECK-NOT: {{.*}} = memref.alloca() {alignment = 4 : i64} : memref<i32>
 // CHECK: %[[C0:.*]] = arith.constant 0 : i32
+// CHECK-NOT: memref.store %[[C0]], {{.*}}[] : memref<i32>
 // CHECK: %[[C100:.*]] = arith.constant 100 : i32
 // CHECK: %[[C1:.*]] = arith.constant 1 : i32
 // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C100]] step %[[C1]] : i32 {
@@ -17,13 +20,17 @@ void constantLoopBound() {
 // CHECK:   %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index
 // CHECK:   memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32>
 // CHECK: }
+// CHECK: }
 
 void constantLoopBound_LE() {
   for (int i = 0; i <= 100; ++i)
     a[i] = 3;
 }
 // CHECK-LABEL: func.func @_Z20constantLoopBound_LEv() {
+// CHECK: memref.alloca_scope  {
+// CHECK-NOT: {{.*}} = memref.alloca() {alignment = 4 : i64} : memref<i32>
 // CHECK: %[[C0:.*]] = arith.constant 0 : i32
+// CHECK-NOT: memref.store %[[C0]], {{.*}}[] : memref<i32>
 // CHECK: %[[C100:.*]] = arith.constant 100 : i32
 // CHECK: %[[C1:.*]] = arith.constant 1 : i32
 // CHECK: %[[C101:.*]] = arith.addi %c100_i32, %c1_i32 : i32
@@ -34,6 +41,7 @@ void constantLoopBound_LE() {
 // CHECK:   %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index
 // CHECK:   memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32>
 // CHECK: }
+// CHECK: }
 
 void variableLoopBound(int l, int u) {
   for (int i = l; i < u; ++i)
@@ -42,7 +50,10 @@ void variableLoopBound(int l, int u) {
 // CHECK-LABEL: func.func @_Z17variableLoopBoundii
 // CHECK: memref.store %arg0, %alloca[] : memref<i32>
 // CHECK: memref.store %arg1, %alloca_0[] : memref<i32>
+// CHECK: memref.alloca_scope  {
+// CHECK-NOT: {{.*}} = memref.alloca() {alignment = 4 : i64} : memref<i32>
 // CHECK: %[[LOWER:.*]] = memref.load %alloca[] : memref<i32>
+// CHECK-NOT: memref.store %[[LOWER]], {{.*}}[] : memref<i32>
 // CHECK: %[[UPPER:.*]] = memref.load %alloca_0[] : memref<i32>
 // CHECK: %[[C1:.*]] = arith.constant 1 : i32
 // CHECK: scf.for %[[I:.*]] = %[[LOWER]] to %[[UPPER]] step %[[C1]] : i32 {
@@ -51,15 +62,19 @@ void variableLoopBound(int l, int u) {
 // CHECK:   %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index
 // CHECK:   memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32>
 // CHECK: }
+// CHECK: }
 
-void ariableLoopBound_LE(int l, int u) {
+void variableLoopBound_LE(int l, int u) {
   for (int i = l; i <= u; i+=4)
     a[i] = 3;
 }
-// CHECK-LABEL: func.func @_Z19ariableLoopBound_LEii
+// CHECK-LABEL: func.func @_Z20variableLoopBound_LEii
 // CHECK: memref.store %arg0, %alloca[] : memref<i32>
 // CHECK: memref.store %arg1, %alloca_0[] : memref<i32>
+// CHECK: memref.alloca_scope  {
+// CHECK-NOT: {{.*}} = memref.alloca() {alignment = 4 : i64} : memref<i32>
 // CHECK: %[[LOWER:.*]] = memref.load %alloca[] : memref<i32>
+// CHECK-NOT: memref.store %[[LOWER]], {{.*}}[] : memref<i32>
 // CHECK: %[[UPPER_DEC_1:.*]] = memref.load %alloca_0[] : memref<i32>
 // CHECK: %[[C1:.*]] = arith.constant 1 : i32
 // CHECK: %[[UPPER:.*]] = arith.addi %[[UPPER_DEC_1]], %[[C1]] : i32
@@ -70,13 +85,17 @@ void ariableLoopBound_LE(int l, int u) {
 // CHECK:   %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index
 // CHECK:   memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32>
 // CHECK: }
+// CHECK: }
 
 void incArray() {
   for (int i = 0; i < 100; ++i)
     a[i] += b[i];
 }
 // CHECK-LABEL: func.func @_Z8incArrayv() {
+// CHECK: memref.alloca_scope  {
+// CHECK-NOT: {{.*}} = memref.alloca() {alignment = 4 : i64} : memref<i32>
 // CHECK: %[[C0:.*]] = arith.constant 0 : i32
+// CHECK-NOT: memref.store %[[C0]], {{.*}}[] : memref<i32>
 // CHECK: %[[C100:.*]] = arith.constant 100 : i32
 // CHECK: %[[C1:.*]] = arith.constant 1 : i32
 // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C100]] step %[[C1]] : i32 {
@@ -89,3 +108,4 @@ void incArray() {
 // CHECK:   %[[SUM:.*]] = arith.addi %[[A_VALUE]], %[[B_VALUE]] : i32
 // CHECK:   memref.store %[[SUM]], %[[A]][%[[INDEX_1]]] : memref<101xi32>
 // CHECK: }
+// CHECK: }

From 396326513d22fd52888234a4d310b797508ce080 Mon Sep 17 00:00:00 2001
From: Felix Daas <f1298@t-online.de>
Date: Tue, 3 Jun 2025 15:25:02 +0200
Subject: [PATCH 5/5] always run the CIR SCFPreparePass when lowering
 through MLIR

---
 clang/include/clang/CIR/CIRToCIRPasses.h                 | 2 +-
 clang/lib/CIR/CodeGen/CIRPasses.cpp                      | 4 ++--
 clang/lib/CIR/FrontendAction/CIRGenAction.cpp            | 8 +++-----
 clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp | 5 +++--
 4 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/clang/include/clang/CIR/CIRToCIRPasses.h b/clang/include/clang/CIR/CIRToCIRPasses.h
index 4ad4aeebb22e..5f6e25c8c148 100644
--- a/clang/include/clang/CIR/CIRToCIRPasses.h
+++ b/clang/include/clang/CIR/CIRToCIRPasses.h
@@ -34,7 +34,7 @@ mlir::LogicalResult runCIRToCIRPasses(
     llvm::StringRef lifetimeOpts, bool enableIdiomRecognizer,
     llvm::StringRef idiomRecognizerOpts, bool enableLibOpt,
     llvm::StringRef libOptOpts, std::string &passOptParsingFailure,
-    bool enableCIRSimplify, bool flattenCIR, bool emitMLIR,
+    bool enableCIRSimplify, bool flattenCIR, bool throughMLIR,
     bool enableCallConvLowering, bool enableMem2reg);
 
 } // namespace cir
diff --git a/clang/lib/CIR/CodeGen/CIRPasses.cpp b/clang/lib/CIR/CodeGen/CIRPasses.cpp
index 59d2445cb16e..9fc51e29bd64 100644
--- a/clang/lib/CIR/CodeGen/CIRPasses.cpp
+++ b/clang/lib/CIR/CodeGen/CIRPasses.cpp
@@ -28,7 +28,7 @@ mlir::LogicalResult runCIRToCIRPasses(
     llvm::StringRef lifetimeOpts, bool enableIdiomRecognizer,
     llvm::StringRef idiomRecognizerOpts, bool enableLibOpt,
     llvm::StringRef libOptOpts, std::string &passOptParsingFailure,
-    bool enableCIRSimplify, bool flattenCIR, bool emitCore,
+    bool enableCIRSimplify, bool flattenCIR, bool throughMLIR,
     bool enableCallConvLowering, bool enableMem2Reg) {
 
   llvm::TimeTraceScope scope("CIR To CIR Passes");
@@ -81,7 +81,7 @@ mlir::LogicalResult runCIRToCIRPasses(
   if (enableMem2Reg)
     pm.addPass(mlir::createMem2Reg());
 
-  if (emitCore)
+  if (throughMLIR)
     pm.addPass(mlir::createSCFPreparePass());
 
   // FIXME: once CIRCodenAction fixes emission other than CIR we
diff --git a/clang/lib/CIR/FrontendAction/CIRGenAction.cpp b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp
index 970afd03a471..b6a31032de4b 100644
--- a/clang/lib/CIR/FrontendAction/CIRGenAction.cpp
+++ b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp
@@ -210,9 +210,6 @@ class CIRGenConsumer : public clang::ASTConsumer {
           action == CIRGenAction::OutputType::EmitMLIR &&
           feOptions.MLIRTargetDialect == clang::frontend::MLIR_CIR_FLAT;
 
-      bool emitCore = action == CIRGenAction::OutputType::EmitMLIR &&
-                      feOptions.MLIRTargetDialect == clang::frontend::MLIR_CORE;
-
       // Setup and run CIR pipeline.
       std::string passOptParsingFailure;
       if (runCIRToCIRPasses(
@@ -220,8 +217,9 @@ class CIRGenConsumer : public clang::ASTConsumer {
               feOptions.ClangIRLifetimeCheck, lifetimeOpts,
               feOptions.ClangIRIdiomRecognizer, idiomRecognizerOpts,
               feOptions.ClangIRLibOpt, libOptOpts, passOptParsingFailure,
-              codeGenOptions.OptimizationLevel > 0, flattenCIR, emitCore,
-              enableCCLowering, feOptions.ClangIREnableMem2Reg)
+              codeGenOptions.OptimizationLevel > 0, flattenCIR,
+              !feOptions.ClangIRDirectLowering, enableCCLowering,
+              feOptions.ClangIREnableMem2Reg)
               .failed()) {
         if (!passOptParsingFailure.empty())
           diagnosticsEngine.Report(diag::err_drv_cir_pass_opt_parsing)
diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp
index 16b67ddef33b..ea1d1ddbc37b 100644
--- a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp
+++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp
@@ -321,8 +321,9 @@ void SCFLoop::transferToSCFForOp() {
     }
     return mlir::WalkResult::advance();
   });
-  
-  // All uses have been replaced by the scf.IV and we can remove the alloca + initial store operations
+
+  // All uses have been replaced by the scf.for induction variable, so the
+  // alloca and the initial store operations can be removed.
 
   // The operations before the loop have been transferred to MLIR.
   // So we need to go through getRemappedValue to find the operations.