Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
e440258
emit const attr for cuda
anominos Feb 25, 2025
73fabd6
emitGlobal: move shouldEmitCUDAGlobalVar to first check
anominos Feb 28, 2025
76901bb
tests: add constant to global-vars.cu
anominos Mar 3, 2025
2617e0f
implement const
anominos Mar 6, 2025
a250e36
fix address space for offload_constant
anominos Mar 6, 2025
8a3c8f1
Merge branch 'main' into anom/constant-var
anominos Mar 6, 2025
d514fc4
Merge branch 'anom/const-var' into anom/constant-var
anominos Mar 6, 2025
33c353f
Fix address space values for NVPTX
anominos Mar 6, 2025
92cc36e
add testcase
anominos Mar 7, 2025
6e7f8ab
Merge branch 'anom/fix-nvptx-addrspace' into anom/constant-var
anominos Mar 7, 2025
07ac4f5
test: enable const test in addrspace-lowering.cu
anominos Mar 7, 2025
466ce1d
[CIR][CUDA] Support builtin CUDA variables
advay168 Mar 6, 2025
6b902e5
[CIR][CUDA] Fix formatting
advay168 Mar 7, 2025
57c5572
[CIR][CUDA] Defer failing test
advay168 Mar 7, 2025
e480ce4
[CIR][CUDA] Revert change to test
advay168 Mar 7, 2025
0c1c8ed
[CIR][CUDA] Fix test
advay168 Mar 8, 2025
3431265
Merge branch 'main' into anom/constant-var
anominos Mar 8, 2025
bf4c995
remove duplicated case
anominos Mar 8, 2025
0a26ede
Merge branch 'main' into anom/constant-var
anominos Mar 11, 2025
0776935
Merge remote-tracking branch 'aiden/anom/constant-var'
advay168 Mar 11, 2025
f2f98d6
Fix style
advay168 Mar 11, 2025
108f38e
Change naming case
advay168 Mar 11, 2025
19aead8
Merge remote-tracking branch 'original/main'
advay168 Mar 11, 2025
01b5987
Formatting
advay168 Mar 11, 2025
c0ed490
Change naming
advay168 Mar 12, 2025
d6239ef
Naming
advay168 Mar 12, 2025
05b644b
Merge remote-tracking branch 'original/main'
advay168 Mar 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2497,7 +2497,12 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
}

if (IntrinsicID != Intrinsic::not_intrinsic) {
llvm_unreachable("NYI");
unsigned iceArguments = 0;
ASTContext::GetBuiltinTypeError error;
getContext().GetBuiltinType(BuiltinID, error, &iceArguments);
assert(error == ASTContext::GE_None && "Should not codegen an error");
if (iceArguments > 0)
llvm_unreachable("NYI");
}

// Some target-specific builtins can have aggregate return values, e.g.
Expand Down Expand Up @@ -2595,7 +2600,7 @@ static mlir::Value emitTargetArchBuiltinExpr(CIRGenFunction *CGF,
llvm_unreachable("NYI");
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
llvm_unreachable("NYI");
return CGF->emitNVPTXBuiltinExpr(BuiltinID, E);
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
llvm_unreachable("NYI");
Expand Down
82 changes: 82 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
//===---- CIRGenBuiltinX86.cpp - Emit CIR for X86 builtins ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit NVPTX Builtin calls.
//
//===----------------------------------------------------------------------===//

#include "CIRGenCXXABI.h"
#include "CIRGenCall.h"
#include "CIRGenFunction.h"
#include "CIRGenModule.h"
#include "TargetInfo.h"
#include "clang/CIR/MissingFeatures.h"

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Value.h"
#include "clang/AST/GlobalDecl.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
#include "llvm/Support/ErrorHandling.h"

using namespace clang;
using namespace clang::CIRGen;
using namespace cir;

mlir::Value CIRGenFunction::emitNVPTXBuiltinExpr(unsigned builtinId,
const CallExpr *expr) {
auto getIntrinsic = [&](const char *name) {
mlir::Type intTy = cir::IntType::get(&getMLIRContext(), 32, false);
return builder
.create<cir::LLVMIntrinsicCallOp>(getLoc(expr->getExprLoc()),
builder.getStringAttr(name), intTy)
.getResult();
};
switch (builtinId) {
case NVPTX::BI__nvvm_read_ptx_sreg_tid_x:
return getIntrinsic("nvvm.read.ptx.sreg.tid.x");
case NVPTX::BI__nvvm_read_ptx_sreg_tid_y:
return getIntrinsic("nvvm.read.ptx.sreg.tid.y");
case NVPTX::BI__nvvm_read_ptx_sreg_tid_z:
return getIntrinsic("nvvm.read.ptx.sreg.tid.z");
case NVPTX::BI__nvvm_read_ptx_sreg_tid_w:
return getIntrinsic("nvvm.read.ptx.sreg.tid.w");

case NVPTX::BI__nvvm_read_ptx_sreg_ntid_x:
return getIntrinsic("nvvm.read.ptx.sreg.ntid.x");
case NVPTX::BI__nvvm_read_ptx_sreg_ntid_y:
return getIntrinsic("nvvm.read.ptx.sreg.ntid.y");
case NVPTX::BI__nvvm_read_ptx_sreg_ntid_z:
return getIntrinsic("nvvm.read.ptx.sreg.ntid.z");
case NVPTX::BI__nvvm_read_ptx_sreg_ntid_w:
return getIntrinsic("nvvm.read.ptx.sreg.ntid.w");

case NVPTX::BI__nvvm_read_ptx_sreg_ctaid_x:
return getIntrinsic("nvvm.read.ptx.sreg.ctaid.x");
case NVPTX::BI__nvvm_read_ptx_sreg_ctaid_y:
return getIntrinsic("nvvm.read.ptx.sreg.ctaid.y");
case NVPTX::BI__nvvm_read_ptx_sreg_ctaid_z:
return getIntrinsic("nvvm.read.ptx.sreg.ctaid.z");
case NVPTX::BI__nvvm_read_ptx_sreg_ctaid_w:
return getIntrinsic("nvvm.read.ptx.sreg.ctaid.w");

case NVPTX::BI__nvvm_read_ptx_sreg_nctaid_x:
return getIntrinsic("nvvm.read.ptx.sreg.nctaid.x");
case NVPTX::BI__nvvm_read_ptx_sreg_nctaid_y:
return getIntrinsic("nvvm.read.ptx.sreg.nctaid.y");
case NVPTX::BI__nvvm_read_ptx_sreg_nctaid_z:
return getIntrinsic("nvvm.read.ptx.sreg.nctaid.z");
case NVPTX::BI__nvvm_read_ptx_sreg_nctaid_w:
return getIntrinsic("nvvm.read.ptx.sreg.nctaid.w");

default:
llvm_unreachable("NYI");
}
}
89 changes: 89 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3313,3 +3313,92 @@ LValue CIRGenFunction::emitPredefinedLValue(const PredefinedExpr *E) {

return emitStringLiteralLValue(SL);
}

namespace {
struct LValueOrRValue {
LValue lv;
RValue rv;
};

LValueOrRValue emitPseudoObjectExpr(CIRGenFunction &cgf,
const PseudoObjectExpr *expr,
bool forLValue, AggValueSlot slot) {
SmallVector<CIRGenFunction::OpaqueValueMappingData, 4> opaques;

// Find the result expression, if any.
const Expr *resultExpr = expr->getResultExpr();
LValueOrRValue result;

for (PseudoObjectExpr::const_semantics_iterator i = expr->semantics_begin(),
e = expr->semantics_end();
i != e; ++i) {
const Expr *semantic = *i;

// If this semantic expression is an opaque value, bind it
// to the result of its source expression.
if (const auto *ov = dyn_cast<OpaqueValueExpr>(semantic)) {
// Skip unique OVEs.
if (ov->isUnique()) {
assert(ov != resultExpr &&
"A unique OVE cannot be used as the result expression");
continue;
}

// If this is the result expression, we may need to evaluate
// directly into the slot.
using OVMA = CIRGenFunction::OpaqueValueMappingData;
OVMA opaqueData;
if (ov == resultExpr && ov->isPRValue() && !forLValue &&
CIRGenFunction::hasAggregateEvaluationKind(ov->getType())) {
cgf.emitAggExpr(ov->getSourceExpr(), slot);
LValue lv = cgf.makeAddrLValue(slot.getAddress(), ov->getType(),
AlignmentSource::Decl);
opaqueData = OVMA::bind(cgf, ov, lv);
result.rv = slot.asRValue();

// Otherwise, emit as normal.
} else {
opaqueData = OVMA::bind(cgf, ov, ov->getSourceExpr());

// If this is the result, also evaluate the result now.
if (ov == resultExpr) {
if (forLValue)
result.lv = cgf.emitLValue(ov);
else
result.rv = cgf.emitAnyExpr(ov, slot);
}
}

opaques.push_back(opaqueData);

// Otherwise, if the expression is the result, evaluate it
// and remember the result.
} else if (semantic == resultExpr) {
if (forLValue)
result.lv = cgf.emitLValue(semantic);
else
result.rv = cgf.emitAnyExpr(semantic, slot);

// Otherwise, evaluate the expression in an ignored context.
} else {
cgf.emitIgnoredExpr(semantic);
}
}

// Unbind all the opaques now.
for (auto &opaque : opaques)
opaque.unbind(cgf);

return result;
}

} // namespace

RValue CIRGenFunction::emitPseudoObjectRValue(const PseudoObjectExpr *expr,
AggValueSlot slot) {
return emitPseudoObjectExpr(*this, expr, false, slot).rv;
}

LValue CIRGenFunction::emitPseudoObjectLValue(const PseudoObjectExpr *expr) {
return emitPseudoObjectExpr(*this, expr, true, AggValueSlot::ignored()).lv;
}
2 changes: 1 addition & 1 deletion clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
llvm_unreachable("NYI");
}
mlir::Value VisitPseudoObjectExpr(PseudoObjectExpr *E) {
llvm_unreachable("NYI");
return CGF.emitPseudoObjectRValue(E).getScalarVal();
}
mlir::Value VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *E) {
llvm_unreachable("NYI");
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -1282,6 +1282,11 @@ class CIRGenFunction : public CIRGenTypeCache {
ConstantEmission tryEmitAsConstant(DeclRefExpr *refExpr);
ConstantEmission tryEmitAsConstant(const MemberExpr *ME);

RValue emitPseudoObjectRValue(const PseudoObjectExpr *expr,
AggValueSlot slot = AggValueSlot::ignored());

LValue emitPseudoObjectLValue(const PseudoObjectExpr *expr);

/// Emit the computation of the specified expression of scalar type,
/// ignoring the result.
mlir::Value emitScalarExpr(const clang::Expr *E);
Expand Down Expand Up @@ -1471,6 +1476,7 @@ class CIRGenFunction : public CIRGenTypeCache {
mlir::Value emitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
mlir::Value emitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
mlir::Value emitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
mlir::Value emitNVPTXBuiltinExpr(unsigned builtinID, const CallExpr *expr);

/// Given an expression with a pointer type, emit the value and compute our
/// best estimate of the alignment of the pointee.
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CIR/CodeGen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ add_clang_library(clangCIR
CIRGenBuiltin.cpp
CIRGenBuiltinAArch64.cpp
CIRGenBuiltinX86.cpp
CIRGenBuiltinNVPTX.cpp
CIRGenCXX.cpp
CIRGenCXXABI.cpp
CIRGenCall.cpp
Expand Down
88 changes: 88 additions & 0 deletions clang/test/CIR/CodeGen/CUDA/cuda-builtin-vars.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
// RUN: -fcuda-is-device -emit-llvm -o - %s \
// RUN: | FileCheck --check-prefix=LLVM %s

// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
// RUN: -fcuda-is-device -emit-cir -o - %s \
// RUN: | FileCheck --check-prefix=CIR %s

#include "__clang_cuda_builtin_vars.h"

// LLVM: define{{.*}} void @_Z6kernelPi(ptr %0)
__attribute__((global))
void kernel(int *out) {
int i = 0;

out[i++] = threadIdx.x;
// CIR: cir.func linkonce_odr @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv()
// CIR: cir.llvm.intrinsic "nvvm.read.ptx.sreg.tid.x"
// LLVM: call {{.*}} i32 @llvm.nvvm.read.ptx.sreg.tid.x()

out[i++] = threadIdx.y;
// CIR: cir.func linkonce_odr @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv()
// CIR: cir.llvm.intrinsic "nvvm.read.ptx.sreg.tid.y"
// LLVM: call {{.*}} i32 @llvm.nvvm.read.ptx.sreg.tid.y()

out[i++] = threadIdx.z;
// CIR: cir.func linkonce_odr @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_zEv()
// CIR: cir.llvm.intrinsic "nvvm.read.ptx.sreg.tid.z"
// LLVM: call {{.*}} i32 @llvm.nvvm.read.ptx.sreg.tid.z()


out[i++] = blockIdx.x;
// CIR: cir.func linkonce_odr @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv()
// CIR: cir.llvm.intrinsic "nvvm.read.ptx.sreg.ctaid.x"
// LLVM: call {{.*}} i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()

out[i++] = blockIdx.y;
// CIR: cir.func linkonce_odr @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv()
// CIR: cir.llvm.intrinsic "nvvm.read.ptx.sreg.ctaid.y"
// LLVM: call {{.*}} i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()

out[i++] = blockIdx.z;
// CIR: cir.func linkonce_odr @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_zEv()
// CIR: cir.llvm.intrinsic "nvvm.read.ptx.sreg.ctaid.z"
// LLVM: call {{.*}} i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()


out[i++] = blockDim.x;
// CIR: cir.func linkonce_odr @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv()
// CIR: cir.llvm.intrinsic "nvvm.read.ptx.sreg.ntid.x"
// LLVM: call {{.*}} i32 @llvm.nvvm.read.ptx.sreg.ntid.x()

out[i++] = blockDim.y;
// CIR: cir.func linkonce_odr @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_yEv()
// CIR: cir.llvm.intrinsic "nvvm.read.ptx.sreg.ntid.y"
// LLVM: call {{.*}} i32 @llvm.nvvm.read.ptx.sreg.ntid.y()

out[i++] = blockDim.z;
// CIR: cir.func linkonce_odr @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_zEv()
// CIR: cir.llvm.intrinsic "nvvm.read.ptx.sreg.ntid.z"
// LLVM: call {{.*}} i32 @llvm.nvvm.read.ptx.sreg.ntid.z()


out[i++] = gridDim.x;
// CIR: cir.func linkonce_odr @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_xEv()
// CIR: cir.llvm.intrinsic "nvvm.read.ptx.sreg.nctaid.x"
// LLVM: call {{.*}} i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()

out[i++] = gridDim.y;
// CIR: cir.func linkonce_odr @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_yEv()
// CIR: cir.llvm.intrinsic "nvvm.read.ptx.sreg.nctaid.y"
// LLVM: call {{.*}} i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()

out[i++] = gridDim.z;
// CIR: cir.func linkonce_odr @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_zEv()
// CIR: cir.llvm.intrinsic "nvvm.read.ptx.sreg.nctaid.z"
// LLVM: call {{.*}} i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()


out[i++] = warpSize;
// CIR: [[REGISTER:%.*]] = cir.const #cir.int<32>
// CIR: cir.store [[REGISTER]]
// LLVM: store i32 32,


// CIR: cir.return loc
// LLVM: ret void
}