Skip to content

[CIR] Upstream ArraySubscriptExpr for fixed size array #134536

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Apr 10, 2025
2 changes: 2 additions & 0 deletions clang/include/clang/CIR/MissingFeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ struct MissingFeatures {
static bool mangledNames() { return false; }
static bool setDLLStorageClass() { return false; }
static bool openMP() { return false; }
// Markers referenced through assert(!...) by the array subscript emission
// helpers (emitArraySubscriptPtr) in CIRGenExpr.cpp.
static bool emitCheckedInBoundsGEP() { return false; }
static bool preservedAccessIndexRegion() { return false; }

// Missing types
static bool dataMemberType() { return false; }
Expand Down
40 changes: 40 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenBuilder.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "CIRGenBuilder.h"

using namespace clang::CIRGen;

/// Decay \p arrayPtr to a pointer to its first element.
///
/// When \p arrayPtr points to a cir::ArrayType, an array_to_ptrdecay cast to
/// "pointer to element type" is created at \p loc and returned. Otherwise
/// \p arrayPtr is handed back unchanged, after asserting that it already
/// points to \p eltTy.
mlir::Value CIRGenBuilderTy::maybeBuildArrayDecay(mlir::Location loc,
                                                  mlir::Value arrayPtr,
                                                  mlir::Type eltTy) {
  const mlir::Type pointeeTy =
      mlir::cast<cir::PointerType>(arrayPtr.getType()).getPointee();
  const auto pointeeArrayTy = mlir::dyn_cast<cir::ArrayType>(pointeeTy);

  // Not a pointer-to-array: the pointer is already flat, nothing to decay.
  if (!pointeeArrayTy) {
    assert(pointeeTy == eltTy &&
           "flat pointee type must match original array element type");
    return arrayPtr;
  }

  const cir::PointerType elementPtrTy =
      getPointerTo(pointeeArrayTy.getEltType());
  return create<cir::CastOp>(loc, elementPtrTy,
                             cir::CastKind::array_to_ptrdecay, arrayPtr);
}

/// Create a cir.ptr_stride that addresses element \p idx relative to
/// \p arrayPtr.
///
/// With \p shouldDecay set, the base pointer is first passed through
/// maybeBuildArrayDecay (located at \p arrayLocBegin, checked against
/// \p eltTy). The stride operation is located at \p arrayLocEnd and its result
/// has the same type as the base it strides over.
mlir::Value CIRGenBuilderTy::getArrayElement(mlir::Location arrayLocBegin,
                                             mlir::Location arrayLocEnd,
                                             mlir::Value arrayPtr,
                                             mlir::Type eltTy, mlir::Value idx,
                                             bool shouldDecay) {
  const mlir::Value strideBase =
      shouldDecay ? maybeBuildArrayDecay(arrayLocBegin, arrayPtr, eltTy)
                  : arrayPtr;
  return create<cir::PtrStrideOp>(arrayLocEnd, strideBase.getType(),
                                  strideBase, idx);
}
13 changes: 13 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,19 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {

return create<cir::BinOp>(loc, cir::BinOpKind::Div, lhs, rhs);
}

/// Create a cir.ptr_stride operation to get access to an array element.
///
/// \param arrayLocBegin location given to the array-to-pointer decay, if one
///        is emitted.
/// \param arrayLocEnd location given to the cir.ptr_stride operation.
/// \param arrayPtr pointer used as the base of the access.
/// \param eltTy element type forwarded to maybeBuildArrayDecay; only
///        consulted when \p shouldDecay is true.
/// \param idx the index of the element to access.
/// \param shouldDecay true if \p arrayPtr should first be decayed to a pointer
///        to the element type (via maybeBuildArrayDecay) before striding.
/// \returns the result of the cir.ptr_stride operation; its type is the type
///          of the (possibly decayed) base pointer.
mlir::Value getArrayElement(mlir::Location arrayLocBegin,
mlir::Location arrayLocEnd, mlir::Value arrayPtr,
mlir::Type eltTy, mlir::Value idx,
bool shouldDecay);

/// Returns a decayed pointer to the first element of the array
/// pointed to by \p arrayPtr.
///
/// If \p arrayPtr does not point to a cir::ArrayType it is returned
/// unchanged; in that case its pointee type is asserted to equal \p eltTy.
/// The decay cast, when emitted, is located at \p loc.
mlir::Value maybeBuildArrayDecay(mlir::Location loc, mlir::Value arrayPtr,
mlir::Type eltTy);
};

} // namespace clang::CIRGen
Expand Down
138 changes: 138 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "Address.h"
#include "CIRGenFunction.h"
#include "CIRGenModule.h"
#include "CIRGenValue.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "clang/AST/Attr.h"
Expand Down Expand Up @@ -430,6 +431,143 @@ LValue CIRGenFunction::emitUnaryOpLValue(const UnaryOperator *e) {
llvm_unreachable("Unknown unary operator kind!");
}

/// If the specified expr is a simple decay from an array to pointer,
/// return the array subexpression.
/// FIXME: this could be abstracted into a common AST helper.
static const Expr *getSimpleArrayDecayOperand(const Expr *e) {
// If this isn't just an array->pointer decay, bail out.
const auto *castExpr = dyn_cast<CastExpr>(e);
if (!castExpr || castExpr->getCastKind() != CK_ArrayToPointerDecay)
return nullptr;

// If this is a decay from variable width array, bail out.
const Expr *subExpr = castExpr->getSubExpr();
if (subExpr->getType()->isVariableArrayType())
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need an Assert/NYI/etc. here? That seems like the purpose of MissingFeatures.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I understand, returning nullptr here is intended internal behavior, not a missing feature; the implementation is similar to Clang's:

/// isSimpleArrayDecayOperand - If the specified expr is a simple decay from an
/// array to pointer, return the array subexpression.
static const Expr *isSimpleArrayDecayOperand(const Expr *E) {
// If this isn't just an array->pointer decay, bail out.
const auto *CE = dyn_cast<CastExpr>(E);
if (!CE || CE->getCastKind() != CK_ArrayToPointerDecay)
return nullptr;
// If this is a decay from variable width array, bail out.
const Expr *SubExpr = CE->getSubExpr();
if (SubExpr->getType()->isVariableArrayType())
return nullptr;
return SubExpr;
}

return nullptr;

return subExpr;
}

/// Return the integer attribute of the cir::ConstantOp that defines \p idx.
///
/// A null attribute is returned when \p idx is not produced by a
/// cir::ConstantOp (including values that have no defining operation at all,
/// such as block arguments) or when the constant's value is not a
/// cir::IntAttr.
static cir::IntAttr getConstantIndexOrNull(mlir::Value idx) {
  // TODO(cir): should we consider using MLIRs IndexType instead of IntegerAttr?
  // Value::getDefiningOp<OpTy>() copes with values that have no defining
  // operation; calling dyn_cast on the null Operation* returned by the
  // untemplated getDefiningOp() would assert instead.
  if (auto constantOp = idx.getDefiningOp<cir::ConstantOp>())
    return mlir::dyn_cast<cir::IntAttr>(constantOp.getValue());
  return {};
}

static CharUnits getArrayElementAlign(CharUnits arrayAlign, mlir::Value idx,
CharUnits eltSize) {
// If we have a constant index, we can use the exact offset of the
// element we're accessing.
const cir::IntAttr constantIdx = getConstantIndexOrNull(idx);
if (constantIdx) {
const CharUnits offset = constantIdx.getValue().getZExtValue() * eltSize;
return arrayAlign.alignmentAtOffset(offset);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a test case where this would return something different than alignmentOfArrayElement(eltSize)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am trying to think of a test case that can be used.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In order to find a test case for this, I modified the equivalent classic clang codegen implementation so that it would always go to the else case here. That caused 15 tests to fail, but we don't have enough support implemented to run any of the failing tests with CIR. There were 12 OpenMP failures, two PowerPC intrinsics, and one HLSL.

}
// Otherwise, use the worst-case alignment for any element.
return arrayAlign.alignmentOfArrayElement(eltSize);
}

/// Strip every variable-length array layer from \p vla and return the first
/// element type that \p astContext no longer reports as a variable array
/// type. \p vla must not be null.
static QualType getFixedSizeElementType(const ASTContext &astContext,
                                        const VariableArrayType *vla) {
  QualType eltType = vla->getElementType();
  // Keep descending while the element is itself a variable-length array.
  while (const VariableArrayType *nestedVla =
             astContext.getAsVariableArrayType(eltType))
    eltType = nestedVla->getElementType();
  return eltType;
}

/// Emit the pointer to element \p idx of \p ptr by delegating to the
/// builder's getArrayElement. \p beginLoc / \p endLoc, \p eltTy and
/// \p shouldDecay are forwarded unchanged.
static mlir::Value emitArraySubscriptPtr(CIRGenFunction &cgf,
                                         mlir::Location beginLoc,
                                         mlir::Location endLoc, mlir::Value ptr,
                                         mlir::Type eltTy, mlir::Value idx,
                                         bool shouldDecay) {
  // TODO(cir): LLVM codegen emits in bound gep check here, is there anything
  // that would enhance tracking this later in CIR?
  assert(!cir::MissingFeatures::emitCheckedInBoundsGEP());
  return cgf.getCIRGenModule().getBuilder().getArrayElement(
      beginLoc, endLoc, ptr, eltTy, idx, shouldDecay);
}

/// Compute the Address of element \p idx of the array at \p addr.
///
/// The resulting Address carries the element pointer, the memory type that
/// \p eltType converts to (after stripping variable-length array layers) and
/// the alignment derived from \p addr's alignment, \p idx and the element
/// size. \p beginLoc, \p endLoc and \p shouldDecay are forwarded to the
/// pointer-emitting overload. \p loc is currently unused.
static Address emitArraySubscriptPtr(CIRGenFunction &cgf,
                                     mlir::Location beginLoc,
                                     mlir::Location endLoc, Address addr,
                                     QualType eltType, mlir::Value idx,
                                     mlir::Location loc, bool shouldDecay) {
  const ASTContext &astContext = cgf.getContext();

  // Indices are expressed in terms of the statically-sized base element, so
  // look through any variable-length array layers first.
  QualType fixedEltType = eltType;
  if (const VariableArrayType *vla =
          astContext.getAsVariableArrayType(eltType))
    fixedEltType = getFixedSizeElementType(astContext, vla);

  // The element size lets us compute the best alignment of the element.
  const CharUnits eltAlign =
      getArrayElementAlign(addr.getAlignment(), idx,
                           astContext.getTypeSizeInChars(fixedEltType));

  assert(!cir::MissingFeatures::preservedAccessIndexRegion());
  const mlir::Value eltPtr =
      emitArraySubscriptPtr(cgf, beginLoc, endLoc, addr.getPointer(),
                            addr.getElementType(), idx, shouldDecay);
  return Address(eltPtr, cgf.convertTypeForMem(fixedEltType), eltAlign);
}

/// Emit an l-value for the array subscript expression \p e.
///
/// Only subscripts whose base is a simple array-to-pointer decay of a
/// non-variable-length array are handled. Vector bases, ExtVectorElementExpr
/// bases, variable-length array results, Objective-C object results and plain
/// pointer bases are reported as not yet implemented.
LValue
CIRGenFunction::emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e) {
  // Report an unimplemented case and hand back an l-value with an invalid
  // address.
  const auto reportNYI = [&](const char *feature) {
    cgm.errorNYI(e->getSourceRange(), feature);
    return LValue::makeAddr(Address::invalid(), e->getType(), LValueBaseInfo());
  };

  const Expr *base = e->getBase();
  const bool baseIsExtVectorElt = isa<ExtVectorElementExpr>(base);

  if (base->getType()->isVectorType() && !baseIsExtVectorElt)
    return reportNYI("emitArraySubscriptExpr: VectorType");

  if (baseIsExtVectorElt)
    return reportNYI("emitArraySubscriptExpr: ExtVectorElementExpr");

  if (getContext().getAsVariableArrayType(e->getType()))
    return reportNYI("emitArraySubscriptExpr: VariableArrayType");

  if (e->getType()->getAs<ObjCObjectType>())
    return reportNYI("emitArraySubscriptExpr: ObjCObjectType");

  // The index must always be an integer, which is not an aggregate. Emit it
  // in lexical order (this complexity is, sadly, required by C++17).
  assert((e->getIdx() == e->getLHS() || e->getIdx() == e->getRHS()) &&
         "index was neither LHS nor RHS");
  const mlir::Value idx = emitScalarExpr(e->getIdx());

  const Expr *array = getSimpleArrayDecayOperand(base);
  if (!array) {
    // The base must be a pointer; emit it with an estimate of its alignment.
    cgm.errorNYI(e->getSourceRange(),
                 "emitArraySubscriptExpr: The base must be a pointer");
    return {};
  }

  // Nested subscripts (e.g. a[i][j]) recurse directly; any other array
  // expression goes through the generic l-value emitter.
  const auto *nestedSubscript = dyn_cast<ArraySubscriptExpr>(array);
  LValue arrayLV = nestedSubscript ? emitArraySubscriptExpr(nestedSubscript)
                                   : emitLValue(array);

  // Propagate the alignment from the array itself to the result.
  const Address addr = emitArraySubscriptPtr(
      *this, cgm.getLoc(array->getBeginLoc()), cgm.getLoc(array->getEndLoc()),
      arrayLV.getAddress(), e->getType(), idx, cgm.getLoc(e->getExprLoc()),
      /*shouldDecay=*/true);

  return LValue::makeAddr(addr, e->getType(), LValueBaseInfo());
}

LValue CIRGenFunction::emitBinaryOperatorLValue(const BinaryOperator *e) {
// Comma expressions just emit their LHS then their RHS as an l-value.
if (e->getOpcode() == BO_Comma) {
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,16 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
mlir::Value VisitCastExpr(CastExpr *e);
mlir::Value VisitCallExpr(const CallExpr *e);

/// Emit the scalar value of the subscript expression \p e by loading from the
/// l-value it designates. Subscripts whose base has vector type are not
/// implemented yet: a diagnostic is issued and a null value is returned.
mlir::Value VisitArraySubscriptExpr(ArraySubscriptExpr *e) {
  if (e->getBase()->getType()->isVectorType()) {
    assert(!cir::MissingFeatures::scalableVectors());
    // Pass the expression's source range so the diagnostic points at the
    // offending subscript, matching the NYI reports in emitArraySubscriptExpr.
    cgf.getCIRGenModule().errorNYI(e->getSourceRange(),
                                   "VisitArraySubscriptExpr: VectorType");
    return {};
  }
  // Just load the lvalue formed by the subscript expression.
  return emitLoadOfLValue(e);
}

mlir::Value VisitExplicitCastExpr(ExplicitCastExpr *e) {
return VisitCastExpr(e);
}
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,8 @@ LValue CIRGenFunction::emitLValue(const Expr *e) {
std::string("l-value not implemented for '") +
e->getStmtClassName() + "'");
return LValue();
case Expr::ArraySubscriptExprClass:
return emitArraySubscriptExpr(cast<ArraySubscriptExpr>(e));
case Expr::UnaryOperatorClass:
return emitUnaryOpLValue(cast<UnaryOperator>(e));
case Expr::BinaryOperatorClass:
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,8 @@ class CIRGenFunction : public CIRGenTypeCache {
/// should be returned.
RValue emitAnyExpr(const clang::Expr *e);

/// Emit an l-value for the array subscript expression \p e. Forms that are
/// not supported yet are reported through errorNYI.
LValue emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e);

AutoVarEmission emitAutoVarAlloca(const clang::VarDecl &d);

/// Emit code and set up symbol table for a variable declaration with auto,
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenValue.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

#include "mlir/IR/Value.h"

#include "clang/CIR/MissingFeatures.h"

namespace clang::CIRGen {

/// This trivial value class is used to represent the result of an
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CIR/CodeGen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)

add_clang_library(clangCIR
CIRGenerator.cpp
CIRGenBuilder.cpp
CIRGenCall.cpp
CIRGenDecl.cpp
CIRGenDeclOpenACC.cpp
Expand Down
Loading
Loading