diff --git a/.github/workflows/bazel-checks.yml b/.github/workflows/bazel-checks.yml index 1b27dbc1dbc4d..27092d9326aeb 100644 --- a/.github/workflows/bazel-checks.yml +++ b/.github/workflows/bazel-checks.yml @@ -33,7 +33,11 @@ jobs: bazel-build: name: "Bazel Build/Test" - runs-on: llvm-premerge-linux-runners + # Only run on US Central workers so we only have to keep one cache warm as + # the cache buckets are per cluster. + runs-on: + group: llvm-premerge-cluster-us-central + labels: llvm-premerge-linux-runners if: github.repository == 'llvm/llvm-project' steps: - name: Fetch LLVM sources @@ -44,7 +48,7 @@ jobs: - name: Setup System Dependencies run: | sudo apt-get update - sudo apt-get install -y libmpfr-dev libpfm4-dev + sudo apt-get install -y libmpfr-dev libpfm4-dev m4 libedit-dev sudo curl -L https://github.com/bazelbuild/bazelisk/releases/download/v1.27.0/bazelisk-amd64.deb > /tmp/bazelisk.deb sudo apt-get install -y /tmp/bazelisk.deb rm /tmp/bazelisk.deb @@ -54,4 +58,4 @@ jobs: bazelisk test --config=ci --sandbox_base="" \ --remote_cache=https://storage.googleapis.com/$CACHE_GCS_BUCKET-bazel \ --google_default_credentials \ - @llvm-project//llvm/unittests:adt_tests + @llvm-project//... //... 
diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h index 5e58abee2bbb3..4c8ab3f859a49 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h @@ -43,6 +43,7 @@ class FactsGenerator : public ConstStmtVisitor { void VisitUnaryOperator(const UnaryOperator *UO); void VisitReturnStmt(const ReturnStmt *RS); void VisitBinaryOperator(const BinaryOperator *BO); + void VisitConditionalOperator(const ConditionalOperator *CO); void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE); void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE); void VisitInitListExpr(const InitListExpr *ILE); diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index b64e07ff2bfb8..a673ae6f9eb71 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -153,6 +153,7 @@ struct MissingFeatures { static bool coroEndBuiltinCall() { return false; } static bool coroutineFrame() { return false; } static bool emitBodyAndFallthrough() { return false; } + static bool coroOutsideFrameMD() { return false; } // Various handling of deferred processing in CIRGenModule. 
static bool cgmRelease() { return false; } @@ -298,6 +299,7 @@ struct MissingFeatures { static bool opTBAA() { return false; } static bool peepholeProtection() { return false; } static bool pgoUse() { return false; } + static bool pointerAuthentication() { return false; } static bool pointerOverflowSanitizer() { return false; } static bool preservedAccessIndexRegion() { return false; } static bool requiresCleanups() { return false; } diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp index bec8e1dabb0b5..381ff99aae420 100644 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -176,6 +176,15 @@ void FactsGenerator::VisitBinaryOperator(const BinaryOperator *BO) { handleAssignment(BO->getLHS(), BO->getRHS()); } +void FactsGenerator::VisitConditionalOperator(const ConditionalOperator *CO) { + if (hasOrigin(CO)) { + // Merge origins from both branches of the conditional operator. + // We kill to clear the initial state and merge both origins into it. + killAndFlowOrigin(*CO, *CO->getTrueExpr()); + flowOrigin(*CO, *CO->getFalseExpr()); + } +} + void FactsGenerator::VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { // Assignment operators have special "kill-then-propagate" semantics // and are handled separately. 
diff --git a/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp b/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp index 930ae55405756..05fb1aedcbf4a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp @@ -13,6 +13,7 @@ #include "CIRGenFunction.h" #include "mlir/Support/LLVM.h" #include "clang/AST/StmtCXX.h" +#include "clang/AST/StmtVisitor.h" #include "clang/Basic/TargetInfo.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/MissingFeatures.h" @@ -33,6 +34,65 @@ struct clang::CIRGen::CGCoroData { CIRGenFunction::CGCoroInfo::CGCoroInfo() {} CIRGenFunction::CGCoroInfo::~CGCoroInfo() {} +namespace { +// FIXME: both GetParamRef and ParamReferenceReplacerRAII are good template +// candidates to be shared among LLVM / CIR codegen. + +// Hunts for the parameter reference in the parameter copy/move declaration. +struct GetParamRef : public StmtVisitor { +public: + DeclRefExpr *expr = nullptr; + GetParamRef() {} + void VisitDeclRefExpr(DeclRefExpr *e) { + assert(expr == nullptr && "multilple declref in param move"); + expr = e; + } + void VisitStmt(Stmt *s) { + for (Stmt *c : s->children()) { + if (c) + Visit(c); + } + } +}; + +// This class replaces references to parameters to their copies by changing +// the addresses in CGF.LocalDeclMap and restoring back the original values in +// its destructor. +struct ParamReferenceReplacerRAII { + CIRGenFunction::DeclMapTy savedLocals; + CIRGenFunction::DeclMapTy &localDeclMap; + + ParamReferenceReplacerRAII(CIRGenFunction::DeclMapTy &localDeclMap) + : localDeclMap(localDeclMap) {} + + void addCopy(const DeclStmt *pm) { + // Figure out what param it refers to. 
+ + assert(pm->isSingleDecl()); + const VarDecl *vd = static_cast(pm->getSingleDecl()); + const Expr *initExpr = vd->getInit(); + GetParamRef visitor; + visitor.Visit(const_cast(initExpr)); + assert(visitor.expr); + DeclRefExpr *dreOrig = visitor.expr; + auto *pd = dreOrig->getDecl(); + + auto it = localDeclMap.find(pd); + assert(it != localDeclMap.end() && "parameter is not found"); + savedLocals.insert({pd, it->second}); + + auto copyIt = localDeclMap.find(vd); + assert(copyIt != localDeclMap.end() && "parameter copy is not found"); + it->second = copyIt->getSecond(); + } + + ~ParamReferenceReplacerRAII() { + for (auto &&savedLocal : savedLocals) { + localDeclMap.insert({savedLocal.first, savedLocal.second}); + } + } +}; +} // namespace static void createCoroData(CIRGenFunction &cgf, CIRGenFunction::CGCoroInfo &curCoro, cir::CallOp coroId) { @@ -149,7 +209,47 @@ CIRGenFunction::emitCoroutineBody(const CoroutineBodyStmt &s) { if (s.getReturnStmtOnAllocFailure()) cgm.errorNYI("handle coroutine return alloc failure"); - assert(!cir::MissingFeatures::generateDebugInfo()); - assert(!cir::MissingFeatures::emitBodyAndFallthrough()); + { + assert(!cir::MissingFeatures::generateDebugInfo()); + ParamReferenceReplacerRAII paramReplacer(localDeclMap); + // Create mapping between parameters and copy-params for coroutine + // function. + llvm::ArrayRef paramMoves = s.getParamMoves(); + assert((paramMoves.size() == 0 || (paramMoves.size() == fnArgs.size())) && + "ParamMoves and FnArgs should be the same size for coroutine " + "function"); + // For zipping the arg map into debug info. + assert(!cir::MissingFeatures::generateDebugInfo()); + + // Create parameter copies. We do it before creating a promise, since an + // evolution of coroutine TS may allow promise constructor to observe + // parameter copies. 
+ assert(!cir::MissingFeatures::coroOutsideFrameMD()); + for (auto *pm : paramMoves) { + if (emitStmt(pm, /*useCurrentScope=*/true).failed()) + return mlir::failure(); + paramReplacer.addCopy(cast(pm)); + } + + if (emitStmt(s.getPromiseDeclStmt(), /*useCurrentScope=*/true).failed()) + return mlir::failure(); + // returnValue should be valid as long as the coroutine's return type + // is not void. The assertion could help us to reduce the check later. + assert(returnValue.isValid() == (bool)s.getReturnStmt()); + // Now we have the promise, initialize the GRO. + // We need to emit `get_return_object` first. According to: + // [dcl.fct.def.coroutine]p7 + // The call to get_return_­object is sequenced before the call to + // initial_suspend and is invoked at most once. + // + // So we couldn't emit return value when we emit return statment, + // otherwise the call to get_return_object wouldn't be in front + // of initial_suspend. + if (returnValue.isValid()) + emitAnyExprToMem(s.getReturnValue(), returnValue, + s.getReturnValue()->getType().getQualifiers(), + /*isInit*/ true); + assert(!cir::MissingFeatures::emitBodyAndFallthrough()); + } return mlir::success(); } diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index 422fa1cf5ad2e..9bb76894c13f1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -2134,79 +2134,6 @@ RValue CIRGenFunction::emitCXXMemberCallExpr(const CXXMemberCallExpr *ce, ce, md, returnValue, hasQualifier, qualifier, isArrow, base); } -void CIRGenFunction::emitCXXConstructExpr(const CXXConstructExpr *e, - AggValueSlot dest) { - assert(!dest.isIgnored() && "Must have a destination!"); - const CXXConstructorDecl *cd = e->getConstructor(); - - // If we require zero initialization before (or instead of) calling the - // constructor, as can be the case with a non-user-provided default - // constructor, emit the zero initialization now, unless destination is - // already 
zeroed. - if (e->requiresZeroInitialization() && !dest.isZeroed()) { - switch (e->getConstructionKind()) { - case CXXConstructionKind::Delegating: - case CXXConstructionKind::Complete: - emitNullInitialization(getLoc(e->getSourceRange()), dest.getAddress(), - e->getType()); - break; - case CXXConstructionKind::VirtualBase: - case CXXConstructionKind::NonVirtualBase: - cgm.errorNYI(e->getSourceRange(), - "emitCXXConstructExpr: base requires initialization"); - break; - } - } - - // If this is a call to a trivial default constructor, do nothing. - if (cd->isTrivial() && cd->isDefaultConstructor()) - return; - - // Elide the constructor if we're constructing from a temporary - if (getLangOpts().ElideConstructors && e->isElidable()) { - // FIXME: This only handles the simplest case, where the source object is - // passed directly as the first argument to the constructor. This - // should also handle stepping through implicit casts and conversion - // sequences which involve two steps, with a conversion operator - // follwed by a converting constructor. 
- const Expr *srcObj = e->getArg(0); - assert(srcObj->isTemporaryObject(getContext(), cd->getParent())); - assert( - getContext().hasSameUnqualifiedType(e->getType(), srcObj->getType())); - emitAggExpr(srcObj, dest); - return; - } - - if (const ArrayType *arrayType = getContext().getAsArrayType(e->getType())) { - assert(!cir::MissingFeatures::sanitizers()); - emitCXXAggrConstructorCall(cd, arrayType, dest.getAddress(), e, false); - } else { - - clang::CXXCtorType type = Ctor_Complete; - bool forVirtualBase = false; - bool delegating = false; - - switch (e->getConstructionKind()) { - case CXXConstructionKind::Complete: - type = Ctor_Complete; - break; - case CXXConstructionKind::Delegating: - // We should be emitting a constructor; GlobalDecl will assert this - type = curGD.getCtorType(); - delegating = true; - break; - case CXXConstructionKind::VirtualBase: - forVirtualBase = true; - [[fallthrough]]; - case CXXConstructionKind::NonVirtualBase: - type = Ctor_Base; - break; - } - - emitCXXConstructorCall(cd, type, forVirtualBase, delegating, dest, e); - } -} - RValue CIRGenFunction::emitReferenceBindingToExpr(const Expr *e) { // Emit the expression as an lvalue. 
LValue lv = emitLValue(e); diff --git a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp index 201fb73983155..dcded94b012f4 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp @@ -779,8 +779,8 @@ void AggExprEmitter::visitCXXParenListOrInitListExpr( Expr *e, ArrayRef args, FieldDecl *initializedFieldInUnion, Expr *arrayFiller) { - const AggValueSlot dest = - ensureSlot(cgf.getLoc(e->getSourceRange()), e->getType()); + const mlir::Location loc = cgf.getLoc(e->getSourceRange()); + const AggValueSlot dest = ensureSlot(loc, e->getType()); if (e->getType()->isConstantArrayType()) { cir::ArrayType arrayTy = @@ -819,10 +819,23 @@ void AggExprEmitter::visitCXXParenListOrInitListExpr( if (auto *cxxrd = dyn_cast(record)) { assert(numInitElements >= cxxrd->getNumBases() && "missing initializer for base class"); - if (cxxrd->getNumBases() > 0) { - cgf.cgm.errorNYI(e->getSourceRange(), - "visitCXXParenListOrInitListExpr base class init"); - return; + for (auto &base : cxxrd->bases()) { + assert(!base.isVirtual() && "should not see vbases here"); + CXXRecordDecl *baseRD = base.getType()->getAsCXXRecordDecl(); + Address address = cgf.getAddressOfDirectBaseInCompleteClass( + loc, dest.getAddress(), cxxrd, baseRD, + /*baseIsVirtual=*/false); + assert(!cir::MissingFeatures::aggValueSlotGC()); + AggValueSlot aggSlot = AggValueSlot::forAddr( + address, Qualifiers(), AggValueSlot::IsDestructed, + AggValueSlot::IsNotAliased, + cgf.getOverlapForBaseInit(cxxrd, baseRD, false)); + cgf.emitAggExpr(args[curInitIndex++], aggSlot); + if (base.getType().isDestructedType()) { + cgf.cgm.errorNYI(e->getSourceRange(), + "push deferred deactivation cleanup"); + return; + } } } diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp index 9dd9b6d550763..ac126965a95a5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp +++ 
b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp @@ -234,6 +234,89 @@ RValue CIRGenFunction::emitCXXMemberOrOperatorCall( return emitCall(fnInfo, callee, returnValue, args, nullptr, loc); } +static void emitNullBaseClassInitialization(CIRGenFunction &cgf, + Address destPtr, + const CXXRecordDecl *base) { + if (base->isEmpty()) + return; + + cgf.cgm.errorNYI(base->getSourceRange(), + "emitNullBaseClassInitialization: not empty"); +} + +void CIRGenFunction::emitCXXConstructExpr(const CXXConstructExpr *e, + AggValueSlot dest) { + assert(!dest.isIgnored() && "Must have a destination!"); + const CXXConstructorDecl *cd = e->getConstructor(); + + // If we require zero initialization before (or instead of) calling the + // constructor, as can be the case with a non-user-provided default + // constructor, emit the zero initialization now, unless destination is + // already zeroed. + if (e->requiresZeroInitialization() && !dest.isZeroed()) { + switch (e->getConstructionKind()) { + case CXXConstructionKind::Delegating: + case CXXConstructionKind::Complete: + emitNullInitialization(getLoc(e->getSourceRange()), dest.getAddress(), + e->getType()); + break; + case CXXConstructionKind::VirtualBase: + case CXXConstructionKind::NonVirtualBase: + emitNullBaseClassInitialization(*this, dest.getAddress(), + cd->getParent()); + break; + } + } + + // If this is a call to a trivial default constructor, do nothing. + if (cd->isTrivial() && cd->isDefaultConstructor()) + return; + + // Elide the constructor if we're constructing from a temporary + if (getLangOpts().ElideConstructors && e->isElidable()) { + // FIXME: This only handles the simplest case, where the source object is + // passed directly as the first argument to the constructor. This + // should also handle stepping through implicit casts and conversion + // sequences which involve two steps, with a conversion operator + // follwed by a converting constructor. 
+ const Expr *srcObj = e->getArg(0); + assert(srcObj->isTemporaryObject(getContext(), cd->getParent())); + assert( + getContext().hasSameUnqualifiedType(e->getType(), srcObj->getType())); + emitAggExpr(srcObj, dest); + return; + } + + if (const ArrayType *arrayType = getContext().getAsArrayType(e->getType())) { + assert(!cir::MissingFeatures::sanitizers()); + emitCXXAggrConstructorCall(cd, arrayType, dest.getAddress(), e, false); + } else { + + clang::CXXCtorType type = Ctor_Complete; + bool forVirtualBase = false; + bool delegating = false; + + switch (e->getConstructionKind()) { + case CXXConstructionKind::Complete: + type = Ctor_Complete; + break; + case CXXConstructionKind::Delegating: + // We should be emitting a constructor; GlobalDecl will assert this + type = curGD.getCtorType(); + delegating = true; + break; + case CXXConstructionKind::VirtualBase: + forVirtualBase = true; + [[fallthrough]]; + case CXXConstructionKind::NonVirtualBase: + type = Ctor_Base; + break; + } + + emitCXXConstructorCall(cd, type, forVirtualBase, delegating, dest, e); + } +} + static CharUnits calculateCookiePadding(CIRGenFunction &cgf, const CXXNewExpr *e) { if (!e->isArray()) diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp index 047f3599eed03..9ed920085c8c6 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp @@ -339,7 +339,7 @@ mlir::Value ComplexExprEmitter::emitLoadOfLValue(LValue lv, cgf.cgm.errorNYI(loc, "emitLoadOfLValue with Atomic LV"); const Address srcAddr = lv.getAddress(); - return builder.createLoad(cgf.getLoc(loc), srcAddr); + return builder.createLoad(cgf.getLoc(loc), srcAddr, lv.isVolatileQualified()); } /// EmitStoreOfComplex - Store the specified real/imag parts into the @@ -353,7 +353,7 @@ void ComplexExprEmitter::emitStoreOfComplex(mlir::Location loc, mlir::Value val, } const Address destAddr = lv.getAddress(); - builder.createStore(loc, val, destAddr); + 
builder.createStore(loc, val, destAddr, lv.isVolatileQualified()); } //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index c1a36134d8942..4461875fcf678 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -1933,6 +1933,14 @@ mlir::Value ScalarExprEmitter::VisitCastExpr(CastExpr *ce) { return builder.createIntToPtr(middleVal, destCIRTy); } + case CK_UncheckedDerivedToBase: + case CK_DerivedToBase: { + // The EmitPointerWithAlignment path does this fine; just discard + // the alignment. + return cgf.getAsNaturalPointerTo(cgf.emitPointerWithAlignment(ce), + ce->getType()->getPointeeType()); + } + case CK_Dynamic: { Address v = cgf.emitPointerWithAlignment(subExpr); const auto *dce = cast(ce); diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index 5d5209b9ffb60..cc75acc18c211 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -632,6 +632,10 @@ cir::FuncOp CIRGenFunction::generateCode(clang::GlobalDecl gd, cir::FuncOp fn, startFunction(gd, retTy, fn, funcType, args, loc, bodyRange.getBegin()); + // Save parameters for coroutine function. + if (body && isa_and_nonnull(body)) + llvm::append_range(fnArgs, funcDecl->parameters()); + if (isa(funcDecl)) { emitDestructorBody(args); } else if (isa(funcDecl)) { diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index f879e580989f7..b71a28c54dbef 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -152,6 +152,9 @@ class CIRGenFunction : public CIRGenTypeCache { /// global initializers. mlir::Operation *curFn = nullptr; + /// Save Parameter Decl for coroutine. 
+ llvm::SmallVector fnArgs; + using DeclMapTy = llvm::DenseMap; /// This keeps track of the CIR allocas or globals for local C /// declarations. @@ -497,6 +500,12 @@ class CIRGenFunction : public CIRGenTypeCache { VlaSizePair getVLASize(const VariableArrayType *type); VlaSizePair getVLASize(QualType type); + Address getAsNaturalAddressOf(Address addr, QualType pointeeTy); + + mlir::Value getAsNaturalPointerTo(Address addr, QualType pointeeType) { + return getAsNaturalAddressOf(addr, pointeeType).getBasePointer(); + } + void finishFunction(SourceLocation endLoc); /// Determine whether the given initializer is trivial in the sense diff --git a/clang/lib/CIR/CodeGen/CIRGenPointerAuth.cpp b/clang/lib/CIR/CodeGen/CIRGenPointerAuth.cpp new file mode 100644 index 0000000000000..20b0646fdab44 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenPointerAuth.cpp @@ -0,0 +1,23 @@ +//===--- CIRGenPointerAuth.cpp - CIR generation for ptr auth --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains common routines relating to the emission of +// pointer authentication operations. 
+// +//===----------------------------------------------------------------------===// + +#include "CIRGenFunction.h" + +using namespace clang; +using namespace clang::CIRGen; + +Address CIRGenFunction::getAsNaturalAddressOf(Address addr, + QualType pointeeTy) { + assert(!cir::MissingFeatures::pointerAuthentication()); + return addr; +} diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt index 7c31beacc5fb3..d3e2290ceea0b 100644 --- a/clang/lib/CIR/CodeGen/CMakeLists.txt +++ b/clang/lib/CIR/CodeGen/CMakeLists.txt @@ -35,6 +35,7 @@ add_clang_library(clangCIR CIRGenOpenACC.cpp CIRGenOpenACCClause.cpp CIRGenOpenACCRecipe.cpp + CIRGenPointerAuth.cpp CIRGenRecordLayoutBuilder.cpp CIRGenStmt.cpp CIRGenStmtOpenACC.cpp diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp index 5897352829891..f7907c76c8ccb 100644 --- a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp @@ -341,7 +341,7 @@ RecordType::getTypeSizeInBits(const mlir::DataLayout &dataLayout, if (isUnion()) return dataLayout.getTypeSize(getLargestMember(dataLayout)); - unsigned recordSize = computeStructSize(dataLayout); + auto recordSize = static_cast(computeStructSize(dataLayout)); return llvm::TypeSize::getFixed(recordSize * 8); } diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index eee397f1f3d19..4e61a6f61948f 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -755,10 +755,9 @@ void AggExprEmitter::VisitOpaqueValueExpr(OpaqueValueExpr *e) { void AggExprEmitter::VisitCompoundLiteralExpr(CompoundLiteralExpr *E) { - if (Dest.isPotentiallyAliased() && - E->getType().isPODType(CGF.getContext())) { - // For a POD type, just emit a load of the lvalue + a copy, because our - // compound literal might alias the destination. 
+ if (Dest.isPotentiallyAliased()) { + // Just emit a load of the lvalue + a copy, because our compound literal + // might alias the destination. EmitAggLoadOfLValue(E); return; } diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp b/clang/lib/Driver/ToolChains/PS4CPU.cpp index 6fe18aa4cceba..5b5b5607da69e 100644 --- a/clang/lib/Driver/ToolChains/PS4CPU.cpp +++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp @@ -488,6 +488,9 @@ toolchains::PS4PS5Base::PS4PS5Base(const Driver &D, const llvm::Triple &Triple, // control of header or library search. If we're not linking, don't check // for missing libraries. auto CheckSDKPartExists = [&](StringRef Dir, StringRef Desc) { + // In ThinLTO code generation mode SDK files are not required. + if (Args.hasArgNoClaim(options::OPT_fthinlto_index_EQ)) + return true; if (llvm::sys::fs::exists(Dir)) return true; D.Diag(clang::diag::warn_drv_unable_to_find_directory_expected) diff --git a/clang/test/CIR/CodeGen/complex-compound-assignment.cpp b/clang/test/CIR/CodeGen/complex-compound-assignment.cpp index a5070f51fad63..f2dbb3cc76ad2 100644 --- a/clang/test/CIR/CodeGen/complex-compound-assignment.cpp +++ b/clang/test/CIR/CodeGen/complex-compound-assignment.cpp @@ -237,18 +237,18 @@ void foo4() { // CXX_CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["a"] // CXX_CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["b"] // CXX_CIR: %[[C_ADDR:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["c", init] -// CXX_CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr>, !cir.complex -// CXX_CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr>, !cir.complex +// CXX_CIR: %[[TMP_A:.*]] = cir.load volatile {{.*}} %[[A_ADDR]] : !cir.ptr>, !cir.complex +// CXX_CIR: %[[TMP_B:.*]] = cir.load volatile {{.*}} %[[B_ADDR]] : !cir.ptr>, !cir.complex // CXX_CIR: %[[RESULT:.*]] = cir.complex.add %[[TMP_B]], %[[TMP_A]] : !cir.complex -// CXX_CIR: cir.store{{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.complex, !cir.ptr> -// CXX_CIR: %[[TMP_B:.*]] = 
cir.load{{.*}} %[[B_ADDR]] : !cir.ptr>, !cir.complex +// CXX_CIR: cir.store volatile {{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.complex, !cir.ptr> +// CXX_CIR: %[[TMP_B:.*]] = cir.load volatile {{.*}} %[[B_ADDR]] : !cir.ptr>, !cir.complex // CXX_CIR: cir.store{{.*}} %[[TMP_B]], %[[C_ADDR]] : !cir.complex, !cir.ptr // CXX_LLVM: %[[A_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4 // CXX_LLVM: %[[B_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4 // CXX_LLVM: %[[C_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4 -// CXX_LLVM: %[[TMP_A:.*]] = load { i32, i32 }, ptr %[[A_ADDR]], align 4 -// CXX_LLVM: %[[TMP_B:.*]] = load { i32, i32 }, ptr %[[B_ADDR]], align 4 +// CXX_LLVM: %[[TMP_A:.*]] = load volatile { i32, i32 }, ptr %[[A_ADDR]], align 4 +// CXX_LLVM: %[[TMP_B:.*]] = load volatile { i32, i32 }, ptr %[[B_ADDR]], align 4 // CXX_LLVM: %[[B_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_B]], 0 // CXX_LLVM: %[[B_IMAG:.*]] = extractvalue { i32, i32 } %[[TMP_B]], 1 // CXX_LLVM: %[[A_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_A]], 0 @@ -257,8 +257,8 @@ void foo4() { // CXX_LLVM: %[[ADD_IMAG:.*]] = add i32 %[[B_IMAG]], %[[A_IMAG]] // CXX_LLVM: %[[TMP_RESULT:.*]] = insertvalue { i32, i32 } poison, i32 %[[ADD_REAL]], 0 // CXX_LLVM: %[[RESULT:.*]] = insertvalue { i32, i32 } %[[TMP_RESULT]], i32 %[[ADD_IMAG]], 1 -// CXX_LLVM: store { i32, i32 } %[[RESULT]], ptr %[[B_ADDR]], align 4 -// CXX_LLVM: %[[TMP_B:.*]] = load { i32, i32 }, ptr %[[B_ADDR]], align 4 +// CXX_LLVM: store volatile { i32, i32 } %[[RESULT]], ptr %[[B_ADDR]], align 4 +// CXX_LLVM: %[[TMP_B:.*]] = load volatile { i32, i32 }, ptr %[[B_ADDR]], align 4 // CXX_LLVM: store { i32, i32 } %[[TMP_B]], ptr %[[C_ADDR]], align 4 // CXX_OGCG: %[[A_ADDR:.*]] = alloca { i32, i32 }, align 4 diff --git a/clang/test/CIR/CodeGen/complex.cpp b/clang/test/CIR/CodeGen/complex.cpp index 4eab3999dfc42..82c9f2d7aaf26 100644 --- a/clang/test/CIR/CodeGen/complex.cpp +++ b/clang/test/CIR/CodeGen/complex.cpp @@ -1534,3 +1534,146 @@ void 
imag_literal_gnu_extension() { // OGCG: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[C_ADDR]], i32 0, i32 1 // OGCG: store i32 0, ptr %[[C_REAL_PTR]], align 4 // OGCG: store i32 3, ptr %[[C_IMAG_PTR]], align 4 + +void load_store_volatile() { + volatile double _Complex a; + volatile double _Complex b; + a = b; + + volatile int _Complex c; + volatile int _Complex d; + c = d; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["b"] +// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["c"] +// CIR: %[[D_ADDR:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["d"] +// CIR: %[[TMP_B:.*]] = cir.load volatile {{.*}} %[[B_ADDR]] : !cir.ptr>, !cir.complex +// CIR: cir.store volatile {{.*}} %[[TMP_B]], %[[A_ADDR]] : !cir.complex, !cir.ptr> +// CIR: %[[TMP_D:.*]] = cir.load volatile {{.*}} %[[D_ADDR]] : !cir.ptr>, !cir.complex +// CIR: cir.store volatile {{.*}} %[[TMP_D]], %[[C_ADDR]] : !cir.complex, !cir.ptr> + +// LLVM: %[[A_ADDR:.*]] = alloca { double, double }, i64 1, align 8 +// LLVM: %[[B_ADDR:.*]] = alloca { double, double }, i64 1, align 8 +// LLVM: %[[C_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4 +// LLVM: %[[D_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4 +// LLVM: %[[TMP_B:.*]] = load volatile { double, double }, ptr %[[B_ADDR]], align 8 +// LLVM: store volatile { double, double } %[[TMP_B]], ptr %[[A_ADDR]], align 8 +// LLVM: %[[TMP_D:.*]] = load volatile { i32, i32 }, ptr %[[D_ADDR]], align 4 +// LLVM: store volatile { i32, i32 } %[[TMP_D]], ptr %[[C_ADDR]], align 4 + +// OGCG: %[[A_ADDR:.*]] = alloca { double, double }, align 8 +// OGCG: %[[B_ADDR:.*]] = alloca { double, double }, align 8 +// OGCG: %[[C_ADDR:.*]] = alloca { i32, i32 }, align 4 +// OGCG: %[[D_ADDR:.*]] = alloca { i32, i32 }, align 4 +// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[B_ADDR]], i32 0, i32 0 +// OGCG: %[[B_REAL:.*]] = load volatile 
double, ptr %[[B_REAL_PTR]], align 8 +// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[B_ADDR]], i32 0, i32 1 +// OGCG: %[[B_IMAG:.*]] = load volatile double, ptr %[[B_IMAG_PTR]], align 8 +// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[A_ADDR]], i32 0, i32 0 +// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[A_ADDR]], i32 0, i32 1 +// OGCG: store volatile double %[[B_REAL]], ptr %[[A_REAL_PTR]], align 8 +// OGCG: store volatile double %[[B_IMAG]], ptr %[[A_IMAG_PTR]], align 8 +// OGCG: %[[D_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[D_ADDR]], i32 0, i32 0 +// OGCG: %[[D_REAL:.*]] = load volatile i32, ptr %[[D_REAL_PTR]], align 4 +// OGCG: %[[D_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[D_ADDR]], i32 0, i32 1 +// OGCG: %[[D_IMAG:.*]] = load volatile i32, ptr %[[D_IMAG_PTR]], align 4 +// OGCG: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[C_ADDR]], i32 0, i32 0 +// OGCG: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[C_ADDR]], i32 0, i32 1 +// OGCG: store volatile i32 %[[D_REAL]], ptr %[[C_REAL_PTR]], align 4 +// OGCG: store volatile i32 %[[D_IMAG]], ptr %[[C_IMAG_PTR]], align 4 + + +void load_store_volatile_2() { + volatile double _Complex av; + double _Complex a; + av = a; + + double _Complex b; + volatile double _Complex bv; + b = bv; + + int _Complex c; + volatile int _Complex cv; + c = cv; + + volatile int _Complex dv; + int _Complex d; + dv = d; +} + +// CIR: %[[AV_ADDR:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["av"] +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["b"] +// CIR: %[[BV_ADDR:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["bv"] +// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["c"] +// CIR: %[[CV_ADDR:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["cv"] +// CIR: 
%[[DV_ADDR:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["dv"] +// CIR: %[[D_ADDR:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["d"] +// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr>, !cir.complex +// CIR: cir.store volatile {{.*}} %[[TMP_A]], %[[AV_ADDR]] : !cir.complex, !cir.ptr> +// CIR: %[[TMP_BV:.*]] = cir.load volatile {{.*}} %[[BV_ADDR]] : !cir.ptr>, !cir.complex +// CIR: cir.store {{.*}} %[[TMP_BV]], %[[B_ADDR]] : !cir.complex, !cir.ptr> +// CIR: %[[TMP_CV:.*]] = cir.load volatile {{.*}} %[[CV_ADDR]] : !cir.ptr>, !cir.complex +// CIR: cir.store {{.*}} %[[TMP_CV]], %[[C_ADDR]] : !cir.complex, !cir.ptr> +// CIR: %[[TMP_D:.*]] = cir.load {{.*}} %[[D_ADDR]] : !cir.ptr>, !cir.complex +// CIR: cir.store volatile {{.*}} %[[TMP_D]], %[[DV_ADDR]] : !cir.complex, !cir.ptr> + +// LLVM: %[[AV_ADDR:.*]] = alloca { double, double }, i64 1, align 8 +// LLVM: %[[A_ADDR:.*]] = alloca { double, double }, i64 1, align 8 +// LLVM: %[[B_ADDR:.*]] = alloca { double, double }, i64 1, align 8 +// LLVM: %[[BV_ADDR:.*]] = alloca { double, double }, i64 1, align 8 +// LLVM: %[[C_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4 +// LLVM: %[[CV_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4 +// LLVM: %[[DV_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4 +// LLVM: %[[D_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4 +// LLVM: %[[TMP_A:.*]] = load { double, double }, ptr %[[A_ADDR]], align 8 +// LLVM: store volatile { double, double } %[[TMP_A]], ptr %[[AV_ADDR]], align 8 +// LLVM: %[[TMP_BV:.*]] = load volatile { double, double }, ptr %[[BV_ADDR]], align 8 +// LLVM: store { double, double } %[[TMP_BV]], ptr %[[B_ADDR]], align 8 +// LLVM: %[[TMP_CV:.*]] = load volatile { i32, i32 }, ptr %[[CV_ADDR]], align 4 +// LLVM: store { i32, i32 } %[[TMP_CV]], ptr %[[C_ADDR]], align 4 +// LLVM: %[[TMP_D:.*]] = load { i32, i32 }, ptr %[[D_ADDR]], align 4 +// LLVM: store volatile { i32, i32 } %[[TMP_D]], ptr %[[DV_ADDR]], align 4 + +// OGCG: %[[AV_ADDR:.*]] = alloca { double, double }, 
align 8 +// OGCG: %[[A_ADDR:.*]] = alloca { double, double }, align 8 +// OGCG: %[[B_ADDR:.*]] = alloca { double, double }, align 8 +// OGCG: %[[BV_ADDR:.*]] = alloca { double, double }, align 8 +// OGCG: %[[C_ADDR:.*]] = alloca { i32, i32 }, align 4 +// OGCG: %[[CV_ADDR:.*]] = alloca { i32, i32 }, align 4 +// OGCG: %[[DV_ADDR:.*]] = alloca { i32, i32 }, align 4 +// OGCG: %[[D_ADDR:.*]] = alloca { i32, i32 }, align 4 +// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[A_ADDR]], i32 0, i32 0 +// OGCG: %[[A_REAL:.*]] = load double, ptr %[[A_REAL_PTR]], align 8 +// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[A_ADDR]], i32 0, i32 1 +// OGCG: %[[A_IMAG:.*]] = load double, ptr %[[A_IMAG_PTR]], align 8 +// OGCG: %[[AV_REAL_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[AV_ADDR]], i32 0, i32 0 +// OGCG: %[[AV_IMAG_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[AV_ADDR]], i32 0, i32 1 +// OGCG: store volatile double %[[A_REAL]], ptr %[[AV_REAL_PTR]], align 8 +// OGCG: store volatile double %[[A_IMAG]], ptr %[[AV_IMAG_PTR]], align 8 +// OGCG: %[[BV_REAL_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[BV_ADDR]], i32 0, i32 0 +// OGCG: %[[BV_REAL:.*]] = load volatile double, ptr %[[BV_REAL_PTR]], align 8 +// OGCG: %[[BV_IMAG_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[BV_ADDR]], i32 0, i32 1 +// OGCG: %[[BV_IMAG:.*]] = load volatile double, ptr %[[BV_IMAG_PTR]], align 8 +// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[B_ADDR]], i32 0, i32 0 +// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[B_ADDR]], i32 0, i32 1 +// OGCG: store double %[[BV_REAL]], ptr %[[B_REAL_PTR]], align 8 +// OGCG: store double %[[BV_IMAG]], ptr %[[B_IMAG_PTR]], align 8 +// OGCG: %[[CV_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[CV_ADDR]], i32 0, i32 0 +// OGCG: 
%[[CV_REAL:.*]] = load volatile i32, ptr %[[CV_REAL_PTR]], align 4 +// OGCG: %[[CV_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[CV_ADDR]], i32 0, i32 1 +// OGCG: %[[CV_IMAG:.*]] = load volatile i32, ptr %[[CV_IMAG_PTR]], align 4 +// OGCG: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[C_ADDR]], i32 0, i32 0 +// OGCG: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[C_ADDR]], i32 0, i32 1 +// OGCG: store i32 %[[CV_REAL]], ptr %[[C_REAL_PTR]], align 4 +// OGCG: store i32 %[[CV_IMAG]], ptr %[[C_IMAG_PTR]], align 4 +// OGCG: %[[D_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[D_ADDR]], i32 0, i32 0 +// OGCG: %[[D_REAL:.*]] = load i32, ptr %[[D_REAL_PTR]], align 4 +// OGCG: %[[D_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[D_ADDR]], i32 0, i32 1 +// OGCG: %[[D_IMAG:.*]] = load i32, ptr %[[D_IMAG_PTR]], align 4 +// OGCG: %[[DV_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[DV_ADDR]], i32 0, i32 0 +// OGCG: %[[DV_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[DV_ADDR]], i32 0, i32 1 +// OGCG: store volatile i32 %[[D_REAL]], ptr %[[DV_REAL_PTR]], align 4 +// OGCG: store volatile i32 %[[D_IMAG]], ptr %[[DV_IMAG_PTR]], align 4 diff --git a/clang/test/CIR/CodeGen/coro-task.cpp b/clang/test/CIR/CodeGen/coro-task.cpp index 265325f82d7f7..5738c815909ea 100644 --- a/clang/test/CIR/CodeGen/coro-task.cpp +++ b/clang/test/CIR/CodeGen/coro-task.cpp @@ -36,6 +36,12 @@ struct suspend_never { void await_resume() noexcept {} }; +struct string { + int size() const; + string(); + string(char const *s); +}; + } // namespace std namespace folly { @@ -101,7 +107,10 @@ co_invoke_fn co_invoke; }} // namespace folly::coro // CIR-DAG: ![[VoidTask:.*]] = !cir.record" padded {!u8i}> - +// CIR-DAG: ![[IntTask:.*]] = !cir.record" padded {!u8i}> +// CIR-DAG: ![[VoidPromisse:.*]] = !cir.record::promise_type" padded {!u8i}> +// CIR-DAG: ![[IntPromisse:.*]] = 
!cir.record::promise_type" padded {!u8i}> +// CIR-DAG: ![[StdString:.*]] = !cir.record // CIR: module {{.*}} { // CIR-NEXT: cir.global external @_ZN5folly4coro9co_invokeE = #cir.zero : !rec_folly3A3Acoro3A3Aco_invoke_fn @@ -119,6 +128,7 @@ VoidTask silly_task() { // CIR: cir.func coroutine dso_local @_Z10silly_taskv() -> ![[VoidTask]] // CIR: %[[VoidTaskAddr:.*]] = cir.alloca ![[VoidTask]], {{.*}}, ["__retval"] // CIR: %[[SavedFrameAddr:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["__coro_frame_addr"] +// CIR: %[[VoidPromisseAddr:.*]] = cir.alloca ![[VoidPromisse]], {{.*}}, ["__promise"] // Get coroutine id with __builtin_coro_id. @@ -138,3 +148,27 @@ VoidTask silly_task() { // CIR: } // CIR: %[[Load0:.*]] = cir.load{{.*}} %[[SavedFrameAddr]] : !cir.ptr>, !cir.ptr // CIR: %[[CoroFrameAddr:.*]] = cir.call @__builtin_coro_begin(%[[CoroId]], %[[Load0]]) + +// Call promise.get_return_object() to retrieve the task object. + +// CIR: %[[RetObj:.*]] = cir.call @_ZN5folly4coro4TaskIvE12promise_type17get_return_objectEv(%[[VoidPromisseAddr]]) nothrow : {{.*}} -> ![[VoidTask]] +// CIR: cir.store{{.*}} %[[RetObj]], %[[VoidTaskAddr]] : ![[VoidTask]] + +folly::coro::Task byRef(const std::string& s) { + co_return s.size(); +} + +// CIR: cir.func coroutine dso_local @_Z5byRefRKSt6string(%[[ARG:.*]]: !cir.ptr {{.*}}) -> ![[IntTask]] +// CIR: %[[AllocaParam:.*]] = cir.alloca !cir.ptr, {{.*}}, ["s", init, const] +// CIR: %[[IntTaskAddr:.*]] = cir.alloca ![[IntTask]], {{.*}}, ["__retval"] +// CIR: %[[SavedFrameAddr:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["__coro_frame_addr"] +// CIR: %[[AllocaFnUse:.*]] = cir.alloca !cir.ptr, {{.*}}, ["s", init, const] +// CIR: %[[IntPromisseAddr:.*]] = cir.alloca ![[IntPromisse]], {{.*}}, ["__promise"] +// CIR: cir.store %[[ARG]], %[[AllocaParam]] : !cir.ptr, {{.*}} + +// Call promise.get_return_object() to retrieve the task object. 
+
+// CIR: %[[LOAD:.*]] = cir.load %[[AllocaParam]] : !cir.ptr>, !cir.ptr
+// CIR: cir.store {{.*}} %[[LOAD]], %[[AllocaFnUse]] : !cir.ptr, !cir.ptr>
+// CIR: %[[RetObj:.*]] = cir.call @_ZN5folly4coro4TaskIiE12promise_type17get_return_objectEv(%[[IntPromisseAddr]]) nothrow : {{.*}} -> ![[IntTask]]
+// CIR: cir.store {{.*}} %[[RetObj]], %[[IntTaskAddr]] : ![[IntTask]]
diff --git a/clang/test/CIR/CodeGen/ctor-null-init.cpp b/clang/test/CIR/CodeGen/ctor-null-init.cpp
new file mode 100644
index 0000000000000..4324b329c8b41
--- /dev/null
+++ b/clang/test/CIR/CodeGen/ctor-null-init.cpp
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CIR %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll --check-prefix=LLVM %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll --check-prefix=OGCG %s
+
+struct A {
+  A() = default;
+  A(int); // This constructor triggers the null base class initialization.
+}; + +struct B : A { +}; + +void test_empty_base_null_init() { + B{}; +} + +// CIR: cir.func {{.*}} @_Z25test_empty_base_null_initv() +// CIR-NEXT: %[[B_ADDR:.*]] = cir.alloca !rec_B, !cir.ptr, ["agg.tmp.ensured"] +// CIR-NEXT: %[[A_ADDR:.*]] = cir.base_class_addr %[[B_ADDR]] : !cir.ptr nonnull [0] -> !cir.ptr + +// LLVM: define{{.*}} @_Z25test_empty_base_null_initv() +// LLVM-NEXT: %[[B:.*]] = alloca %struct.B +// LLVM-NEXT: ret void + +// OGCG: define{{.*}} @_Z25test_empty_base_null_initv() +// OGCG-NEXT: entry: +// OGCG-NEXT: %[[B:.*]] = alloca %struct.B +// OGCG-NEXT: ret void diff --git a/clang/test/CIR/CodeGen/derived-to-base.cpp b/clang/test/CIR/CodeGen/derived-to-base.cpp new file mode 100644 index 0000000000000..13acb47022c65 --- /dev/null +++ b/clang/test/CIR/CodeGen/derived-to-base.cpp @@ -0,0 +1,129 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG + +// TODO(cir): The constructors in this test case are only here because we don't +// have support for zero-initialization of base classes yet. We should +// fix that soon. 
+ +struct Base { + Base(); + void f(); + int a; +}; + +struct Derived : Base { + Derived(); + double b; +}; + +void f() { + Derived d; + d.f(); +} + +// CIR: cir.func {{.*}} @_Z1fv() +// CIR: %[[D:.*]] = cir.alloca !rec_Derived, !cir.ptr, ["d", init] +// CIR: cir.call @_ZN7DerivedC1Ev(%[[D]]) : (!cir.ptr) -> () +// CIR: %[[D_BASE:.*]] = cir.base_class_addr %[[D]] : !cir.ptr nonnull [0] -> !cir.ptr +// CIR: cir.call @_ZN4Base1fEv(%[[D_BASE]]) : (!cir.ptr) -> () + +// LLVM: define {{.*}}void @_Z1fv() +// LLVM: %[[D:.*]] = alloca %struct.Derived +// LLVM: call void @_ZN7DerivedC1Ev(ptr %[[D]]) +// LLVM: call void @_ZN4Base1fEv(ptr %[[D]]) + +// OGCG: define {{.*}}void @_Z1fv() +// OGCG: %[[D:.*]] = alloca %struct.Derived +// OGCG: call void @_ZN7DerivedC1Ev(ptr {{.*}} %[[D]]) +// OGCG: call void @_ZN4Base1fEv(ptr {{.*}} %[[D]]) + +void useBase(Base *base); +void callBaseUsingDerived(Derived *derived) { + useBase(derived); +} + + +// CIR: cir.func {{.*}} @_Z20callBaseUsingDerivedP7Derived(%[[DERIVED_ARG:.*]]: !cir.ptr {{.*}}) +// CIR: %[[DERIVED_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["derived", init] +// CIR: cir.store %[[DERIVED_ARG]], %[[DERIVED_ADDR]] +// CIR: %[[DERIVED:.*]] = cir.load{{.*}} %[[DERIVED_ADDR]] +// CIR: %[[DERIVED_BASE:.*]] = cir.base_class_addr %[[DERIVED]] : !cir.ptr nonnull [0] -> !cir.ptr +// CIR: cir.call @_Z7useBaseP4Base(%[[DERIVED_BASE]]) : (!cir.ptr) -> () + +// LLVM: define {{.*}} void @_Z20callBaseUsingDerivedP7Derived(ptr %[[DERIVED_ARG:.*]]) +// LLVM: %[[DERIVED_ADDR:.*]] = alloca ptr +// LLVM: store ptr %[[DERIVED_ARG]], ptr %[[DERIVED_ADDR]] +// LLVM: %[[DERIVED:.*]] = load ptr, ptr %[[DERIVED_ADDR]] +// LLVM: call void @_Z7useBaseP4Base(ptr %[[DERIVED]]) + +// OGCG: define {{.*}} void @_Z20callBaseUsingDerivedP7Derived(ptr {{.*}} %[[DERIVED_ARG:.*]]) +// OGCG: %[[DERIVED_ADDR:.*]] = alloca ptr +// OGCG: store ptr %[[DERIVED_ARG]], ptr %[[DERIVED_ADDR]] +// OGCG: %[[DERIVED:.*]] = load ptr, ptr %[[DERIVED_ADDR]] +// OGCG: call 
void @_Z7useBaseP4Base(ptr {{.*}} %[[DERIVED]]) + +Base *returnBaseFromDerived(Derived* derived) { + return derived; +} + +// CIR: cir.func {{.*}} @_Z21returnBaseFromDerivedP7Derived(%[[DERIVED_ARG:.*]]: !cir.ptr {{.*}}) -> !cir.ptr +// CIR: %[[DERIVED_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["derived", init] +// CIR: %[[BASE_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["__retval"] +// CIR: cir.store %[[DERIVED_ARG]], %[[DERIVED_ADDR]] +// CIR: %[[DERIVED:.*]] = cir.load{{.*}} %[[DERIVED_ADDR]] +// CIR: %[[DERIVED_BASE:.*]] = cir.base_class_addr %[[DERIVED]] : !cir.ptr nonnull [0] -> !cir.ptr +// CIR: cir.store %[[DERIVED_BASE]], %[[BASE_ADDR]] +// CIR: %[[BASE:.*]] = cir.load{{.*}} %[[BASE_ADDR]] +// CIR: cir.return %[[BASE]] : !cir.ptr + +// LLVM: define {{.*}} ptr @_Z21returnBaseFromDerivedP7Derived(ptr %[[DERIVED_ARG:.*]]) +// LLVM: %[[DERIVED_ADDR:.*]] = alloca ptr +// LLVM: store ptr %[[DERIVED_ARG]], ptr %[[DERIVED_ADDR]] +// LLVM: %[[DERIVED:.*]] = load ptr, ptr %[[DERIVED_ADDR]] + +// OGCG: define {{.*}} ptr @_Z21returnBaseFromDerivedP7Derived(ptr {{.*}} %[[DERIVED_ARG:.*]]) +// OGCG: %[[DERIVED_ADDR:.*]] = alloca ptr +// OGCG: store ptr %[[DERIVED_ARG]], ptr %[[DERIVED_ADDR]] +// OGCG: %[[DERIVED:.*]] = load ptr, ptr %[[DERIVED_ADDR]] + +volatile Derived derivedObj; + +void test_volatile_store() { + derivedObj.a = 0; +} + +// CIR: cir.func {{.*}} @_Z19test_volatile_storev() +// CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i +// CIR: %[[DERIVED_OBJ:.*]] = cir.get_global @derivedObj : !cir.ptr +// CIR: %[[DERIVED_OBJ_BASE:.*]] = cir.base_class_addr %[[DERIVED_OBJ]] : !cir.ptr nonnull [0] -> !cir.ptr +// CIR: %[[DERIVED_OBJ_A:.*]] = cir.get_member %[[DERIVED_OBJ_BASE]][0] {name = "a"} : !cir.ptr -> !cir.ptr +// CIR: cir.store volatile {{.*}} %[[ZERO]], %[[DERIVED_OBJ_A]] : !s32i, !cir.ptr + +// LLVM: define {{.*}} void @_Z19test_volatile_storev() +// LLVM: store volatile i32 0, ptr @derivedObj + +// OGCG: define {{.*}} void @_Z19test_volatile_storev() 
+// OGCG: store volatile i32 0, ptr @derivedObj + +void test_volatile_load() { + [[maybe_unused]] int val = derivedObj.a; +} + +// CIR: cir.func {{.*}} @_Z18test_volatile_loadv() +// CIR: %[[DERIVED_OBJ:.*]] = cir.get_global @derivedObj : !cir.ptr +// CIR: %[[DERIVED_OBJ_BASE:.*]] = cir.base_class_addr %[[DERIVED_OBJ]] : !cir.ptr nonnull [0] -> !cir.ptr +// CIR: %[[DERIVED_OBJ_A:.*]] = cir.get_member %[[DERIVED_OBJ_BASE]][0] {name = "a"} : !cir.ptr -> !cir.ptr +// CIR: %[[VAL:.*]] = cir.load volatile {{.*}} %[[DERIVED_OBJ_A]] : !cir.ptr, !s32i + +// LLVM: define {{.*}} void @_Z18test_volatile_loadv() +// LLVM: %[[VAL_ADDR:.*]] = alloca i32 +// LLVM: %[[DERIVED_OBJ:.*]] = load volatile i32, ptr @derivedObj + +// OGCG: define {{.*}} void @_Z18test_volatile_loadv() +// OGCG: %[[VAL_ADDR:.*]] = alloca i32 +// OGCG: %[[DERIVED_OBJ:.*]] = load volatile i32, ptr @derivedObj +// OGCG: store i32 %[[DERIVED_OBJ]], ptr %[[VAL_ADDR]] diff --git a/clang/test/CodeGenObjC/nontrivial-c-struct.m b/clang/test/CodeGenObjC/nontrivial-c-struct.m new file mode 100644 index 0000000000000..fa4fa223bc2d9 --- /dev/null +++ b/clang/test/CodeGenObjC/nontrivial-c-struct.m @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 -triple arm64e-apple-ios18 -fptrauth-calls -fptrauth-intrinsics -fobjc-arc -emit-llvm -o - %s | FileCheck %s + +// CHECK: %[[STRUCT_S0:.*]] = type { i32, i32, ptr } +// CHECK: %[[STRUCT_S1:.*]] = type { ptr, ptr } + +// This struct isn't POD because it has an address-discriminated ptrauth +// field. +typedef struct { + int f0, f1; + int * __ptrauth(1,1,50) f2; +} S0; + +// This struct isn't POD because it has an address-discriminated ptrauth +// field and an ARC ObjC pointer field. 
+typedef struct { + id f0; + int * __ptrauth(1,1,50) f1; +} S1; + +// CHECK: define void @compound_literal_assignment0(ptr noundef %[[P:.*]]) +// CHECK: %[[P_ADDR:.*]] = alloca ptr, align 8 +// CHECK-NEXT: %[[_COMPOUNDLITERAL:.*]] = alloca %[[STRUCT_S0]], align 8 +// CHECK-NEXT: store ptr %[[P]], ptr %[[P_ADDR]], align 8 +// CHECK-NEXT: %[[V0:.*]] = load ptr, ptr %[[P_ADDR]], align 8 +// CHECK-NEXT: %[[F0:.*]] = getelementptr inbounds nuw %[[STRUCT_S0]], ptr %[[_COMPOUNDLITERAL]], i32 0, i32 0 +// CHECK-NEXT: %[[V1:.*]] = load ptr, ptr %[[P_ADDR]], align 8 +// CHECK-NEXT: %[[F1:.*]] = getelementptr inbounds nuw %[[STRUCT_S0]], ptr %[[V1]], i32 0, i32 1 +// CHECK-NEXT: %[[V2:.*]] = load i32, ptr %[[F1]], align 4 +// CHECK-NEXT: store i32 %[[V2]], ptr %[[F0]], align 8 +// CHECK-NEXT: %[[F11:.*]] = getelementptr inbounds nuw %[[STRUCT_S0]], ptr %[[_COMPOUNDLITERAL]], i32 0, i32 1 +// CHECK-NEXT: %[[V3:.*]] = load ptr, ptr %[[P_ADDR]], align 8 +// CHECK-NEXT: %[[F02:.*]] = getelementptr inbounds nuw %[[STRUCT_S0]], ptr %[[V3]], i32 0, i32 0 +// CHECK-NEXT: %[[V4:.*]] = load i32, ptr %[[F02]], align 8 +// CHECK-NEXT: store i32 %[[V4]], ptr %[[F11]], align 4 +// CHECK-NEXT: %[[F2:.*]] = getelementptr inbounds nuw %[[STRUCT_S0]], ptr %[[_COMPOUNDLITERAL]], i32 0, i32 2 +// CHECK-NEXT: store ptr null, ptr %[[F2]], align 8 +// CHECK-NEXT: call void @__copy_assignment_8_8_t0w8_pa1_50_8(ptr %[[V0]], ptr %[[_COMPOUNDLITERAL]]) +// CHECK-NEXT: ret void + +void compound_literal_assignment0(S0 *p) { + *p = (S0){.f0 = p->f1, .f1 = p->f0}; +} + +// CHECK: define void @compound_literal_assignment1(ptr noundef %[[P:.*]]) +// CHECK: %[[P_ADDR:.*]] = alloca ptr, align 8 +// CHECK-NEXT: %[[_COMPOUNDLITERAL:.*]] = alloca %[[STRUCT_S1]], align 8 +// CHECK-NEXT: store ptr %[[P]], ptr %[[P_ADDR]], align 8 +// CHECK-NEXT: %[[V0:.*]] = load ptr, ptr %[[P_ADDR]], align 8 +// CHECK-NEXT: %[[F0:.*]] = getelementptr inbounds nuw %[[STRUCT_S1]], ptr %[[_COMPOUNDLITERAL]], i32 0, i32 0 +// 
CHECK-NEXT: store ptr null, ptr %[[F0]], align 8 +// CHECK-NEXT: %[[F1:.*]] = getelementptr inbounds nuw %[[STRUCT_S1]], ptr %[[_COMPOUNDLITERAL]], i32 0, i32 1 +// CHECK-NEXT: store ptr null, ptr %[[F1]], align 8 +// CHECK-NEXT: call void @__copy_assignment_8_8_s0_pa1_50_8(ptr %[[V0]], ptr %[[_COMPOUNDLITERAL]]) +// CHECK-NEXT: call void @__destructor_8_s0(ptr %[[_COMPOUNDLITERAL]]) +// CHECK-NEXT: ret void + +void compound_literal_assignment1(S1 *p) { + *p = (S1){}; +} diff --git a/clang/test/Driver/aarch64-vfat.c b/clang/test/Driver/aarch64-vfat.c new file mode 100644 index 0000000000000..bd5eed275489f --- /dev/null +++ b/clang/test/Driver/aarch64-vfat.c @@ -0,0 +1,7 @@ +// ===== Features supported on aarch64 ===== + +// FAT features (Future Architecture Technologies) + +// RUN: %clang -target aarch64 -march=armv9.7a+mops-go -### -c %s 2>&1 | FileCheck -check-prefix=VFAT-MOPS-GO %s +// RUN: %clang -target aarch64 -march=armv9.7-a+mops-go -### -c %s 2>&1 | FileCheck -check-prefix=VFAT-MOPS-GO %s +// VFAT-MOPS-GO: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.7a"{{.*}} "-target-feature" "+mops-go" diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c index 7294c33959e7e..f2da680b68d70 100644 --- a/clang/test/Driver/print-supported-extensions-aarch64.c +++ b/clang/test/Driver/print-supported-extensions-aarch64.c @@ -49,6 +49,7 @@ // CHECK-NEXT: lsui FEAT_LSUI Enable Armv9.6-A unprivileged load/store instructions // CHECK-NEXT: lut FEAT_LUT Enable Lookup Table instructions // CHECK-NEXT: mops FEAT_MOPS Enable Armv8.8-A memcpy and memset acceleration instructions +// CHECK-NEXT: mops-go FEAT_MOPS_GO Enable memset acceleration granule only // CHECK-NEXT: mpamv2 FEAT_MPAMv2 Enable Armv9.7-A MPAMv2 Lookaside Buffer Invalidate instructions // CHECK-NEXT: memtag FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension // CHECK-NEXT: mtetc FEAT_MTETC Enable Virtual 
Memory Tagging Extension diff --git a/clang/test/Driver/ps4-sdk-root.c b/clang/test/Driver/ps4-sdk-root.c index 6e5f1e28958ad..791b96ac12ae6 100644 --- a/clang/test/Driver/ps4-sdk-root.c +++ b/clang/test/Driver/ps4-sdk-root.c @@ -11,6 +11,9 @@ /// /// The default for both headers and libraries is taken from the /// SCE_ORBIS_SDK_DIR environment variable. +/// +/// In ThinLTO code generation mode (-fthinlto-index=) SDK files are not required +/// so all warnings are suppressed. // RUN: echo "-### -Winvalid-or-nonexistent-directory -target x86_64-scei-ps4" > %t.rsp @@ -31,6 +34,10 @@ /// headers and libraries. // RUN: env SCE_ORBIS_SDK_DIR=.. %clang @%t.rsp %s 2>&1 | FileCheck -check-prefixes=WARN-SYS-HEADERS,WARN-SYS-LIBS,NO-WARN %s +/// -fthinlto-index= warning suppression. +// RUN: touch %t_dummy.o +// RUN: env SCE_ORBIS_SDK_DIR=.. %clang @%t.rsp %t_dummy.o -fthinlto-index=ignored -c 2>&1 | FileCheck -check-prefixes=NO-WARN %s + /// If `-c`, `-S`, `-E` or `-emit-ast` is supplied, the existence check for SDK /// libraries is skipped because no linking will be performed. We only expect /// warnings about missing headers. diff --git a/clang/test/Driver/ps5-sdk-root.c b/clang/test/Driver/ps5-sdk-root.c index 16ef2cc01f5e7..a337ce3801456 100644 --- a/clang/test/Driver/ps5-sdk-root.c +++ b/clang/test/Driver/ps5-sdk-root.c @@ -13,6 +13,9 @@ /// /// The default for both headers and libraries is taken from the /// SCE_PROSPERO_SDK_DIR environment variable. +/// +/// In ThinLTO code generation mode (-fthinlto-index=) SDK files are not required +/// so all warnings are suppressed. // RUN: echo "-### -Winvalid-or-nonexistent-directory -target x86_64-sie-ps5" > %t.rsp @@ -33,6 +36,10 @@ /// headers and libraries. // RUN: env SCE_PROSPERO_SDK_DIR=.. %clang @%t.rsp %s 2>&1 | FileCheck -check-prefixes=WARN-SYS-HEADERS,WARN-SYS-LIBS,NO-WARN %s +/// -fthinlto-index= warning suppression. +// RUN: touch %t_dummy.o +// RUN: env SCE_PROSPERO_SDK_DIR=.. 
%clang @%t.rsp %t_dummy.o -fthinlto-index=ignored -c 2>&1 | FileCheck -check-prefixes=NO-WARN %s + /// If `-c`, `-S`, `-E` or `-emit-ast` is supplied, the existence check for SDK /// libraries is skipped because no linking will be performed. We only expect /// warnings about missing headers. diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp index 31148b990d6bd..e9515b5d61006 100644 --- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp +++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp @@ -414,3 +414,20 @@ void test_use_lifetimebound_call() { // CHECK: Expire ([[L_Y]] (Path: y)) // CHECK: Expire ([[L_X]] (Path: x)) } +// CHECK-LABEL: Function: test_conditional_operator +void test_conditional_operator(bool cond) { + MyObj x, y; + MyObj *p = cond ? &x : &y; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: [[O_DRE_X:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_X]] (Expr: DeclRefExpr)) +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_Y:[0-9]+]] (Path: y), ToOrigin: [[O_DRE_Y:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_Y:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_Y]] (Expr: DeclRefExpr)) +// CHECK: Block B{{[0-9]+}}: +// CHECK: OriginFlow (Dest: [[O_COND_OP:[0-9]+]] (Expr: ConditionalOperator), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_COND_OP]] (Expr: ConditionalOperator), Src: [[O_ADDR_Y]] (Expr: UnaryOperator), Merge) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_COND_OP]] (Expr: ConditionalOperator)) +// CHECK: Expire ([[L_Y]] (Path: y)) +// CHECK: Expire ([[L_X]] (Path: x)) +} diff --git a/clang/test/Sema/warn-lifetime-safety.cpp b/clang/test/Sema/warn-lifetime-safety.cpp index 4f234f0ac6e2d..3460a8675bf04 100644 --- a/clang/test/Sema/warn-lifetime-safety.cpp +++ b/clang/test/Sema/warn-lifetime-safety.cpp @@ -440,6 
+440,7 @@ void no_error_loan_from_current_iteration(bool cond) { //===----------------------------------------------------------------------===// View Identity(View v [[clang::lifetimebound]]); +MyObj* Identity(MyObj* v [[clang::lifetimebound]]); View Choose(bool cond, View a [[clang::lifetimebound]], View b [[clang::lifetimebound]]); MyObj* GetPointer(const MyObj& obj [[clang::lifetimebound]]); @@ -582,3 +583,75 @@ void lifetimebound_ctor() { } (void)v; } + +// Conditional operator. +void conditional_operator_one_unsafe_branch(bool cond) { + MyObj safe; + MyObj* p = &safe; + { + MyObj temp; + p = cond ? &temp // expected-warning {{object whose reference is captured may not live long enough}} + : &safe; + } // expected-note {{destroyed here}} + + // This is not a use-after-free for any value of `cond` but the analysis + // cannot reason this and marks the above as a false positive. This + // ensures safety regardless of cond's value. + if (cond) + p = &safe; + (void)*p; // expected-note {{later used here}} +} + +void conditional_operator_two_unsafe_branches(bool cond) { + MyObj* p; + { + MyObj a, b; + p = cond ? &a // expected-warning {{object whose reference is captured does not live long enough}} + : &b; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note 2 {{destroyed here}} + (void)*p; // expected-note 2 {{later used here}} +} + +void conditional_operator_nested(bool cond) { + MyObj* p; + { + MyObj a, b, c, d; + p = cond ? cond ? &a // expected-warning {{object whose reference is captured does not live long enough}}. + : &b // expected-warning {{object whose reference is captured does not live long enough}}. + : cond ? &c // expected-warning {{object whose reference is captured does not live long enough}}. + : &d; // expected-warning {{object whose reference is captured does not live long enough}}. 
+ } // expected-note 4 {{destroyed here}} + (void)*p; // expected-note 4 {{later used here}} +} + +void conditional_operator_lifetimebound(bool cond) { + MyObj* p; + { + MyObj a, b; + p = Identity(cond ? &a // expected-warning {{object whose reference is captured does not live long enough}} + : &b); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note 2 {{destroyed here}} + (void)*p; // expected-note 2 {{later used here}} +} + +void conditional_operator_lifetimebound_nested(bool cond) { + MyObj* p; + { + MyObj a, b; + p = Identity(cond ? Identity(&a) // expected-warning {{object whose reference is captured does not live long enough}} + : Identity(&b)); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note 2 {{destroyed here}} + (void)*p; // expected-note 2 {{later used here}} +} + +void conditional_operator_lifetimebound_nested_deep(bool cond) { + MyObj* p; + { + MyObj a, b, c, d; + p = Identity(cond ? Identity(cond ? &a // expected-warning {{object whose reference is captured does not live long enough}} + : &b) // expected-warning {{object whose reference is captured does not live long enough}} + : Identity(cond ? &c // expected-warning {{object whose reference is captured does not live long enough}} + : &d)); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note 4 {{destroyed here}} + (void)*p; // expected-note 4 {{later used here}} +} diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index 34af476843c0d..9d61d56e078e3 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -689,7 +689,6 @@ TEST_F(LifetimeAnalysisTest, GslPointerConstructFromView) { EXPECT_THAT(Origin("q"), HasLoansTo({"a"}, "p1")); } -// FIXME: Handle loans in ternary operator! 
TEST_F(LifetimeAnalysisTest, GslPointerInConditionalOperator) { SetupTest(R"( void target(bool cond) { @@ -698,7 +697,7 @@ TEST_F(LifetimeAnalysisTest, GslPointerInConditionalOperator) { POINT(p1); } )"); - EXPECT_THAT(Origin("v"), HasLoansTo({}, "p1")); + EXPECT_THAT(Origin("v"), HasLoansTo({"a", "b"}, "p1")); } // FIXME: Handle temporaries. diff --git a/flang/include/flang/Evaluate/check-expression.h b/flang/include/flang/Evaluate/check-expression.h index 2ff78d75325ef..d11fe22c0be7b 100644 --- a/flang/include/flang/Evaluate/check-expression.h +++ b/flang/include/flang/Evaluate/check-expression.h @@ -163,8 +163,8 @@ extern template bool IsErrorExpr(const Expr &); std::optional CheckStatementFunction( const Symbol &, const Expr &, FoldingContext &); -bool MayNeedCopy(const ActualArgument *, const characteristics::DummyArgument *, - FoldingContext &, bool forCopyOut); +std::optional ActualArgNeedsCopy(const ActualArgument *, + const characteristics::DummyArgument *, FoldingContext &, bool forCopyOut); } // namespace Fortran::evaluate #endif diff --git a/flang/lib/Evaluate/check-expression.cpp b/flang/lib/Evaluate/check-expression.cpp index 656fc50044877..e07076e42ec88 100644 --- a/flang/lib/Evaluate/check-expression.cpp +++ b/flang/lib/Evaluate/check-expression.cpp @@ -1478,13 +1478,12 @@ class CopyInOutExplicitInterface { const characteristics::DummyDataObject &dummyObj) : fc_{fc}, actual_{actual}, dummyObj_{dummyObj} {} - // Returns true, if actual and dummy have different contiguity requirements - bool HaveContiguityDifferences() const { - // Check actual contiguity, unless dummy doesn't care + // Returns true if dummy arg needs to be contiguous + bool DummyNeedsContiguity() const { + if (dummyObj_.ignoreTKR.test(common::IgnoreTKR::Contiguous)) { + return false; + } bool dummyTreatAsArray{dummyObj_.ignoreTKR.test(common::IgnoreTKR::Rank)}; - bool actualTreatAsContiguous{ - dummyObj_.ignoreTKR.test(common::IgnoreTKR::Contiguous) || - IsSimplyContiguous(actual_, 
fc_)}; bool dummyIsExplicitShape{dummyObj_.type.IsExplicitShape()}; bool dummyIsAssumedSize{dummyObj_.type.attrs().test( characteristics::TypeAndShape::Attr::AssumedSize)}; @@ -1501,32 +1500,17 @@ class CopyInOutExplicitInterface { (dummyTreatAsArray && !dummyIsPolymorphic) || dummyIsVoidStar || dummyObj_.attrs.test( characteristics::DummyDataObject::Attr::Contiguous)}; - return !actualTreatAsContiguous && dummyNeedsContiguity; + return dummyNeedsContiguity; } - // Returns true, if actual and dummy have polymorphic differences bool HavePolymorphicDifferences() const { - bool dummyIsAssumedRank{dummyObj_.type.attrs().test( - characteristics::TypeAndShape::Attr::AssumedRank)}; - bool actualIsAssumedRank{semantics::IsAssumedRank(actual_)}; - bool dummyIsAssumedShape{dummyObj_.type.attrs().test( - characteristics::TypeAndShape::Attr::AssumedShape)}; - bool actualIsAssumedShape{semantics::IsAssumedShape(actual_)}; - if ((actualIsAssumedRank && dummyIsAssumedRank) || - (actualIsAssumedShape && dummyIsAssumedShape)) { - // Assumed-rank and assumed-shape arrays are represented by descriptors, - // so don't need to do polymorphic check. - } else if (!dummyObj_.ignoreTKR.test(common::IgnoreTKR::Type)) { - // flang supports limited cases of passing polymorphic to non-polimorphic. - // These cases require temporary of non-polymorphic type. (For example, - // the actual argument could be polymorphic array of child type, - // while the dummy argument could be non-polymorphic array of parent - // type.) 
+ if (dummyObj_.ignoreTKR.test(common::IgnoreTKR::Type)) { + return false; + } + if (auto actualType{ + characteristics::TypeAndShape::Characterize(actual_, fc_)}) { + bool actualIsPolymorphic{actualType->type().IsPolymorphic()}; bool dummyIsPolymorphic{dummyObj_.type.type().IsPolymorphic()}; - auto actualType{ - characteristics::TypeAndShape::Characterize(actual_, fc_)}; - bool actualIsPolymorphic{ - actualType && actualType->type().IsPolymorphic()}; if (actualIsPolymorphic && !dummyIsPolymorphic) { return true; } @@ -1575,28 +1559,32 @@ class CopyInOutExplicitInterface { // procedures with explicit interface, it's expected that "dummy" is not null. // For procedures with implicit interface dummy may be null. // +// Returns std::optional indicating whether the copy is known to be +// needed (true) or not needed (false); returns std::nullopt if the necessity +// of the copy is undetermined. +// // Note that these copy-in and copy-out checks are done from the caller's // perspective, meaning that for copy-in the caller need to do the copy // before calling the callee. Similarly, for copy-out the caller is expected // to do the copy after the callee returns. -bool MayNeedCopy(const ActualArgument *actual, +std::optional ActualArgNeedsCopy(const ActualArgument *actual, const characteristics::DummyArgument *dummy, FoldingContext &fc, bool forCopyOut) { if (!actual) { - return false; + return std::nullopt; } if (actual->isAlternateReturn()) { - return false; + return std::nullopt; } const auto *dummyObj{dummy ? std::get_if(&dummy->u) : nullptr}; - const bool forCopyIn = !forCopyOut; + const bool forCopyIn{!forCopyOut}; if (!evaluate::IsVariable(*actual)) { - // Actual argument expressions that aren’t variables are copy-in, but - // not copy-out. + // Expressions are copy-in, but not copy-out. 
return forCopyIn; } + auto maybeContigActual{IsContiguous(*actual, fc)}; if (dummyObj) { // Explict interface CopyInOutExplicitInterface check{fc, *actual, *dummyObj}; if (forCopyOut && check.HasIntentIn()) { @@ -1619,28 +1607,25 @@ bool MayNeedCopy(const ActualArgument *actual, if (!check.HaveArrayOrAssumedRankArgs()) { return false; } - if (check.HaveContiguityDifferences()) { - return true; - } - if (check.HavePolymorphicDifferences()) { - return true; + if (maybeContigActual.has_value()) { + // We know whether actual arg is contiguous or not + bool isContiguousActual{maybeContigActual.value()}; + bool actualArgNeedsCopy{ + (!isContiguousActual || check.HavePolymorphicDifferences()) && + check.DummyNeedsContiguity()}; + return actualArgNeedsCopy; + } else { + // We don't know whether actual arg is contiguous or not + return check.DummyNeedsContiguity(); } } else { // Implicit interface - if (ExtractCoarrayRef(*actual)) { - // Coindexed actual args may need copy-in and copy-out with implicit - // interface - return true; - } - if (!IsSimplyContiguous(*actual, fc)) { - // Copy-in: actual arguments that are variables are copy-in when - // non-contiguous. - // Copy-out: vector subscripts could refer to duplicate elements, can't - // copy out. - return !(forCopyOut && HasVectorSubscript(*actual)); + if (maybeContigActual.has_value()) { + // If known contiguous, don't copy in/out. + // If known non-contiguous, copy in/out. 
+ return !*maybeContigActual; } } - // For everything else, no copy-in or copy-out - return false; + return std::nullopt; } } // namespace Fortran::evaluate diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index 9bf994e70cf5d..f24a4d9745698 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -1296,10 +1296,14 @@ static PreparedDummyArgument preparePresentUserCallActualArgument( Fortran::evaluate::FoldingContext &foldingContext{ callContext.converter.getFoldingContext()}; - bool suggestCopyIn = Fortran::evaluate::MayNeedCopy( - arg.entity, arg.characteristics, foldingContext, /*forCopyOut=*/false); - bool suggestCopyOut = Fortran::evaluate::MayNeedCopy( - arg.entity, arg.characteristics, foldingContext, /*forCopyOut=*/true); + bool suggestCopyIn = Fortran::evaluate::ActualArgNeedsCopy( + arg.entity, arg.characteristics, foldingContext, + /*forCopyOut=*/false) + .value_or(true); + bool suggestCopyOut = Fortran::evaluate::ActualArgNeedsCopy( + arg.entity, arg.characteristics, foldingContext, + /*forCopyOut=*/true) + .value_or(true); mustDoCopyIn = actual.isArray() && suggestCopyIn; mustDoCopyOut = actual.isArray() && suggestCopyOut; } diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index 002b7c1888e73..b1a3c3d3c5439 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -10,7 +10,6 @@ #include "flang/Common/idioms.h" #include "flang/Evaluate/expression.h" -#include "flang/Optimizer/Builder/Todo.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/expression.h" #include "flang/Semantics/openmp-modifiers.h" diff --git a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp index 87b0b59ea698d..1d10c5b8dec41 100644 --- a/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp +++ b/flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp @@ -106,21 +106,31 @@ class 
MapsForPrivatizedSymbolsPass llvm::SmallVector boundsOps; if (needsBoundsOps(varPtr)) genBoundsOps(builder, varPtr, boundsOps); + mlir::Type varType = varPtr.getType(); mlir::omp::VariableCaptureKind captureKind = mlir::omp::VariableCaptureKind::ByRef; - if (fir::isa_trivial(fir::unwrapRefType(varPtr.getType())) || - fir::isa_char(fir::unwrapRefType(varPtr.getType()))) { - if (canPassByValue(fir::unwrapRefType(varPtr.getType()))) { + if (fir::isa_trivial(fir::unwrapRefType(varType)) || + fir::isa_char(fir::unwrapRefType(varType))) { + if (canPassByValue(fir::unwrapRefType(varType))) { captureKind = mlir::omp::VariableCaptureKind::ByCopy; } } + // Use tofrom if what we are mapping is not a trivial type. In all + // likelihood, it is a descriptor + mlir::omp::ClauseMapFlags mapFlag; + if (fir::isa_trivial(fir::unwrapRefType(varType)) || + fir::isa_char(fir::unwrapRefType(varType))) + mapFlag = mlir::omp::ClauseMapFlags::to; + else + mapFlag = mlir::omp::ClauseMapFlags::to | mlir::omp::ClauseMapFlags::from; + return omp::MapInfoOp::create( - builder, loc, varPtr.getType(), varPtr, - TypeAttr::get(llvm::cast(varPtr.getType()) - .getElementType()), - builder.getAttr(omp::ClauseMapFlags::to), + builder, loc, varType, varPtr, + TypeAttr::get( + llvm::cast(varType).getElementType()), + builder.getAttr(mapFlag), builder.getAttr(captureKind), /*varPtrPtr=*/Value{}, /*members=*/SmallVector{}, diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index 53a22768855e1..022b4289b4e7c 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -800,7 +800,9 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, bool dummyIsAssumedShape{dummy.type.attrs().test( characteristics::TypeAndShape::Attr::AssumedShape)}; bool copyOutNeeded{ - evaluate::MayNeedCopy(&arg, &dummyArg, foldingContext, true)}; + evaluate::ActualArgNeedsCopy(&arg, &dummyArg, foldingContext, + /*forCopyOut=*/true) + 
.value_or(false)}; if (copyOutNeeded && !dummyIsValue && (dummyIsAsynchronous || dummyIsVolatile)) { if (actualIsAsynchronous || actualIsVolatile) { @@ -837,8 +839,8 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, // a unread value in the actual argument. // Occurences of `volatileOrAsyncNeedsTempDiagnosticIssued = true` indicate a // more specific error message has already been issued. We might be able to - // clean this up by switching the coding style of MayNeedCopy to be more like - // WhyNotDefinable. + // clean this up by switching the coding style of ActualArgNeedsCopy to be + // more like WhyNotDefinable. if (copyOutNeeded && !volatileOrAsyncNeedsTempDiagnosticIssued) { if ((actualIsVolatile || actualIsAsynchronous) && (dummyIsVolatile || dummyIsAsynchronous)) { diff --git a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-allocatable.f90 b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-allocatable.f90 index f06951769458b..f1945340d328a 100644 --- a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-allocatable.f90 +++ b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-allocatable.f90 @@ -1,9 +1,22 @@ ! Tests delayed privatization for `targets ... private(..)` for allocatables. - +! XFAIL: * ! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --enable-delayed-privatization-staging \ -! RUN: -o - %s 2>&1 | FileCheck %s +! RUN: -o - %s 2>&1 | FileCheck %s --check-prefix=CPU + ! RUN: bbc -emit-hlfir -fopenmp --enable-delayed-privatization-staging -o - %s 2>&1 \ -! RUN: | FileCheck %s +! RUN: | FileCheck %s --check-prefix=CPU + +! RUN: %if amdgpu-registered-target %{ \ +! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir \ +! RUN: -fopenmp -fopenmp-is-target-device \ +! RUN: -mmlir --enable-delayed-privatization-staging \ +! RUN: -o - %s 2>&1 | \ +! RUN: FileCheck %s --check-prefix=GPU \ +! RUN: %} + +! RUN: bbc -emit-hlfir -fopenmp --enable-delayed-privatization-staging \ +! 
RUN: -fopenmp-is-target-device -fopenmp-is-gpu -o - %s 2>&1 \ +! RUN: | FileCheck %s --check-prefix=GPU subroutine target_allocatable implicit none @@ -14,53 +27,65 @@ subroutine target_allocatable !$omp end target end subroutine target_allocatable -! CHECK-LABEL: omp.private {type = private} -! CHECK-SAME: @[[VAR_PRIVATIZER_SYM:.*]] : -! CHECK-SAME: [[DESC_TYPE:!fir.box>]] init { -! CHECK: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE:!fir.ref>>]], %[[PRIV_ALLOC:.*]]: [[TYPE]]): +! CPU-LABEL: omp.private {type = private} +! CPU-SAME: @[[VAR_PRIVATIZER_SYM:.*]] : +! CPU-SAME: [[DESC_TYPE:!fir.box>]] init { +! CPU: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE:!fir.ref>>]], %[[PRIV_ALLOC:.*]]: [[TYPE]]): + +! CPU-NEXT: %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]] : [[TYPE]] +! CPU-NEXT: %[[PRIV_ARG_BOX:.*]] = fir.box_addr %[[PRIV_ARG_VAL]] : ([[DESC_TYPE]]) -> !fir.heap +! CPU-NEXT: %[[PRIV_ARG_ADDR:.*]] = fir.convert %[[PRIV_ARG_BOX]] : (!fir.heap) -> i64 +! CPU-NEXT: %[[C0:.*]] = arith.constant 0 : i64 +! CPU-NEXT: %[[ALLOC_COND:.*]] = arith.cmpi eq, %[[PRIV_ARG_ADDR]], %[[C0]] : i64 -! CHECK-NEXT: %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]] : [[TYPE]] -! CHECK-NEXT: %[[PRIV_ARG_BOX:.*]] = fir.box_addr %[[PRIV_ARG_VAL]] : ([[DESC_TYPE]]) -> !fir.heap -! CHECK-NEXT: %[[PRIV_ARG_ADDR:.*]] = fir.convert %[[PRIV_ARG_BOX]] : (!fir.heap) -> i64 -! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64 -! CHECK-NEXT: %[[ALLOC_COND:.*]] = arith.cmpi eq, %[[PRIV_ARG_ADDR]], %[[C0]] : i64 +! CPU-NEXT: fir.if %[[ALLOC_COND]] { +! CPU-NEXT: %[[ZERO_BOX:.*]] = fir.embox %[[PRIV_ARG_BOX]] : (!fir.heap) -> [[DESC_TYPE]] +! CPU-NEXT: fir.store %[[ZERO_BOX]] to %[[PRIV_ALLOC]] : [[TYPE]] +! CPU-NEXT: } else { +! CPU-NEXT: %[[PRIV_ALLOCMEM:.*]] = fir.allocmem i32 +! CPU-NEXT: %[[PRIV_ALLOCMEM_BOX:.*]] = fir.embox %[[PRIV_ALLOCMEM]] : (!fir.heap) -> [[DESC_TYPE]] +! CPU-NEXT: fir.store %[[PRIV_ALLOCMEM_BOX]] to %[[PRIV_ALLOC]] : [[TYPE]] +! CPU-NEXT: } -! CHECK-NEXT: fir.if %[[ALLOC_COND]] { -! 
CHECK-NEXT: %[[ZERO_BOX:.*]] = fir.embox %[[PRIV_ARG_BOX]] : (!fir.heap) -> [[DESC_TYPE]] -! CHECK-NEXT: fir.store %[[ZERO_BOX]] to %[[PRIV_ALLOC]] : [[TYPE]] -! CHECK-NEXT: } else { -! CHECK-NEXT: %[[PRIV_ALLOCMEM:.*]] = fir.allocmem i32 -! CHECK-NEXT: %[[PRIV_ALLOCMEM_BOX:.*]] = fir.embox %[[PRIV_ALLOCMEM]] : (!fir.heap) -> [[DESC_TYPE]] -! CHECK-NEXT: fir.store %[[PRIV_ALLOCMEM_BOX]] to %[[PRIV_ALLOC]] : [[TYPE]] -! CHECK-NEXT: } +! CPU-NEXT: omp.yield(%[[PRIV_ALLOC]] : [[TYPE]]) -! CHECK-NEXT: omp.yield(%[[PRIV_ALLOC]] : [[TYPE]]) +! CPU-NEXT: } dealloc { +! CPU-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]): -! CHECK-NEXT: } dealloc { -! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]): +! CPU-NEXT: %[[PRIV_VAL:.*]] = fir.load %[[PRIV_ARG]] +! CPU-NEXT: %[[PRIV_ADDR:.*]] = fir.box_addr %[[PRIV_VAL]] +! CPU-NEXT: %[[PRIV_ADDR_I64:.*]] = fir.convert %[[PRIV_ADDR]] +! CPU-NEXT: %[[C0:.*]] = arith.constant 0 : i64 +! CPU-NEXT: %[[PRIV_NULL_COND:.*]] = arith.cmpi ne, %[[PRIV_ADDR_I64]], %[[C0]] : i64 -! CHECK-NEXT: %[[PRIV_VAL:.*]] = fir.load %[[PRIV_ARG]] -! CHECK-NEXT: %[[PRIV_ADDR:.*]] = fir.box_addr %[[PRIV_VAL]] -! CHECK-NEXT: %[[PRIV_ADDR_I64:.*]] = fir.convert %[[PRIV_ADDR]] -! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64 -! CHECK-NEXT: %[[PRIV_NULL_COND:.*]] = arith.cmpi ne, %[[PRIV_ADDR_I64]], %[[C0]] : i64 +! CPU-NEXT: fir.if %[[PRIV_NULL_COND]] { +! CPU-NEXT: fir.freemem %[[PRIV_ADDR]] +! CPU-NEXT: } -! CHECK-NEXT: fir.if %[[PRIV_NULL_COND]] { -! CHECK-NEXT: fir.freemem %[[PRIV_ADDR]] -! CHECK-NEXT: } +! CPU-NEXT: omp.yield +! CPU-NEXT: } -! CHECK-NEXT: omp.yield -! CHECK-NEXT: } +! CPU-LABEL: func.func @_QPtarget_allocatable() { -! CHECK-LABEL: func.func @_QPtarget_allocatable() { +! CPU: %[[VAR_ALLOC:.*]] = fir.alloca [[DESC_TYPE]] +! CPU-SAME: {bindc_name = "alloc_var", {{.*}}} +! CPU: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_ALLOC]] +! CPU: %[[BASE_ADDR:.*]] = fir.box_offset %[[VAR_DECL]]#0 base_addr : (!fir.ref>>) -> [[MEMBER_TYPE:.*]] +! 
CPU: %[[MEMBER:.*]] = omp.map.info var_ptr(%[[VAR_DECL]]#0 : [[TYPE]], i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[BASE_ADDR]] : [[MEMBER_TYPE:.*]]) -> {{.*}} +! CPU: %[[MAP_VAR:.*]] = omp.map.info var_ptr(%[[VAR_DECL]]#0 : [[TYPE]], [[DESC_TYPE]]) map_clauses(to) capture(ByRef) members(%[[MEMBER]] : [0] : !fir.llvm_ptr>) -> !fir.ref>> -! CHECK: %[[VAR_ALLOC:.*]] = fir.alloca [[DESC_TYPE]] -! CHECK-SAME: {bindc_name = "alloc_var", {{.*}}} -! CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_ALLOC]] -! CHECK: %[[BASE_ADDR:.*]] = fir.box_offset %[[VAR_DECL]]#0 base_addr : (!fir.ref>>) -> [[MEMBER_TYPE:.*]] -! CHECK: %[[MEMBER:.*]] = omp.map.info var_ptr(%[[VAR_DECL]]#0 : [[TYPE]], i32) map_clauses(to) capture(ByRef) var_ptr_ptr(%[[BASE_ADDR]] : [[MEMBER_TYPE:.*]]) -> {{.*}} -! CHECK: %[[MAP_VAR:.*]] = omp.map.info var_ptr(%[[VAR_DECL]]#0 : [[TYPE]], [[DESC_TYPE]]) map_clauses(always, descriptor, to, attach) capture(ByRef) members(%[[MEMBER]] : [0] : !fir.llvm_ptr>) -> !fir.ref>> +! CPU: omp.target map_entries(%[[MAP_VAR]] -> %arg0, %[[MEMBER]] -> %arg1 : [[TYPE]], [[MEMBER_TYPE]]) private( +! CPU-SAME: @[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %{{.*}} [map_idx=0] : [[TYPE]]) { -! CHECK: omp.target map_entries(%[[MAP_VAR]] -> %arg0, %[[MEMBER]] -> %arg1 : [[TYPE]], [[MEMBER_TYPE]]) private( -! CHECK-SAME: @[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %{{.*}} [map_idx=0] : [[TYPE]]) { +! GPU-LABEL: omp.private {type = private} {{.*}} init { +! GPU: fir.if %{{.*}} { +! GPU-NEXT: %[[ZERO_BOX:.*]] = fir.embox %{{.*}} +! GPU-NEXT: fir.store %[[ZERO_BOX]] to %{{.*}} +! GPU-NEXT: } else { +! GPU-NOT: fir.allocmem i32 +! GPU-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca i32 +! GPU-NEXT: %[[PRIV_ALLOC_BOX:.*]] = fir.embox %[[PRIV_ALLOC]] +! GPU-NEXT: fir.store %[[PRIV_ALLOC_BOX]] to %{{.*}} +! GPU-NEXT: } +! 
GPU-NEXT: omp.yield(%{{.*}}) diff --git a/flang/test/Lower/OpenMP/optional-argument-map-2.f90 b/flang/test/Lower/OpenMP/optional-argument-map-2.f90 index 77ed037e62106..f14ea8ced6256 100644 --- a/flang/test/Lower/OpenMP/optional-argument-map-2.f90 +++ b/flang/test/Lower/OpenMP/optional-argument-map-2.f90 @@ -1,7 +1,7 @@ -! NOTE: Do not check for false delayed privatization flag until all enable-delayed-privatization flags are switched on in amd-staging -!RUN %flang_fc1 -emit-hlfir -fopenmp -mmlir --enable-delayed-privatization-staging=false %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NO-FPRIV +!RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --enable-delayed-privatization-staging=false %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NO-FPRIV !RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --enable-delayed-privatization-staging=true %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FPRIV +!XFAIL: * module mod implicit none contains @@ -73,8 +73,8 @@ end module mod ! CHECK-FPRIV: %[[VAL_13:.*]] = arith.subi %[[VAL_12]]#1, %[[VAL_11]] : index ! CHECK-FPRIV: %[[VAL_14:.*]] = omp.map.bounds lower_bound(%[[VAL_10]] : index) upper_bound(%[[VAL_13]] : index) extent(%[[VAL_12]]#1 : index) stride(%[[VAL_11]] : index) start_idx(%[[VAL_10]] : index) {stride_in_bytes = true} ! CHECK-FPRIV: %[[VAL_16:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref>) -> !fir.llvm_ptr>> -! CHECK-FPRIV: %[[VAL_17:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>, !fir.char<1,?>) map_clauses(to) capture(ByRef) var_ptr_ptr(%[[VAL_16]] : !fir.llvm_ptr>>) bounds(%[[VAL_14]]) -> !fir.llvm_ptr>> -! CHECK-FPRIV: %[[VAL_18:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>, !fir.boxchar<1>) map_clauses(always, descriptor, to, attach) capture(ByRef) members(%[[VAL_17]] : [0] : !fir.llvm_ptr>>) -> !fir.ref> +! 
CHECK-FPRIV: %[[VAL_17:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>, !fir.char<1,?>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_16]] : !fir.llvm_ptr>>) bounds(%[[VAL_14]]) -> !fir.llvm_ptr>> {name = ""} +! CHECK-FPRIV: %[[VAL_18:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>, !fir.boxchar<1>) map_clauses(to) capture(ByRef) members(%[[VAL_17]] : [0] : !fir.llvm_ptr>>) -> !fir.ref> ! CHECK-FPRIV: omp.target map_entries(%[[VAL_7]] -> %[[VAL_19:.*]], %[[VAL_18]] -> %[[VAL_20:.*]], %[[VAL_17]] -> %[[VAL_21:.*]] : !fir.ref>, !fir.ref>, !fir.llvm_ptr>>) private(@_QMmodFroutine_boxcharEa_firstprivate_boxchar_c8xU %[[VAL_3]]#0 -> %[[VAL_22:.*]] [map_idx=1] : !fir.boxchar<1>) { ! CHECK-FPRIV: %[[VAL_23:.*]] = arith.constant 4 : index ! CHECK-FPRIV: %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] typeparams %[[VAL_23]] {uniq_name = "_QMmodFroutine_boxcharEb"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Lower/force-temp.f90 b/flang/test/Lower/force-temp.f90 index d9ba543d46313..093e098d10ac7 100644 --- a/flang/test/Lower/force-temp.f90 +++ b/flang/test/Lower/force-temp.f90 @@ -27,6 +27,14 @@ subroutine pass_intent_out(buf) integer, intent(out) :: buf(5) end subroutine end interface + + ! Used by call_s6() and others below + type base + integer :: i = -1 + end type + type, extends (base) :: child + real :: r = -2.0 + end type contains subroutine s1(buf) !CHECK-LABEL: func.func @_QMtestPs1 @@ -79,4 +87,54 @@ subroutine s5() p => x(::2) ! 
pointer to non-contiguous array section call pass_intent_out(p) end subroutine + subroutine call_s6() + interface + subroutine s6(b) + import :: base + type(base), intent(inout) :: b(:) + end subroutine s6 + end interface + class(base), pointer :: pb(:) + type(child), target :: c(2) +!CHECK-LABEL: func.func @_QMtestPcall_s6 +!CHECK-NOT: hlfir.copy_in +!CHECK: fir.call @_QPs6 +!CHECK-NOT: hlfir.copy_out + pb => c + call s6(pb) + end subroutine call_s6 + subroutine call_s7() + interface + subroutine s7(b1, b2, n) + import :: base + integer :: n + type(base), intent(inout) :: b1(n) + type(base), intent(inout) :: b2(*) + end subroutine + end interface + integer, parameter :: n = 7 + class(base), allocatable :: c1(:), c2(:) +!CHECK-LABEL: func.func @_QMtestPcall_s7 +!CHECK: hlfir.copy_in +!CHECK: hlfir.copy_in +!CHECK: fir.call @_QPs7 +!CHECK: hlfir.copy_out +!CHECK: hlfir.copy_out + call s7(c1, c2, n) + end subroutine call_s7 + subroutine call_s8() + interface + subroutine s8(buf) + ! IGNORE_TKR(C) takes precendence over CONTIGUOUS + !DIR$ IGNORE_TKR(C) buf + real, contiguous :: buf(:) + end subroutine + end interface + real a(10) +!CHECK-LABEL: func.func @_QMtestPcall_s8 +!CHECK-NOT: hlfir.copy_in +!CHECK: fir.call @_QPs8 +!CHECK-NOT: hlfir.copy_out + call s8(a(1:5:2)) + end subroutine call_s8 end module diff --git a/flang/test/Transforms/omp-maps-for-privatized-symbols.fir b/flang/test/Transforms/omp-maps-for-privatized-symbols.fir index 10a76126ed054..6054c70a2700d 100644 --- a/flang/test/Transforms/omp-maps-for-privatized-symbols.fir +++ b/flang/test/Transforms/omp-maps-for-privatized-symbols.fir @@ -6,7 +6,12 @@ module attributes {omp.is_target_device = false} { // extract box address, see if it is null, etc omp.yield(%arg1: !fir.ref>>) } - + omp.private {type = firstprivate} @_QFtarget_simpleEfp_int_firstprivate_i32 : i32 copy { + ^bb0(%arg0: !fir.ref, %arg1: !fir.ref): + %0 = fir.load %arg0 : !fir.ref + hlfir.assign %0 to %arg1 : i32, !fir.ref + omp.yield(%arg1 
: !fir.ref) + } func.func @_QPtarget_simple() { %0 = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFtarget_simpleEa"} %1:2 = hlfir.declare %0 {uniq_name = "_QFtarget_simpleEa"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -15,34 +20,18 @@ module attributes {omp.is_target_device = false} { %4 = fir.embox %3 : (!fir.heap) -> !fir.box> fir.store %4 to %2 : !fir.ref>> %5:2 = hlfir.declare %2 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtarget_simpleEsimple_var"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) + %6 = fir.alloca i32 {bindc_name = "fp_int", uniq_name = "_QFtarget_simpleEfp_int"} + %7:2 = hlfir.declare %6 {uniq_name = "_QFtarget_simpleEfp_int"} : (!fir.ref) -> (!fir.ref, !fir.ref) %c2_i32 = arith.constant 2 : i32 hlfir.assign %c2_i32 to %1#0 : i32, !fir.ref - %6 = omp.map.info var_ptr(%1#1 : !fir.ref, i32) map_clauses(to) capture(ByRef) -> !fir.ref {name = "a"} - omp.target map_entries(%6 -> %arg0 : !fir.ref) private(@_QFtarget_simpleEsimple_var_private_ref_box_heap_i32 %5#0 -> %arg1 : !fir.ref>>) { - %11:2 = hlfir.declare %arg0 {uniq_name = "_QFtarget_simpleEa"} : (!fir.ref) -> (!fir.ref, !fir.ref) - %12:2 = hlfir.declare %arg1 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtarget_simpleEsimple_var"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) - %c10_i32 = arith.constant 10 : i32 - %13 = fir.load %11#0 : !fir.ref - %14 = arith.addi %c10_i32, %13 : i32 - hlfir.assign %14 to %12#0 realloc : i32, !fir.ref>> + %8 = omp.map.info var_ptr(%1#1 : !fir.ref, i32) map_clauses(to) capture(ByRef) -> !fir.ref {name = "a"} + omp.target map_entries(%8 -> %arg0 : !fir.ref) private(@_QFtarget_simpleEsimple_var_private_ref_box_heap_i32 %5#0 -> %arg1, @_QFtarget_simpleEfp_int_firstprivate_i32 %7#0 -> %arg2 : !fir.ref>>, !fir.ref) { omp.terminator } - %7 = fir.load %5#1 : !fir.ref>> - %8 = fir.box_addr %7 : (!fir.box>) -> !fir.heap - %9 = fir.convert %8 : (!fir.heap) -> i64 - %c0_i64 = arith.constant 0 : i64 - %10 = arith.cmpi ne, %9, %c0_i64 : i64 - fir.if %10 { - %11 = 
fir.load %5#1 : !fir.ref>> - %12 = fir.box_addr %11 : (!fir.box>) -> !fir.heap - fir.freemem %12 : !fir.heap - %13 = fir.zero_bits !fir.heap - %14 = fir.embox %13 : (!fir.heap) -> !fir.box> - fir.store %14 to %5#1 : !fir.ref>> - } return } } // CHECK: %[[MAP0:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref, i32) map_clauses(to) capture(ByRef) -> !fir.ref {name = "a"} -// CHECK: %[[MAP1:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref>>, !fir.box>) map_clauses(to) capture(ByRef) -> !fir.ref>> -// CHECK: omp.target map_entries(%[[MAP0]] -> %arg0, %[[MAP1]] -> %arg1 : !fir.ref, !fir.ref>>) +// CHECK: %[[MAP1:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref>>, !fir.box>) map_clauses(tofrom) capture(ByRef) -> !fir.ref>> +// CHECK: %[[MAP2:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref, i32) map_clauses(to) capture(ByCopy) -> !fir.ref +// CHECK: omp.target map_entries(%[[MAP0]] -> %arg0, %[[MAP1]] -> %arg1, %[[MAP2]] -> %arg2 : !fir.ref, !fir.ref>>, !fir.ref) diff --git a/libc/cmake/modules/LLVMLibCArchitectures.cmake b/libc/cmake/modules/LLVMLibCArchitectures.cmake index d4103f8a5a23f..6c730f807de6d 100644 --- a/libc/cmake/modules/LLVMLibCArchitectures.cmake +++ b/libc/cmake/modules/LLVMLibCArchitectures.cmake @@ -94,17 +94,6 @@ if(NOT libc_compiler_target_info) endif() string(STRIP ${libc_compiler_target_info} libc_compiler_target_info) string(SUBSTRING ${libc_compiler_target_info} 8 -1 libc_compiler_triple) -get_arch_and_system_from_triple(${libc_compiler_triple} - compiler_arch compiler_sys) -if(NOT compiler_arch) - message(FATAL_ERROR - "libc build: Invalid or unknown libc compiler target triple: " - "${libc_compiler_triple}") -endif() - -set(LIBC_TARGET_ARCHITECTURE ${compiler_arch}) -set(LIBC_TARGET_OS ${compiler_sys}) -set(LIBC_CROSSBUILD FALSE) # One should not set LLVM_RUNTIMES_TARGET and LIBC_TARGET_TRIPLE if(LLVM_RUNTIMES_TARGET AND LIBC_TARGET_TRIPLE) @@ -128,12 +117,40 @@ endif() # architecture. 
if(explicit_target_triple) get_arch_and_system_from_triple(${explicit_target_triple} libc_arch libc_sys) - if(NOT libc_arch) + if(NOT libc_arch OR NOT libc_sys) message(FATAL_ERROR "libc build: Invalid or unknown triple: ${explicit_target_triple}") endif() set(LIBC_TARGET_ARCHITECTURE ${libc_arch}) set(LIBC_TARGET_OS ${libc_sys}) + # If the compiler target triple is not the same as the triple specified by + # LIBC_TARGET_TRIPLE or LLVM_RUNTIMES_TARGET, we will add a --target option + # if the compiler is clang. If the compiler is GCC we just error out as there + # is no equivalent of an option like --target. + if(NOT libc_compiler_triple STREQUAL explicit_target_triple) + set(LIBC_CROSSBUILD TRUE) + if(CMAKE_COMPILER_IS_GNUCXX) + message(FATAL_ERROR + "GCC target triple (${libc_compiler_triple}) and the explicity " + "specified target triple (${explicit_target_triple}) do not match.") + else() + list(APPEND + LIBC_COMPILE_OPTIONS_DEFAULT "--target=${explicit_target_triple}") + endif() + else() + set(LIBC_CROSSBUILD FALSE) + endif() +else() + get_arch_and_system_from_triple(${libc_compiler_triple} + compiler_arch compiler_sys) + if(NOT compiler_arch OR NOT compiler_sys) + message(FATAL_ERROR + "libc build: Unknown compiler default target triple: " + "${libc_compiler_triple}") + endif() + set(LIBC_TARGET_ARCHITECTURE ${compiler_arch}) + set(LIBC_TARGET_OS ${compiler_sys}) + set(LIBC_CROSSBUILD FALSE) endif() if((LIBC_TARGET_OS STREQUAL "unknown") OR (LIBC_TARGET_OS STREQUAL "none")) @@ -198,31 +215,11 @@ else() "Unsupported libc target operating system ${LIBC_TARGET_OS}") endif() - -# If the compiler target triple is not the same as the triple specified by -# LIBC_TARGET_TRIPLE or LLVM_RUNTIMES_TARGET, we will add a --target option -# if the compiler is clang. If the compiler is GCC we just error out as there -# is no equivalent of an option like --target. 
-if(explicit_target_triple AND - (NOT (libc_compiler_triple STREQUAL explicit_target_triple))) - set(LIBC_CROSSBUILD TRUE) - if(CMAKE_COMPILER_IS_GNUCXX) - message(FATAL_ERROR - "GCC target triple (${libc_compiler_triple}) and the explicity " - "specified target triple (${explicit_target_triple}) do not match.") - else() - list(APPEND - LIBC_COMPILE_OPTIONS_DEFAULT "--target=${explicit_target_triple}") - endif() -endif() - - # Windows does not support full mode build. if (LIBC_TARGET_OS_IS_WINDOWS AND LLVM_LIBC_FULL_BUILD) message(FATAL_ERROR "Windows does not support full mode build.") endif () - message(STATUS - "Building libc for ${LIBC_TARGET_ARCHITECTURE} on ${LIBC_TARGET_OS} with - LIBC_COMPILE_OPTIONS_DEFAULT: ${LIBC_COMPILE_OPTIONS_DEFAULT}") + "Building libc for ${LIBC_TARGET_ARCHITECTURE} on ${LIBC_TARGET_OS} with " + "LIBC_COMPILE_OPTIONS_DEFAULT: ${LIBC_COMPILE_OPTIONS_DEFAULT}") diff --git a/libc/startup/baremetal/arm/start.cpp b/libc/startup/baremetal/arm/start.cpp index 4740067722022..db89828a0b45e 100644 --- a/libc/startup/baremetal/arm/start.cpp +++ b/libc/startup/baremetal/arm/start.cpp @@ -131,20 +131,30 @@ namespace LIBC_NAMESPACE_DECL { __arm_wsr("CPSR_c", 0x13); // SVC #endif -#ifdef __ARM_FP -// Enable FPU -#if __ARM_ARCH_PROFILE == 'M' +#if __ARM_ARCH_PROFILE == 'M' && \ + (defined(__ARM_FP) || defined(__ARM_FEATURE_MVE)) + // Enable FPU and MVE. They can't be enabled independently: the two are + // governed by the same bits in CPACR. // Based on // https://developer.arm.com/documentation/dui0646/c/Cortex-M7-Peripherals/Floating-Point-Unit/Enabling-the-FPU - // Set CPACR cp10 and cp11 - auto cpacr = (volatile uint32_t *const)0xE000ED88; + // Set CPACR cp10 and cp11. 
+ auto cpacr = reinterpret_cast(0xE000ED88); *cpacr |= (0xF << 20); __dsb(0xF); __isb(0xF); -#elif __ARM_ARCH_PROFILE == 'A' || __ARM_ARCH_PROFILE == 'R' +#if defined(__ARM_FEATURE_MVE) + // Initialize low-overhead-loop tail predication to its neutral state + uint32_t fpscr; + __asm__ __volatile__("vmrs %0, FPSCR" : "=r"(fpscr) : :); + fpscr |= (0x4 << 16); + __asm__ __volatile__("vmsr FPSCR, %0" : : "r"(fpscr) :); +#endif +#elif (__ARM_ARCH_PROFILE == 'A' || __ARM_ARCH_PROFILE == 'R') && \ + defined(__ARM_FP) + // Enable FPU. // Based on // https://developer.arm.com/documentation/dui0472/m/Compiler-Coding-Practices/Enabling-NEON-and-FPU-for-bare-metal - // Set CPACR cp10 and cp11 + // Set CPACR cp10 and cp11. uint32_t cpacr = __arm_rsr("p15:0:c1:c0:2"); cpacr |= (0xF << 20); __arm_wsr("p15:0:c1:c0:2", cpacr); @@ -152,9 +162,8 @@ namespace LIBC_NAMESPACE_DECL { // Set FPEXC.EN uint32_t fpexc; __asm__ __volatile__("vmrs %0, FPEXC" : "=r"(fpexc) : :); - fpexc |= (1 << 30); + fpexc |= (0x1 << 30); __asm__ __volatile__("vmsr FPEXC, %0" : : "r"(fpexc) :); -#endif #endif // Perform the equivalent of scatterloading diff --git a/libcxx/include/__config b/libcxx/include/__config index e907961446201..d079bf8b500b6 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -152,7 +152,7 @@ # ifndef _LIBCPP_CXX03_LANG -# define _LIBCPP_ALIGNOF(_Tp) alignof(_Tp) +# define _LIBCPP_ALIGNOF(...) alignof(__VA_ARGS__) # define _ALIGNAS_TYPE(x) alignas(x) # define _ALIGNAS(x) alignas(x) # define _NOEXCEPT noexcept @@ -161,7 +161,7 @@ # else -# define _LIBCPP_ALIGNOF(_Tp) _Alignof(_Tp) +# define _LIBCPP_ALIGNOF(...) 
_Alignof(__VA_ARGS__) # define _ALIGNAS_TYPE(x) __attribute__((__aligned__(_LIBCPP_ALIGNOF(x)))) # define _ALIGNAS(x) __attribute__((__aligned__(x))) # define nullptr __nullptr diff --git a/libcxx/include/__type_traits/aligned_storage.h b/libcxx/include/__type_traits/aligned_storage.h index 5c2208ae0c70a..33c0368d0c3c8 100644 --- a/libcxx/include/__type_traits/aligned_storage.h +++ b/libcxx/include/__type_traits/aligned_storage.h @@ -11,8 +11,6 @@ #include <__config> #include <__cstddef/size_t.h> -#include <__type_traits/integral_constant.h> -#include <__type_traits/type_list.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -21,10 +19,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -struct __align_type { - static const size_t value = _LIBCPP_PREFERRED_ALIGNOF(_Tp); - typedef _Tp type; -}; +struct _ALIGNAS(_LIBCPP_PREFERRED_ALIGNOF(_Tp)) _AlignedAsT {}; + +template +struct __max_align_impl : _AlignedAsT<_Args>... {}; struct __struct_double { long double __lx; @@ -33,41 +31,16 @@ struct __struct_double4 { double __lx[4]; }; -using __all_types _LIBCPP_NODEBUG = - __type_list<__align_type, - __align_type, - __align_type, - __align_type, - __align_type, - __align_type, - __align_type, - __align_type<__struct_double>, - __align_type<__struct_double4>, - __align_type >; - -template -struct __find_max_align; - -template -struct __find_max_align<__type_list<_Head>, _Len> : public integral_constant {}; - -template -struct __select_align { -private: - static const size_t __min = _A2 < _A1 ? _A2 : _A1; - static const size_t __max = _A1 < _A2 ? _A2 : _A1; - -public: - static const size_t value = _Len < __max ? 
__min : __max; -}; +inline const size_t __aligned_storage_max_align = + _LIBCPP_ALIGNOF(__max_align_impl); -template -struct __find_max_align<__type_list<_Head, _Tail...>, _Len> - : public integral_constant< - size_t, - __select_align<_Len, _Head::value, __find_max_align<__type_list<_Tail...>, _Len>::value>::value> {}; +template +inline const size_t __aligned_storage_alignment = + _Len > __aligned_storage_max_align + ? __aligned_storage_max_align + : size_t(1) << ((sizeof(size_t) * __CHAR_BIT__) - __builtin_clzg(_Len) - 1); -template ::value> +template > struct _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_NO_SPECIALIZATIONS aligned_storage { union _ALIGNAS(_Align) type { unsigned char __data[(_Len + _Align - 1) / _Align * _Align]; @@ -77,7 +50,7 @@ struct _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_NO_SPECIALIZATIONS aligned_storage { #if _LIBCPP_STD_VER >= 14 _LIBCPP_SUPPRESS_DEPRECATED_PUSH -template ::value> +template > using aligned_storage_t _LIBCPP_DEPRECATED_IN_CXX23 = typename aligned_storage<_Len, _Align>::type; _LIBCPP_SUPPRESS_DEPRECATED_POP diff --git a/libcxx/src/print.cpp b/libcxx/src/print.cpp index 3f2baa6dcc60b..82cf2afd052e2 100644 --- a/libcxx/src/print.cpp +++ b/libcxx/src/print.cpp @@ -22,6 +22,14 @@ # include #elif __has_include() # include +# if defined(_NEWLIB_VERSION) +# if defined(_POSIX_C_SOURCE) && __has_include() +# include +# define HAS_FILENO_AND_ISATTY +# endif +# else +# define HAS_FILENO_AND_ISATTY +# endif #endif _LIBCPP_BEGIN_NAMESPACE_STD @@ -56,7 +64,7 @@ __write_to_windows_console([[maybe_unused]] FILE* __stream, [[maybe_unused]] wst } # endif // _LIBCPP_HAS_WIDE_CHARACTERS -#elif __has_include() // !_LIBCPP_WIN32API +#elif defined(HAS_FILENO_AND_ISATTY) // !_LIBCPP_WIN32API _LIBCPP_EXPORTED_FROM_ABI bool __is_posix_terminal(FILE* __stream) { return isatty(fileno(__stream)); } #endif diff --git a/libunwind/src/UnwindLevel1.c b/libunwind/src/UnwindLevel1.c index 79398bac8b531..73a27928e91d1 100644 --- a/libunwind/src/UnwindLevel1.c +++ 
b/libunwind/src/UnwindLevel1.c @@ -82,7 +82,7 @@ void *shstkRegContext = __libunwind_shstk_get_registers((cursor)); \ void *shstkJumpAddress = __libunwind_shstk_get_jump_target(); \ __asm__ volatile("mov x0, %0\n\t" \ - "mov x1, wzr\n\t" \ + "mov x1, #0\n\t" \ "br %1\n\t" \ : \ : "r"(shstkRegContext), "r"(shstkJumpAddress) \ diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp index 75ed87baf636a..c99f6c6e5e2c5 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp @@ -171,9 +171,9 @@ class ClangDiagnosticManagerAdapter : public clang::DiagnosticConsumer { : m_options(opts), m_filename(filename) { m_options.ShowPresumedLoc = true; m_options.ShowLevel = false; - m_os = std::make_shared(m_output); + m_os = std::make_unique(m_output); m_passthrough = - std::make_shared(*m_os, m_options); + std::make_unique(*m_os, m_options); } void ResetManager(DiagnosticManager *manager = nullptr) { @@ -315,11 +315,11 @@ class ClangDiagnosticManagerAdapter : public clang::DiagnosticConsumer { private: DiagnosticManager *m_manager = nullptr; DiagnosticOptions m_options; - std::shared_ptr m_passthrough; - /// Output stream of m_passthrough. - std::shared_ptr m_os; /// Output string filled by m_os. std::string m_output; + /// Output stream of m_passthrough. 
+ std::unique_ptr m_os; + std::unique_ptr m_passthrough; StringRef m_filename; }; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp index 7635e49051216..e37c84efefdc9 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp @@ -67,13 +67,13 @@ class StoringDiagnosticConsumer : public clang::DiagnosticConsumer { IDAndDiagnostic; std::vector m_diagnostics; std::unique_ptr m_diag_opts; + /// Output string filled by m_os. Will be reused for different diagnostics. + std::string m_output; + /// Output stream of m_diag_printer. + std::unique_ptr m_os; /// The DiagnosticPrinter used for creating the full diagnostic messages /// that are stored in m_diagnostics. std::unique_ptr m_diag_printer; - /// Output stream of m_diag_printer. - std::unique_ptr m_os; - /// Output string filled by m_os. Will be reused for different diagnostics. - std::string m_output; /// A Progress with explicitly managed lifetime. 
std::unique_ptr m_current_progress_up; std::vector m_module_build_stack; diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp index 9fff4adbff79d..83e39f37d8dcf 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp @@ -320,7 +320,7 @@ extern "C" static const char *g_get_shared_cache_class_info_name = "__lldb_apple_objc_v2_get_shared_cache_class_info"; -static const char *g_get_shared_cache_class_info_body = R"( +static const char *g_get_shared_cache_class_info_definitions = R"( extern "C" { @@ -411,6 +411,9 @@ struct ClassInfo Class isa; uint32_t hash; } __attribute__((__packed__)); +)"; + +static const char *g_get_shared_cache_class_info_body = R"( uint32_t __lldb_apple_objc_v2_get_shared_cache_class_info (void *objc_opt_ro_ptr, @@ -418,6 +421,7 @@ __lldb_apple_objc_v2_get_shared_cache_class_info (void *objc_opt_ro_ptr, void *class_infos_ptr, uint64_t *relative_selector_offset, uint32_t class_infos_byte_size, + uint32_t *start_idx, uint32_t should_log) { *relative_selector_offset = 0; @@ -426,6 +430,7 @@ __lldb_apple_objc_v2_get_shared_cache_class_info (void *objc_opt_ro_ptr, DEBUG_PRINTF ("shared_cache_base_ptr = %p\n", shared_cache_base_ptr); DEBUG_PRINTF ("class_infos_ptr = %p\n", class_infos_ptr); DEBUG_PRINTF ("class_infos_byte_size = %u (%llu class infos)\n", class_infos_byte_size, (uint64_t)(class_infos_byte_size/sizeof(ClassInfo))); + DEBUG_PRINTF ("start_idx = %u\n", *start_idx); if (objc_opt_ro_ptr) { const objc_opt_t *objc_opt = (objc_opt_t *)objc_opt_ro_ptr; @@ -480,7 +485,11 @@ __lldb_apple_objc_v2_get_shared_cache_class_info (void *objc_opt_ro_ptr, DEBUG_PRINTF ("clsopt->mask = 0x%8.8x\n", clsopt->mask); DEBUG_PRINTF ("classOffsets = %p\n", classOffsets); - for (uint32_t i=0; icapacity; ++i) + const 
uint32_t original_start_idx = *start_idx; + + // Always start at the start_idx here. If it's greater than the capacity, + // it will skip the loop entirely and go to the duplicate handling below. + for (uint32_t i=*start_idx; icapacity; ++i) { const uint64_t objectCacheOffset = classOffsets[i].objectCacheOffset; DEBUG_PRINTF("objectCacheOffset[%u] = %u\n", i, objectCacheOffset); @@ -524,59 +533,77 @@ __lldb_apple_objc_v2_get_shared_cache_class_info (void *objc_opt_ro_ptr, else { DEBUG_PRINTF("not(class_infos && idx < max_class_infos)\n"); + *start_idx = i; + break; } ++idx; } - const uint32_t *duplicate_count_ptr = (uint32_t *)&classOffsets[clsopt->capacity]; - const uint32_t duplicate_count = *duplicate_count_ptr; - const objc_classheader_v16_t *duplicateClassOffsets = (const objc_classheader_v16_t *)(&duplicate_count_ptr[1]); - - DEBUG_PRINTF ("duplicate_count = %u\n", duplicate_count); - DEBUG_PRINTF ("duplicateClassOffsets = %p\n", duplicateClassOffsets); - - for (uint32_t i=0; icapacity]; + const uint32_t duplicate_count = *duplicate_count_ptr; + const objc_classheader_v16_t *duplicateClassOffsets = (const objc_classheader_v16_t *)(&duplicate_count_ptr[1]); - if (classOffsets[i].isDuplicate) { - DEBUG_PRINTF("isDuplicate = true\n"); - continue; // duplicate - } + DEBUG_PRINTF ("duplicate_count = %u\n", duplicate_count); + DEBUG_PRINTF ("duplicateClassOffsets = %p\n", duplicateClassOffsets); - if (objectCacheOffset == 0) { - DEBUG_PRINTF("objectCacheOffset == invalidEntryOffset\n"); - continue; // invalid offset - } + const uint32_t duplicate_start_idx = + *start_idx < clsopt->capacity ? 
+ 0 : + *start_idx - clsopt->capacity; - if (class_infos && idx < max_class_infos) + for (uint32_t i=duplicate_start_idx; iversion >= 12 && objc_opt->version <= 15) @@ -1938,6 +1965,7 @@ AppleObjCRuntimeV2::SharedCacheClassInfoExtractor:: class_name_getter_function_name.AsCString(), class_name_getter_function_name.AsCString()); + shared_class_expression += g_get_shared_cache_class_info_definitions; shared_class_expression += g_get_shared_cache_class_info_body; auto utility_fn_or_error = exe_ctx.GetTargetRef().CreateUtilityFunction( @@ -1959,6 +1987,9 @@ AppleObjCRuntimeV2::SharedCacheClassInfoExtractor:: CompilerType clang_uint64_t_pointer_type = scratch_ts_sp->GetBuiltinTypeForEncodingAndBitSize(eEncodingUint, 64) .GetPointerType(); + CompilerType clang_uint32_t_pointer_type = + scratch_ts_sp->GetBuiltinTypeForEncodingAndBitSize(eEncodingUint, 32) + .GetPointerType(); // Next make the function caller for our implementation utility function. ValueList arguments; @@ -1976,6 +2007,13 @@ AppleObjCRuntimeV2::SharedCacheClassInfoExtractor:: value.SetValueType(Value::ValueType::Scalar); value.SetCompilerType(clang_uint32_t_type); arguments.PushValue(value); + + value.SetValueType(Value::ValueType::Scalar); + value.SetCompilerType(clang_uint32_t_pointer_type); + arguments.PushValue(value); + + value.SetValueType(Value::ValueType::Scalar); + value.SetCompilerType(clang_uint32_t_type); arguments.PushValue(value); std::unique_ptr utility_fn = std::move(*utility_fn_or_error); @@ -2313,10 +2351,7 @@ AppleObjCRuntimeV2::SharedCacheClassInfoExtractor::UpdateISAToDescriptorMap() { // The number of entries to pre-allocate room for. // Each entry is (addrsize + 4) bytes - // FIXME: It is not sustainable to continue incrementing this value every time - // the shared cache grows. This is because it requires allocating memory in - // the inferior process and some inferior processes have small memory limits. 
- const uint32_t max_num_classes = 212992; + const uint32_t max_num_classes_in_buffer = 212992; UtilityFunction *get_class_info_code = GetClassInfoUtilityFunction(exe_ctx); if (!get_class_info_code) { @@ -2338,15 +2373,22 @@ AppleObjCRuntimeV2::SharedCacheClassInfoExtractor::UpdateISAToDescriptorMap() { DiagnosticManager diagnostics; const uint32_t class_info_byte_size = addr_size + 4; - const uint32_t class_infos_byte_size = max_num_classes * class_info_byte_size; + const uint32_t class_infos_byte_size = + max_num_classes_in_buffer * class_info_byte_size; lldb::addr_t class_infos_addr = process->AllocateMemory( class_infos_byte_size, ePermissionsReadable | ePermissionsWritable, err); const uint32_t relative_selector_offset_addr_size = 64; lldb::addr_t relative_selector_offset_addr = process->AllocateMemory(relative_selector_offset_addr_size, ePermissionsReadable | ePermissionsWritable, err); + constexpr uint32_t class_info_start_idx_byte_size = sizeof(uint32_t); + lldb::addr_t class_info_start_idx_addr = + process->AllocateMemory(class_info_start_idx_byte_size, + ePermissionsReadable | ePermissionsWritable, err); - if (class_infos_addr == LLDB_INVALID_ADDRESS) { + if (class_infos_addr == LLDB_INVALID_ADDRESS || + relative_selector_offset_addr == LLDB_INVALID_ADDRESS || + class_info_start_idx_addr == LLDB_INVALID_ADDRESS) { LLDB_LOGF(log, "unable to allocate %" PRIu32 " bytes in process for shared cache read", @@ -2354,6 +2396,17 @@ AppleObjCRuntimeV2::SharedCacheClassInfoExtractor::UpdateISAToDescriptorMap() { return DescriptorMapUpdateResult::Fail(); } + const uint32_t start_idx_init_value = 0; + size_t bytes_written = process->WriteMemory( + class_info_start_idx_addr, &start_idx_init_value, sizeof(uint32_t), err); + if (bytes_written != sizeof(uint32_t)) { + LLDB_LOGF(log, + "unable to write %" PRIu32 + " bytes in process for shared cache read", + class_infos_byte_size); + return DescriptorMapUpdateResult::Fail(); + } + std::lock_guard guard(m_mutex); // Fill in 
our function argument values @@ -2362,12 +2415,13 @@ AppleObjCRuntimeV2::SharedCacheClassInfoExtractor::UpdateISAToDescriptorMap() { arguments.GetValueAtIndex(2)->GetScalar() = class_infos_addr; arguments.GetValueAtIndex(3)->GetScalar() = relative_selector_offset_addr; arguments.GetValueAtIndex(4)->GetScalar() = class_infos_byte_size; + arguments.GetValueAtIndex(5)->GetScalar() = class_info_start_idx_addr; // Only dump the runtime classes from the expression evaluation if the log is // verbose: Log *type_log = GetLog(LLDBLog::Types); bool dump_log = type_log && type_log->GetVerbose(); - arguments.GetValueAtIndex(5)->GetScalar() = dump_log ? 1 : 0; + arguments.GetValueAtIndex(6)->GetScalar() = dump_log ? 1 : 0; bool success = false; @@ -2394,78 +2448,80 @@ AppleObjCRuntimeV2::SharedCacheClassInfoExtractor::UpdateISAToDescriptorMap() { diagnostics.Clear(); - // Run the function - ExpressionResults results = - get_shared_cache_class_info_function->ExecuteFunction( - exe_ctx, &m_args, options, diagnostics, return_value); - - if (results == eExpressionCompleted) { - // The result is the number of ClassInfo structures that were filled in - num_class_infos = return_value.GetScalar().ULong(); - LLDB_LOG(log, "Discovered {0} Objective-C classes in the shared cache", - num_class_infos); - // Assert if there were more classes than we pre-allocated - // room for. - assert(num_class_infos <= max_num_classes); - if (num_class_infos > 0) { - if (num_class_infos > max_num_classes) { - num_class_infos = max_num_classes; - - success = false; - } else { + uint32_t num_class_infos_read = 0; + bool already_read_relative_selector_offset = false; + + do { + // Run the function. + ExpressionResults results = + get_shared_cache_class_info_function->ExecuteFunction( + exe_ctx, &m_args, options, diagnostics, return_value); + + if (results == eExpressionCompleted) { + // The result is the number of ClassInfo structures that were filled in. 
+ num_class_infos_read = return_value.GetScalar().ULong(); + num_class_infos += num_class_infos_read; + LLDB_LOG(log, "Discovered {0} Objective-C classes in the shared cache", + num_class_infos_read); + if (num_class_infos_read > 0) { success = true; - } - // Read the relative selector offset. - DataBufferHeap relative_selector_offset_buffer(64, 0); - if (process->ReadMemory(relative_selector_offset_addr, - relative_selector_offset_buffer.GetBytes(), - relative_selector_offset_buffer.GetByteSize(), - err) == - relative_selector_offset_buffer.GetByteSize()) { - DataExtractor relative_selector_offset_data( - relative_selector_offset_buffer.GetBytes(), - relative_selector_offset_buffer.GetByteSize(), - process->GetByteOrder(), addr_size); - lldb::offset_t offset = 0; - uint64_t relative_selector_offset = - relative_selector_offset_data.GetU64(&offset); - if (relative_selector_offset > 0) { - // The offset is relative to the objc_opt struct. - m_runtime.SetRelativeSelectorBaseAddr(objc_opt_ptr + - relative_selector_offset); + // Read the relative selector offset. This only needs to occur once no + // matter how many times the function is called. + if (!already_read_relative_selector_offset) { + DataBufferHeap relative_selector_offset_buffer(64, 0); + if (process->ReadMemory( + relative_selector_offset_addr, + relative_selector_offset_buffer.GetBytes(), + relative_selector_offset_buffer.GetByteSize(), + err) == relative_selector_offset_buffer.GetByteSize()) { + DataExtractor relative_selector_offset_data( + relative_selector_offset_buffer.GetBytes(), + relative_selector_offset_buffer.GetByteSize(), + process->GetByteOrder(), addr_size); + lldb::offset_t offset = 0; + uint64_t relative_selector_offset = + relative_selector_offset_data.GetU64(&offset); + if (relative_selector_offset > 0) { + // The offset is relative to the objc_opt struct. 
+ m_runtime.SetRelativeSelectorBaseAddr(objc_opt_ptr + + relative_selector_offset); + } + } + already_read_relative_selector_offset = true; } - } - - // Read the ClassInfo structures - DataBufferHeap class_infos_buffer( - num_class_infos * class_info_byte_size, 0); - if (process->ReadMemory(class_infos_addr, class_infos_buffer.GetBytes(), - class_infos_buffer.GetByteSize(), - err) == class_infos_buffer.GetByteSize()) { - DataExtractor class_infos_data(class_infos_buffer.GetBytes(), - class_infos_buffer.GetByteSize(), - process->GetByteOrder(), addr_size); - m_runtime.ParseClassInfoArray(class_infos_data, num_class_infos); + // Read the ClassInfo structures + DataBufferHeap class_infos_buffer( + num_class_infos_read * class_info_byte_size, 0); + if (process->ReadMemory(class_infos_addr, + class_infos_buffer.GetBytes(), + class_infos_buffer.GetByteSize(), + err) == class_infos_buffer.GetByteSize()) { + DataExtractor class_infos_data(class_infos_buffer.GetBytes(), + class_infos_buffer.GetByteSize(), + process->GetByteOrder(), addr_size); + + m_runtime.ParseClassInfoArray(class_infos_data, + num_class_infos_read); + } } - } else { - success = true; - } - } else { - if (log) { + } else if (log) { LLDB_LOGF(log, "Error evaluating our find class name function."); diagnostics.Dump(log); + break; } - } - } else { - if (log) { - LLDB_LOGF(log, "Error writing function arguments."); - diagnostics.Dump(log); - } + } while (num_class_infos_read == max_num_classes_in_buffer); + } else if (log) { + LLDB_LOGF(log, "Error writing function arguments."); + diagnostics.Dump(log); } - // Deallocate the memory we allocated for the ClassInfo array + LLDB_LOG(log, "Processed {0} Objective-C classes total from the shared cache", + num_class_infos); + // Cleanup memory we allocated in the process. 
+ process->DeallocateMemory(relative_selector_offset_addr); + process->DeallocateMemory(class_info_start_idx_addr); process->DeallocateMemory(class_infos_addr); return DescriptorMapUpdateResult(success, false, num_class_infos); diff --git a/lldb/test/API/lang/objc/foundation/TestFoundationDisassembly.py b/lldb/test/API/lang/objc/foundation/TestFoundationDisassembly.py index 245313d683774..75f6651a2845a 100644 --- a/lldb/test/API/lang/objc/foundation/TestFoundationDisassembly.py +++ b/lldb/test/API/lang/objc/foundation/TestFoundationDisassembly.py @@ -12,52 +12,6 @@ class FoundationDisassembleTestCase(TestBase): NO_DEBUG_INFO_TESTCASE = True - @skipIfAsan - def test_foundation_disasm(self): - """Do 'disassemble -n func' on each and every 'Code' symbol entry from the Foundation.framework.""" - self.build() - - # Enable synchronous mode - self.dbg.SetAsync(False) - - # Create a target by the debugger. - target = self.dbg.CreateTarget(self.getBuildArtifact("a.out")) - self.assertTrue(target, VALID_TARGET) - - # Now launch the process, and do not stop at entry point. - process = target.LaunchSimple(None, None, self.get_process_working_directory()) - self.assertTrue(process, PROCESS_IS_VALID) - - foundation_framework = None - for module in target.modules: - if module.file.basename == "Foundation": - foundation_framework = module.file.fullpath - break - - self.assertIsNotNone(foundation_framework, "Foundation.framework path located") - self.runCmd("image dump symtab '%s'" % foundation_framework) - raw_output = self.res.GetOutput() - # Now, grab every 'Code' symbol and feed it into the command: - # 'disassemble -n func'. - # - # The symbol name is on the last column and trails the flag column which - # looks like '0xhhhhhhhh', i.e., 8 hexadecimal digits. - codeRE = re.compile( - r""" - \ Code\ {9} # ' Code' followed by 9 SPCs, - .* # the wildcard chars, - 0x[0-9a-f]{8} # the flag column, and - \ (.+)$ # finally the function symbol. 
- """, - re.VERBOSE, - ) - for line in raw_output.split(os.linesep): - match = codeRE.search(line) - if match: - func = match.group(1) - self.runCmd('image lookup -s "%s"' % func) - self.runCmd('disassemble --force -n "%s"' % func) - @skipIfAsan def test_simple_disasm(self): """Test the lldb 'disassemble' command""" diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 5ae1f394896ef..e45d2027b0279 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -24678,7 +24678,7 @@ Examples: .. _int_vp_load_ff: -'``llvm.vp.load_ff``' Intrinsic +'``llvm.vp.load.ff``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index c76717fdc990c..6f386b81476ac 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -104,6 +104,9 @@ Changes to the AArch64 Backend * Assembler/disassembler support has been added for Armv9.7-A (2025) architecture extensions. +* Assembler/disassembler support has been added for 'Virtual Tagging + Extension (vMTE)' Future Architecture Technologies extension. 
+ Changes to the AMDGPU Backend ----------------------------- diff --git a/llvm/include/llvm/ADT/ArrayRef.h b/llvm/include/llvm/ADT/ArrayRef.h index 450f4d04c97fc..d7ed2c78749f0 100644 --- a/llvm/include/llvm/ADT/ArrayRef.h +++ b/llvm/include/llvm/ADT/ArrayRef.h @@ -10,8 +10,8 @@ #define LLVM_ADT_ARRAYREF_H #include "llvm/ADT/Hashing.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include #include @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/llvm/include/llvm/ADT/FunctionExtras.h b/llvm/include/llvm/ADT/FunctionExtras.h index 2498cb7796f1f..807a2e769999c 100644 --- a/llvm/include/llvm/ADT/FunctionExtras.h +++ b/llvm/include/llvm/ADT/FunctionExtras.h @@ -39,7 +39,6 @@ #include "llvm/Support/MemAlloc.h" #include "llvm/Support/type_traits.h" #include -#include #include namespace llvm { diff --git a/llvm/include/llvm/Analysis/TensorSpec.h b/llvm/include/llvm/Analysis/TensorSpec.h index d432ce8a203c4..8b19b6bb976ec 100644 --- a/llvm/include/llvm/Analysis/TensorSpec.h +++ b/llvm/include/llvm/Analysis/TensorSpec.h @@ -15,7 +15,6 @@ #include "llvm/ADT/StringMap.h" #include "llvm/IR/LLVMContext.h" -#include #include #include diff --git a/llvm/include/llvm/CodeGen/MachineDominators.h b/llvm/include/llvm/CodeGen/MachineDominators.h index 41df86468aa37..faea0b7de525f 100644 --- a/llvm/include/llvm/CodeGen/MachineDominators.h +++ b/llvm/include/llvm/CodeGen/MachineDominators.h @@ -24,7 +24,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/GenericDomTree.h" #include -#include #include namespace llvm { diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index c85e070cbe084..138e2130ed296 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -958,7 +958,7 @@ class LLVM_ABI TargetRegisterInfo : public MCRegisterInfo { 
TypeSize getRegSizeInBits(Register Reg, const MachineRegisterInfo &MRI) const; /// Get the weight in units of pressure for this register unit. - virtual unsigned getRegUnitWeight(unsigned RegUnit) const = 0; + virtual unsigned getRegUnitWeight(MCRegUnit RegUnit) const = 0; /// Get the number of dimensions of register pressure. virtual unsigned getNumRegPressureSets() const = 0; @@ -978,7 +978,7 @@ class LLVM_ABI TargetRegisterInfo : public MCRegisterInfo { /// Get the dimensions of register pressure impacted by this register unit. /// Returns a -1 terminated array of pressure set IDs. - virtual const int *getRegUnitPressureSets(unsigned RegUnit) const = 0; + virtual const int *getRegUnitPressureSets(MCRegUnit RegUnit) const = 0; /// Get the scale factor of spill weight for this register class. virtual float getSpillWeightScaleFactor(const TargetRegisterClass *RC) const; diff --git a/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFCFIProgram.h b/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFCFIProgram.h index 0a1300b4acaa4..e636296b058fd 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFCFIProgram.h +++ b/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFCFIProgram.h @@ -17,7 +17,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Error.h" #include "llvm/TargetParser/Triple.h" -#include #include namespace llvm { diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h index fc41641fd5cff..79cfc4832fe9a 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryResolver.h @@ -21,7 +21,6 @@ #include #include -#include namespace llvm { namespace orc { diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryScanner.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryScanner.h index d1c201306bf54..61aefbda35337 100644 --- 
a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryScanner.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/LibraryScanner.h @@ -27,8 +27,6 @@ #include #include #include -#include -#include namespace llvm { namespace orc { diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 3918cecfc1e65..36b49e69650d8 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -68,6 +68,13 @@ find_unique(Container &&container, Predicate &&pred) { namespace tomp { +enum struct ErrorCode : int { + NoLeafAllowing, // No leaf that allows this clause + NoLeafPrivatizing, // No leaf that has a privatizing clause + InvalidDirNameMod, // Invalid directive name modifier + RedModNotApplied, // Reduction modifier not applied +}; + // ClauseType: Either an instance of ClauseT, or a type derived from ClauseT. // This is the clause representation in the code using this infrastructure. // @@ -114,10 +121,16 @@ struct ConstructDecompositionT { } tomp::ListT> output; + llvm::SmallVector> errors; private: bool split(); + bool error(const ClauseTy *node, ErrorCode ec) { + errors.emplace_back(node, ec); + return false; + } + struct LeafReprInternal { llvm::omp::Directive id = llvm::omp::Directive::OMPD_unknown; tomp::type::ListT clauses; @@ -456,10 +469,9 @@ bool ConstructDecompositionT::applyClause(Specific &&specific, // S Some clauses are permitted only on a single leaf construct of the // S combined or composite construct, in which case the effect is as if // S the clause is applied to that specific construct. 
(p339, 31-33) - if (applyToUnique(node)) - return true; - - return false; + if (!applyToUnique(node)) + return error(node, ErrorCode::NoLeafAllowing); + return true; } // --- Specific clauses ----------------------------------------------- @@ -487,7 +499,9 @@ bool ConstructDecompositionT::applyClause( }); }); - return applied; + if (!applied) + return error(node, ErrorCode::NoLeafPrivatizing); + return true; } // COLLAPSE @@ -501,18 +515,9 @@ template bool ConstructDecompositionT::applyClause( const tomp::clause::CollapseT &clause, const ClauseTy *node) { - // Apply "collapse" to the innermost directive. If it's not one that - // allows it flag an error. - if (!leafs.empty()) { - auto &last = leafs.back(); - - if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) { - last.clauses.push_back(node); - return true; - } - } - - return false; + if (!applyToInnermost(node)) + return error(node, ErrorCode::NoLeafAllowing); + return true; } // DEFAULT @@ -527,7 +532,9 @@ bool ConstructDecompositionT::applyClause( const tomp::clause::DefaultT &clause, const ClauseTy *node) { // [5.2:340:31] - return applyToAll(node); + if (!applyToAll(node)) + return error(node, ErrorCode::NoLeafAllowing); + return true; } // FIRSTPRIVATE @@ -655,7 +662,9 @@ bool ConstructDecompositionT::applyClause( applied = true; } - return applied; + if (!applied) + return error(node, ErrorCode::NoLeafAllowing); + return true; } // IF @@ -690,10 +699,12 @@ bool ConstructDecompositionT::applyClause( hasDir->clauses.push_back(unmodified); return true; } - return false; + return error(node, ErrorCode::InvalidDirNameMod); } - return applyToAll(node); + if (!applyToAll(node)) + return error(node, ErrorCode::NoLeafAllowing); + return true; } // LASTPRIVATE @@ -719,12 +730,9 @@ template bool ConstructDecompositionT::applyClause( const tomp::clause::LastprivateT &clause, const ClauseTy *node) { - bool applied = false; - // [5.2:340:21] - applied = applyToAll(node); - if (!applied) - return 
false; + if (!applyToAll(node)) + return error(node, ErrorCode::NoLeafAllowing); auto inFirstprivate = [&](const ObjectTy &object) { if (ClauseSet *set = findClausesWith(object)) { @@ -750,7 +758,6 @@ bool ConstructDecompositionT::applyClause( llvm::omp::Clause::OMPC_shared, tomp::clause::SharedT{/*List=*/sharedObjects}); dirParallel->clauses.push_back(shared); - applied = true; } // [5.2:340:24] @@ -759,7 +766,6 @@ bool ConstructDecompositionT::applyClause( llvm::omp::Clause::OMPC_shared, tomp::clause::SharedT{/*List=*/sharedObjects}); dirTeams->clauses.push_back(shared); - applied = true; } } @@ -783,11 +789,10 @@ bool ConstructDecompositionT::applyClause( /*Mapper=*/std::nullopt, /*Iterator=*/std::nullopt, /*LocatorList=*/std::move(tofrom)}}); dirTarget->clauses.push_back(map); - applied = true; } } - return applied; + return true; } // LINEAR @@ -813,7 +818,7 @@ bool ConstructDecompositionT::applyClause( const ClauseTy *node) { // [5.2:341:15.1] if (!applyToInnermost(node)) - return false; + return error(node, ErrorCode::NoLeafAllowing); // [5.2:341:15.2], [5.2:341:19] auto dirSimd = findDirective(llvm::omp::Directive::OMPD_simd); @@ -858,7 +863,9 @@ template bool ConstructDecompositionT::applyClause( const tomp::clause::NowaitT &clause, const ClauseTy *node) { - return applyToOutermost(node); + if (!applyToOutermost(node)) + return error(node, ErrorCode::NoLeafAllowing); + return true; } // OMPX_ATTRIBUTE @@ -866,8 +873,9 @@ template bool ConstructDecompositionT::applyClause( const tomp::clause::OmpxAttributeT &clause, const ClauseTy *node) { - // ERROR: no leaf that allows clause - return applyToAll(node); + if (!applyToAll(node)) + return error(node, ErrorCode::NoLeafAllowing); + return true; } // OMPX_BARE @@ -875,7 +883,9 @@ template bool ConstructDecompositionT::applyClause( const tomp::clause::OmpxBareT &clause, const ClauseTy *node) { - return applyToOutermost(node); + if (!applyToOutermost(node)) + return error(node, ErrorCode::NoLeafAllowing); + 
return true; } // ORDER @@ -890,7 +900,9 @@ bool ConstructDecompositionT::applyClause( const tomp::clause::OrderT &clause, const ClauseTy *node) { // [5.2:340:31] - return applyToAll(node); + if (!applyToAll(node)) + return error(node, ErrorCode::NoLeafAllowing); + return true; } // PRIVATE @@ -905,7 +917,9 @@ template bool ConstructDecompositionT::applyClause( const tomp::clause::PrivateT &clause, const ClauseTy *node) { - return applyToInnermost(node); + if (!applyToInnermost(node)) + return error(node, ErrorCode::NoLeafAllowing); + return true; } // REDUCTION @@ -1007,31 +1021,37 @@ bool ConstructDecompositionT::applyClause( /*List=*/objects}}); ReductionModifier effective = modifier.value_or(ReductionModifier::Default); - bool effectiveApplied = false; + bool modifierApplied = false; + bool allowingLeaf = false; // Walk over the leaf constructs starting from the innermost, and apply // the clause as required by the spec. for (auto &leaf : llvm::reverse(leafs)) { if (!llvm::omp::isAllowedClauseForDirective(leaf.id, node->id, version)) continue; + // Found a leaf that allows this clause. Keep track of this for better + // error reporting. + allowingLeaf = true; if (!applyToParallel && &leaf == dirParallel) continue; if (!applyToTeams && &leaf == dirTeams) continue; // Some form of the clause will be applied past this point. - if (isValidModifier(leaf.id, effective, effectiveApplied)) { + if (isValidModifier(leaf.id, effective, modifierApplied)) { // Apply clause with modifier. leaf.clauses.push_back(node); - effectiveApplied = true; + modifierApplied = true; } else { // Apply clause without modifier. leaf.clauses.push_back(unmodified); } // The modifier must be applied to some construct. 
- applied = effectiveApplied; + applied = modifierApplied; } + if (!allowingLeaf) + return error(node, ErrorCode::NoLeafAllowing); if (!applied) - return false; + return error(node, ErrorCode::RedModNotApplied); tomp::ObjectListT sharedObjects; llvm::transform(objects, std::back_inserter(sharedObjects), @@ -1078,11 +1098,10 @@ bool ConstructDecompositionT::applyClause( /*LocatorList=*/std::move(tofrom)}}); dirTarget->clauses.push_back(map); - applied = true; } } - return applied; + return true; } // SHARED @@ -1097,7 +1116,9 @@ bool ConstructDecompositionT::applyClause( const tomp::clause::SharedT &clause, const ClauseTy *node) { // [5.2:340:31] - return applyToAll(node); + if (!applyToAll(node)) + return error(node, ErrorCode::NoLeafAllowing); + return true; } // THREAD_LIMIT @@ -1112,7 +1133,9 @@ bool ConstructDecompositionT::applyClause( const tomp::clause::ThreadLimitT &clause, const ClauseTy *node) { // [5.2:340:31] - return applyToAll(node); + if (!applyToAll(node)) + return error(node, ErrorCode::NoLeafAllowing); + return true; } // --- Splitting ------------------------------------------------------ diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 636e88898a55e..3907e864bed1e 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -387,6 +387,12 @@ class PowerPC_VSX_Sca_DDD_Intrinsic [llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem]>; +/// PowerPC_VSX_WWW_Intrinsic - A PowerPC intrinsic that takes two v4i32 +/// vectors and returns one. These intrinsics have no side effects. +class PowerPC_VSX_WWW_Intrinsic + : PowerPC_VSX_Intrinsic; //===----------------------------------------------------------------------===// // PowerPC Altivec Intrinsic Definitions. 
@@ -1214,6 +1220,7 @@ def int_ppc_altivec_vsraw : PowerPC_Vec_WWW_Intrinsic<"vsraw">; def int_ppc_altivec_vrlb : PowerPC_Vec_BBB_Intrinsic<"vrlb">; def int_ppc_altivec_vrlh : PowerPC_Vec_HHH_Intrinsic<"vrlh">; def int_ppc_altivec_vrlw : PowerPC_Vec_WWW_Intrinsic<"vrlw">; +def int_ppc_vsx_xvrlw : PowerPC_VSX_WWW_Intrinsic<"xvrlw">; def int_ppc_altivec_vrld : PowerPC_Vec_DDD_Intrinsic<"vrld">; let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". diff --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h index 3cdbf84748c79..b6b5b5979dec9 100644 --- a/llvm/include/llvm/MCA/Instruction.h +++ b/llvm/include/llvm/MCA/Instruction.h @@ -26,8 +26,6 @@ #include "llvm/Support/raw_ostream.h" #endif -#include - namespace llvm { namespace mca { diff --git a/llvm/include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h b/llvm/include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h index 25ba27c7c7a22..a70c2388c5168 100644 --- a/llvm/include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h +++ b/llvm/include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h @@ -21,7 +21,6 @@ #include "llvm/Support/Error.h" #include "llvm/Support/YAMLTraits.h" #include -#include #include namespace llvm { diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index 799938ab901c1..67834f72c2400 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -244,7 +244,6 @@ #include #include #include -#include #include namespace llvm { diff --git a/llvm/include/llvm/Support/Jobserver.h b/llvm/include/llvm/Support/Jobserver.h index 3c0c04537735d..1fd4f7ed007af 100644 --- a/llvm/include/llvm/Support/Jobserver.h +++ b/llvm/include/llvm/Support/Jobserver.h @@ -67,7 +67,6 @@ #define LLVM_SUPPORT_JOBSERVER_H #include "llvm/ADT/StringRef.h" -#include namespace llvm { diff --git a/llvm/include/llvm/Support/LSP/Logging.h b/llvm/include/llvm/Support/LSP/Logging.h index 
fe65899b1d4ce..f19cc49dbb606 100644 --- a/llvm/include/llvm/Support/LSP/Logging.h +++ b/llvm/include/llvm/Support/LSP/Logging.h @@ -11,7 +11,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FormatVariadic.h" -#include #include namespace llvm { diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h index d3276a18dc2c6..1be7779f2c72c 100644 --- a/llvm/include/llvm/Support/ThreadPool.h +++ b/llvm/include/llvm/Support/ThreadPool.h @@ -27,7 +27,6 @@ #include #include #include -#include #include #include diff --git a/llvm/include/llvm/Support/Timer.h b/llvm/include/llvm/Support/Timer.h index 527d67f3b360c..097eaf3422ca3 100644 --- a/llvm/include/llvm/Support/Timer.h +++ b/llvm/include/llvm/Support/Timer.h @@ -15,7 +15,6 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Support/Mutex.h" #include -#include #include #include diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h index 3bdcf9a18fe40..c695784641b4e 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h @@ -17,6 +17,8 @@ #include "llvm/ADT/StringSet.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h" +#include + namespace llvm { using AnchorList = std::vector>; diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp index 5e92ca1d38e70..fbe74d21c7199 100644 --- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -30,7 +30,6 @@ #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include -#include #include using namespace llvm; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 7588a47b2b975..1b77d53c1a387 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -653,9 +653,9 @@ 
struct FwdRegParamInfo { /// Register worklist for finding call site values. using FwdRegWorklist = MapVector>; -/// Container for the set of registers known to be clobbered on the path to a -/// call site. -using ClobberedRegSet = SmallSet; +/// Container for the set of register units known to be clobbered on the path +/// to a call site. +using ClobberedRegUnitSet = SmallSet; /// Append the expression \p Addition to \p Original and return the result. static const DIExpression *combineDIExpressions(const DIExpression *Original, @@ -727,7 +727,7 @@ static void addToFwdRegWorklist(FwdRegWorklist &Worklist, unsigned Reg, static void interpretValues(const MachineInstr *CurMI, FwdRegWorklist &ForwardedRegWorklist, ParamSet &Params, - ClobberedRegSet &ClobberedRegUnits) { + ClobberedRegUnitSet &ClobberedRegUnits) { const MachineFunction *MF = CurMI->getMF(); const DIExpression *EmptyExpr = @@ -759,7 +759,7 @@ static void interpretValues(const MachineInstr *CurMI, // If the MI is an instruction defining one or more parameters' forwarding // registers, add those defines. - ClobberedRegSet NewClobberedRegUnits; + ClobberedRegUnitSet NewClobberedRegUnits; auto getForwardingRegsDefinedByMI = [&](const MachineInstr &MI, SmallSetVector &Defs) { if (MI.isDebugInstr()) @@ -842,7 +842,7 @@ static void interpretValues(const MachineInstr *CurMI, static bool interpretNextInstr(const MachineInstr *CurMI, FwdRegWorklist &ForwardedRegWorklist, ParamSet &Params, - ClobberedRegSet &ClobberedRegUnits) { + ClobberedRegUnitSet &ClobberedRegUnits) { // Skip bundle headers. if (CurMI->isBundle()) return true; @@ -912,7 +912,7 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, bool ShouldTryEmitEntryVals = MBB->getIterator() == MF->begin(); // Search for a loading value in forwarding registers inside call delay slot. 
- ClobberedRegSet ClobberedRegUnits; + ClobberedRegUnitSet ClobberedRegUnits; if (CallMI->hasDelaySlot()) { auto Suc = std::next(CallMI->getIterator()); // Only one-instruction delay slot is supported. diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 2832fbf1227e5..12d2096bbddb3 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3485,7 +3485,7 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U, bool IRTranslator::translateAtomicRMW(const User &U, MachineIRBuilder &MIRBuilder) { - if (!MF->getTarget().getTargetTriple().isSPIRV() && containsBF16Type(U)) + if (containsBF16Type(U) && !targetSupportsBF16Type(MF)) return false; const AtomicRMWInst &I = cast(U); diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 005e44fc7080b..25c4375a73ce0 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -79,9 +79,9 @@ static cl::opt EnableJoining("join-liveintervals", cl::desc("Coalesce copies (default=true)"), cl::init(true), cl::Hidden); -static cl::opt - EnableTerminalRule("terminal-rule", cl::desc("Apply the terminal rule"), - cl::init(cl::BOU_UNSET), cl::Hidden); +static cl::opt UseTerminalRule("terminal-rule", + cl::desc("Apply the terminal rule"), + cl::init(true), cl::Hidden); /// Temporary flag to test critical edge unsplitting. static cl::opt EnableJoinSplits( @@ -134,7 +134,6 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate { SlotIndexes *SI = nullptr; const MachineLoopInfo *Loops = nullptr; RegisterClassInfo RegClassInfo; - bool UseTerminalRule = false; /// Position and VReg of a PHI instruction during coalescing. 
struct PHIValPos { @@ -4321,11 +4320,6 @@ bool RegisterCoalescer::run(MachineFunction &fn) { else JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE); - if (EnableTerminalRule == cl::BOU_UNSET) - UseTerminalRule = STI.enableTerminalRule(); - else - UseTerminalRule = EnableTerminalRule == cl::BOU_TRUE; - // If there are PHIs tracked by debug-info, they will need updating during // coalescing. Build an index of those PHIs to ease updating. SlotIndexes *Slots = LIS->getSlotIndexes(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index dd5c011bfe784..6284ded3be922 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -6057,11 +6057,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOOP_DEPENDENCE_MASK(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { SDLoc dl(N); - // Build a vector with undefined for the new nodes. + // Build a vector with poison for the new nodes. EVT VT = N->getValueType(0); // Integer BUILD_VECTOR operands may be larger than the node's vector element - // type. The UNDEFs need to have the same type as the existing operands. + // type. The POISONs need to have the same type as the existing operands. 
EVT EltVT = N->getOperand(0).getValueType(); unsigned NumElts = VT.getVectorNumElements(); @@ -6070,7 +6070,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { SmallVector NewOps(N->ops()); assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!"); - NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT)); + NewOps.append(WidenNumElts - NumElts, DAG.getPOISON(EltVT)); return DAG.getBuildVector(WidenVT, dl, NewOps); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 2e2e4f58b4f38..cad66c2b0d381 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6105,7 +6105,17 @@ bool SelectionDAG::cannotBeOrderedNegativeFP(SDValue Op) const { if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Op, true)) return !C1->isNegative(); - return Op.getOpcode() == ISD::FABS; + switch (Op.getOpcode()) { + case ISD::FABS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FEXP10: + return true; + default: + return false; + } + + llvm_unreachable("covered opcode switch"); } bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index efd55af767746..5b980648e76e2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -10727,8 +10727,22 @@ SDValue SelectionDAGBuilder::lowerNoFPClassToAssertNoFPClass( if (Classes == fcNone) return Op; - return DAG.getNode(ISD::AssertNoFPClass, SDLoc(Op), Op.getValueType(), Op, - DAG.getTargetConstant(Classes, SDLoc(), MVT::i32)); + SDLoc SL = getCurSDLoc(); + SDValue TestConst = DAG.getTargetConstant(Classes, SDLoc(), MVT::i32); + + if (Op.getOpcode() != ISD::MERGE_VALUES) { + return DAG.getNode(ISD::AssertNoFPClass, SL, Op.getValueType(), Op, + TestConst); + } + + SmallVector Ops(Op.getNumOperands()); 
+ for (unsigned I = 0, E = Ops.size(); I != E; ++I) { + SDValue MergeOp = Op.getOperand(I); + Ops[I] = DAG.getNode(ISD::AssertNoFPClass, SL, MergeOp.getValueType(), + MergeOp, TestConst); + } + + return DAG.getMergeValues(Ops, SL); } /// Populate a CallLowerinInfo (into \p CLI) based on the properties of diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 3f2961cd83bab..c306fe6012c11 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -794,29 +794,36 @@ bool TwoAddressInstructionImpl::convertInstTo3Addr( if (!NewMI) return false; - LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); - LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); - - // If the old instruction is debug value tracked, an update is required. - if (auto OldInstrNum = mi->peekDebugInstrNum()) { - assert(mi->getNumExplicitDefs() == 1); - assert(NewMI->getNumExplicitDefs() == 1); - - // Find the old and new def location. - unsigned OldIdx = mi->defs().begin()->getOperandNo(); - unsigned NewIdx = NewMI->defs().begin()->getOperandNo(); - - // Record that one def has been replaced by the other. - unsigned NewInstrNum = NewMI->getDebugInstrNum(); - MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx), - std::make_pair(NewInstrNum, NewIdx)); - } - - MBB->erase(mi); // Nuke the old inst. - for (MachineInstr &MI : MIS) DistanceMap.insert(std::make_pair(&MI, Dist++)); - Dist--; + + if (&*mi == NewMI) { + LLVM_DEBUG(dbgs() << "2addr: CONVERTED IN-PLACE TO 3-ADDR: " << *mi); + } else { + LLVM_DEBUG({ + dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi; + dbgs() << "2addr: TO 3-ADDR: " << *NewMI; + }); + + // If the old instruction is debug value tracked, an update is required. + if (auto OldInstrNum = mi->peekDebugInstrNum()) { + assert(mi->getNumExplicitDefs() == 1); + assert(NewMI->getNumExplicitDefs() == 1); + + // Find the old and new def location. 
+ unsigned OldIdx = mi->defs().begin()->getOperandNo(); + unsigned NewIdx = NewMI->defs().begin()->getOperandNo(); + + // Record that one def has been replaced by the other. + unsigned NewInstrNum = NewMI->getDebugInstrNum(); + MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx), + std::make_pair(NewInstrNum, NewIdx)); + } + + MBB->erase(mi); // Nuke the old inst. + Dist--; + } + mi = NewMI; nmi = std::next(mi); @@ -1329,6 +1336,9 @@ bool TwoAddressInstructionImpl::tryInstructionTransform( bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist); + // Give targets a chance to convert bundled instructions. + bool ConvertibleTo3Addr = MI.isConvertibleTo3Addr(MachineInstr::AnyInBundle); + // If the instruction is convertible to 3 Addr, instead // of returning try 3 Addr transformation aggressively and // use this variable to check later. Because it might be better. @@ -1337,7 +1347,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform( // addl %esi, %edi // movl %edi, %eax // ret - if (Commuted && !MI.isConvertibleTo3Addr()) + if (Commuted && !ConvertibleTo3Addr) return false; if (shouldOnlyCommute) @@ -1357,7 +1367,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform( regBKilled = isKilled(MI, regB, true); } - if (MI.isConvertibleTo3Addr()) { + if (ConvertibleTo3Addr) { // This instruction is potentially convertible to a true // three-address instruction. Check if it is profitable. 
if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { diff --git a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp index 2fd1dd5f84a91..53d166d277cb8 100644 --- a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp +++ b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp @@ -34,7 +34,6 @@ #include #include #include -#include #include using namespace llvm; diff --git a/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp b/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp index bb3411bb9568e..7890bcce6c7ca 100644 --- a/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp +++ b/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include #include diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp index b493337b39317..11e42118a29ef 100644 --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -24,7 +24,6 @@ #include #include #include -#include #include #include diff --git a/llvm/lib/ObjCopy/MachO/MachOObject.cpp b/llvm/lib/ObjCopy/MachO/MachOObject.cpp index 8d2c02dc37c99..e45cc547ee446 100644 --- a/llvm/lib/ObjCopy/MachO/MachOObject.cpp +++ b/llvm/lib/ObjCopy/MachO/MachOObject.cpp @@ -9,7 +9,6 @@ #include "MachOObject.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/SystemZ/zOSSupport.h" -#include using namespace llvm; using namespace llvm::objcopy::macho; diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h index 753470dfb5374..a0ade6412024e 100644 --- a/llvm/lib/TableGen/TGLexer.h +++ b/llvm/lib/TableGen/TGLexer.h @@ -19,7 +19,6 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Support/SMLoc.h" #include -#include #include #include diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td index 0e94b78d11d83..7fd5254dfa536 100644 --- a/llvm/lib/Target/AArch64/AArch64Features.td +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -625,6 +625,13 @@ def FeatureF16F32DOT : ExtensionWithMArch<"f16f32dot", "F16F32DOT", "FEAT_F16F32 
def FeatureF16F32MM : ExtensionWithMArch<"f16f32mm", "F16F32MM", "FEAT_F16F32MM", "Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>; +//===----------------------------------------------------------------------===// +// Future Architecture Technologies +//===----------------------------------------------------------------------===// + +def FeatureMOPS_GO: ExtensionWithMArch<"mops-go", "MOPS_GO", "FEAT_MOPS_GO", + "Enable memset acceleration granule only">; + //===----------------------------------------------------------------------===// // Other Features //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index bb2f083db19ef..2bce5c89f8ba6 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -12588,12 +12588,10 @@ class MOPSMemoryCopy opcode, bits<2> op1, bits<2> op2, string asm> class MOPSMemoryMove opcode, bits<2> op1, bits<2> op2, string asm> : MOPSMemoryCopyMoveBase<1, opcode, op1, op2, asm>; -class MOPSMemorySetBase opcode, bit op1, bit op2, - string asm> - : I<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb), - (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), - asm, "\t[$Rd]!, $Rn!, $Rm", - "$Rd = $Rd_wb,$Rn = $Rn_wb", []>, +class MOPSMemorySetBase opcode, + bit op1, bit op2, bit op3, string asm> + : I<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb), ins, + asm, operands, "$Rd = $Rd_wb,$Rn = $Rn_wb", []>, Sched<[]> { bits<5> Rd; bits<5> Rn; @@ -12605,20 +12603,34 @@ class MOPSMemorySetBase opcode, bit op1, bit op2, let Inst{15-14} = opcode; let Inst{13} = op2; let Inst{12} = op1; - let Inst{11-10} = 0b01; + let Inst{11} = 0b0; + let Inst{10} = op3; let Inst{9-5} = Rn; let Inst{4-0} = Rd; - let DecoderMethod = "DecodeSETMemOpInstruction"; let mayLoad = 0; let mayStore = 1; } -class MOPSMemorySet opcode, bit 
op1, bit op2, string asm> - : MOPSMemorySetBase<0, opcode, op1, op2, asm>; +class MOPSMemorySet opcode, bit op1, bit op2, bit op3, string asm> + : MOPSMemorySetBase<(ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), + "\t[$Rd]!, $Rn!, $Rm", 0, opcode, op1, op2, op3, asm> { + let DecoderMethod = "DecodeSETMemOpInstruction"; +} + +class MOPSMemorySetTagging opcode, bit op1, bit op2, bit op3, string asm> + : MOPSMemorySetBase<(ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), + "\t[$Rd]!, $Rn!, $Rm", 1, opcode, op1, op2, op3, asm> { + let DecoderMethod = "DecodeSETMemOpInstruction"; +} -class MOPSMemorySetTagging opcode, bit op1, bit op2, string asm> - : MOPSMemorySetBase<1, opcode, op1, op2, asm>; +class MOPSGoMemorySetTagging opcode, bit op1, bit op2, bit op3, string asm> + : MOPSMemorySetBase<(ins GPR64common:$Rd, GPR64:$Rn), + "\t[$Rd]!, $Rn!", 1, opcode, op1, op2, op3, asm> { + // No `Rm` operand, as all bits must be set to 1 + let Inst{20-16} = 0b11111; + let DecoderMethod = "DecodeSETMemGoOpInstruction"; +} multiclass MOPSMemoryCopyInsns opcode, string asm> { def "" : MOPSMemoryCopy; @@ -12659,17 +12671,27 @@ multiclass MOPSMemoryMoveInsns opcode, string asm> { } multiclass MOPSMemorySetInsns opcode, string asm> { - def "" : MOPSMemorySet; - def T : MOPSMemorySet; - def N : MOPSMemorySet; - def TN : MOPSMemorySet; + def "" : MOPSMemorySet; + def T : MOPSMemorySet; + def N : MOPSMemorySet; + def TN : MOPSMemorySet; } multiclass MOPSMemorySetTaggingInsns opcode, string asm> { - def "" : MOPSMemorySetTagging; - def T : MOPSMemorySetTagging; - def N : MOPSMemorySetTagging; - def TN : MOPSMemorySetTagging; + def "" : MOPSMemorySetTagging; + def T : MOPSMemorySetTagging; + def N : MOPSMemorySetTagging; + def TN : MOPSMemorySetTagging; +} + +//---------------------------------------------------------------------------- +// MOPS Granule Only - FEAT_MOPS_GO +//---------------------------------------------------------------------------- +multiclass MOPSGoMemorySetTaggingInsns 
opcode, string asm> { + def "" : MOPSGoMemorySetTagging; + def T : MOPSGoMemorySetTagging; + def N : MOPSGoMemorySetTagging; + def TN : MOPSGoMemorySetTagging; } //---------------------------------------------------------------------------- diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index b30e3d06b2c9f..34a20f09d2806 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -405,6 +405,8 @@ def HasMTETC : Predicate<"Subtarget->hasMTETC()">, AssemblerPredicateWithAll<(all_of FeatureMTETC), "mtetc">; def HasGCIE : Predicate<"Subtarget->hasGCIE()">, AssemblerPredicateWithAll<(all_of FeatureGCIE), "gcie">; +def HasMOPS_GO : Predicate<"Subtarget->hasMOPS_GO()">, + AssemblerPredicateWithAll<(all_of FeatureMOPS_GO), "mops-go">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsWindows : Predicate<"Subtarget->isTargetWindows()">; @@ -10867,6 +10869,15 @@ let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, maySt [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>; } +//----------------------------------------------------------------------------- +// MOPS Granule Only Protection (FEAT_MOPS_GO) + +let Predicates = [HasMOPS_GO, HasMTE] in { + defm SETGOP : MOPSGoMemorySetTaggingInsns<0b00, "setgop">; + defm SETGOM : MOPSGoMemorySetTaggingInsns<0b01, "setgom">; + defm SETGOE : MOPSGoMemorySetTaggingInsns<0b10, "setgoe">; +} + //----------------------------------------------------------------------------- // v8.3 Pointer Authentication late patterns diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index ab4004e30f629..8974965c41fe3 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -157,7 +157,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool 
enableMachineScheduler() const override { return true; } bool enablePostRAScheduler() const override { return usePostRAScheduler(); } bool enableSubRegLiveness() const override { return EnableSubregLiveness; } - bool enableTerminalRule() const override { return true; } + bool enableMachinePipeliner() const override; bool useDFAforSMS() const override { return false; } diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index f5dfbdc596510..7293b7fdb0d20 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -3893,6 +3893,7 @@ static const struct Extension { {"f16mm", {AArch64::FeatureF16MM}}, {"f16f32dot", {AArch64::FeatureF16F32DOT}}, {"f16f32mm", {AArch64::FeatureF16F32MM}}, + {"mops-go", {AArch64::FeatureMOPS_GO}}, }; static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) { @@ -5994,6 +5995,33 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc, " registers are the same"); break; } + case AArch64::SETGOP: + case AArch64::SETGOPT: + case AArch64::SETGOPN: + case AArch64::SETGOPTN: + case AArch64::SETGOM: + case AArch64::SETGOMT: + case AArch64::SETGOMN: + case AArch64::SETGOMTN: + case AArch64::SETGOE: + case AArch64::SETGOET: + case AArch64::SETGOEN: + case AArch64::SETGOETN: { + MCRegister Xd_wb = Inst.getOperand(0).getReg(); + MCRegister Xn_wb = Inst.getOperand(1).getReg(); + MCRegister Xd = Inst.getOperand(2).getReg(); + MCRegister Xn = Inst.getOperand(3).getReg(); + if (Xd_wb != Xd) + return Error(Loc[0], + "invalid SET instruction, Xd_wb and Xd do not match"); + if (Xn_wb != Xn) + return Error(Loc[0], + "invalid SET instruction, Xn_wb and Xn do not match"); + if (Xd == Xn) + return Error(Loc[0], "invalid SET instruction, destination and size" + " registers are the same"); + break; + } } // Now check immediate ranges. 
Separate from the above as there is overlap diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index dc2feba42c871..4eb762a00d477 100644 --- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -1532,6 +1532,32 @@ static DecodeStatus DecodeSETMemOpInstruction(MCInst &Inst, uint32_t insn, return MCDisassembler::Success; } +static DecodeStatus DecodeSETMemGoOpInstruction(MCInst &Inst, uint32_t insn, + uint64_t Addr, + const MCDisassembler *Decoder) { + unsigned Rd = fieldFromInstruction(insn, 0, 5); + unsigned Rn = fieldFromInstruction(insn, 5, 5); + + // None of the registers may alias: if they do, then the instruction is not + // merely unpredictable but actually entirely unallocated. + if (Rd == Rn) + return MCDisassembler::Fail; + + // Rd and Rn register operands are written back, so they appear + // twice in the operand list, once as outputs and once as inputs. 
+ if (!DecodeSimpleRegisterClass( + Inst, Rd, Addr, Decoder) || + !DecodeSimpleRegisterClass( + Inst, Rn, Addr, Decoder) || + !DecodeSimpleRegisterClass( + Inst, Rd, Addr, Decoder) || + !DecodeSimpleRegisterClass( + Inst, Rn, Addr, Decoder)) + return MCDisassembler::Fail; + + return MCDisassembler::Success; +} + static DecodeStatus DecodePRFMRegInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr, const MCDisassembler *Decoder) { diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 2da6a8e5652ef..305eaefbe097b 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -1228,18 +1228,20 @@ bool UnclusteredHighRPStage::initGCNSchedStage() { createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PreRAReentry)); InitialOccupancy = DAG.MinOccupancy; - // Aggressivly try to reduce register pressure in the unclustered high RP + // Aggressively try to reduce register pressure in the unclustered high RP // stage. Temporarily increase occupancy target in the region. + TempTargetOccupancy = MFI.getMaxWavesPerEU() > DAG.MinOccupancy + ? InitialOccupancy + 1 + : InitialOccupancy; + IsAnyRegionScheduled = false; S.SGPRLimitBias = S.HighRPSGPRBias; S.VGPRLimitBias = S.HighRPVGPRBias; - if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy) - MFI.increaseOccupancy(MF, ++DAG.MinOccupancy); LLVM_DEBUG( dbgs() << "Retrying function scheduling without clustering. 
" - "Aggressivly try to reduce register pressure to achieve occupancy " - << DAG.MinOccupancy << ".\n"); + "Aggressively try to reduce register pressure to achieve occupancy " + << TempTargetOccupancy << ".\n"); return true; } @@ -1320,9 +1322,16 @@ void UnclusteredHighRPStage::finalizeGCNSchedStage() { SavedMutations.swap(DAG.Mutations); S.SGPRLimitBias = S.VGPRLimitBias = 0; if (DAG.MinOccupancy > InitialOccupancy) { + assert(IsAnyRegionScheduled); LLVM_DEBUG(dbgs() << StageID << " stage successfully increased occupancy to " << DAG.MinOccupancy << '\n'); + } else if (!IsAnyRegionScheduled) { + assert(DAG.MinOccupancy == InitialOccupancy); + LLVM_DEBUG(dbgs() << StageID + << ": No regions scheduled, min occupancy stays at " + << DAG.MinOccupancy << ", MFI occupancy stays at " + << MFI.getOccupancy() << ".\n"); } GCNSchedStage::finalizeGCNSchedStage(); @@ -1396,13 +1405,27 @@ bool UnclusteredHighRPStage::initGCNRegion() { // rescheduling of previous regions did not make occupancy drop back down to // the initial minimum). unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize(); + // If no region has been scheduled yet, the DAG has not yet been updated with + // the occupancy target. So retrieve it from the temporary. + unsigned CurrentTargetOccupancy = + IsAnyRegionScheduled ? DAG.MinOccupancy : TempTargetOccupancy; if (!DAG.RegionsWithExcessRP[RegionIdx] && - (DAG.MinOccupancy <= InitialOccupancy || + (CurrentTargetOccupancy <= InitialOccupancy || DAG.Pressure[RegionIdx].getOccupancy(ST, DynamicVGPRBlockSize) != InitialOccupancy)) return false; - return GCNSchedStage::initGCNRegion(); + bool IsSchedulingThisRegion = GCNSchedStage::initGCNRegion(); + // If this is the first region scheduled during this stage, make the target + // occupancy changes in the DAG and MFI. 
+ if (!IsAnyRegionScheduled && IsSchedulingThisRegion) { + IsAnyRegionScheduled = true; + if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy) { + DAG.MinOccupancy = TempTargetOccupancy; + MFI.increaseOccupancy(MF, TempTargetOccupancy); + } + } + return IsSchedulingThisRegion; } bool ClusteredLowOccStage::initGCNRegion() { diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index f357981ac91de..95a931b9beb2a 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -417,6 +417,10 @@ class UnclusteredHighRPStage : public GCNSchedStage { private: // Save the initial occupancy before starting this stage. unsigned InitialOccupancy; + // Save the temporary target occupancy before starting this stage. + unsigned TempTargetOccupancy; + // Track whether any region was scheduled by this stage. + bool IsAnyRegionScheduled; public: bool initGCNSchedStage() override; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index da4bd878b8853..f377b8aaf1333 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1040,8 +1040,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return true; } - bool enableTerminalRule() const override { return true; } - bool useAA() const override; bool enableSubRegLiveness() const override { diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h index efd99dbc1a08b..22e56b66e1827 100644 --- a/llvm/lib/Target/AMDGPU/R600Subtarget.h +++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h @@ -126,8 +126,6 @@ class R600Subtarget final : public R600GenSubtargetInfo, return true; } - bool enableTerminalRule() const override { return true; } - bool enableSubRegLiveness() const override { return true; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index ca49e553ca7fa..e0336c7dafa43 100644 --- 
a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4091,10 +4091,29 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const { MachineBasicBlock &MBB = *MI.getParent(); + MachineInstr *CandidateMI = &MI; + + if (MI.isBundle()) { + // This is a temporary placeholder for bundle handling that enables us to + // exercise the relevant code paths in the two-address instruction pass. + if (MI.getBundleSize() != 1) + return nullptr; + CandidateMI = MI.getNextNode(); + } + ThreeAddressUpdates U; - MachineInstr *NewMI = convertToThreeAddressImpl(MI, U); + MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U); + if (!NewMI) + return nullptr; - if (NewMI) { + if (MI.isBundle()) { + CandidateMI->eraseFromBundle(); + + for (MachineOperand &MO : MI.all_defs()) { + if (MO.isTied()) + MI.untieRegOperand(MO.getOperandNo()); + } + } else { updateLiveVariables(LV, MI, *NewMI); if (LIS) { LIS->ReplaceMachineInstrInMaps(MI, *NewMI); @@ -4135,7 +4154,22 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, LV->getVarInfo(DefReg).AliveBlocks.clear(); } - if (LIS) { + if (MI.isBundle()) { + VirtRegInfo VRI = AnalyzeVirtRegInBundle(MI, DefReg); + if (!VRI.Reads && !VRI.Writes) { + for (MachineOperand &MO : MI.all_uses()) { + if (MO.isReg() && MO.getReg() == DefReg) { + assert(MO.getSubReg() == 0 && + "tied sub-registers in bundles currently not supported"); + MI.removeOperand(MO.getOperandNo()); + break; + } + } + + if (LIS) + LIS->shrinkToUses(&LIS->getInterval(DefReg)); + } + } else if (LIS) { LiveInterval &DefLI = LIS->getInterval(DefReg); // We cannot delete the original instruction here, so hack out the use @@ -4150,11 +4184,26 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, } } + if (MI.isBundle()) { + VirtRegInfo VRI = AnalyzeVirtRegInBundle(MI, DefReg); + if (!VRI.Reads && !VRI.Writes) { + for (MachineOperand &MIOp : MI.uses()) { + if 
(MIOp.isReg() && MIOp.getReg() == DefReg) { + MIOp.setIsUndef(true); + MIOp.setReg(DummyReg); + } + } + } + + MI.addOperand(MachineOperand::CreateReg(DummyReg, false, false, false, + false, /*isUndef=*/true)); + } + LIS->shrinkToUses(&DefLI); } } - return NewMI; + return MI.isBundle() ? &MI : NewMI; } MachineInstr * diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 1bb3d23a14e6a..ba6ea3357eeb8 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -3983,7 +3983,7 @@ unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF, llvm_unreachable("Unexpected register pressure set!"); } -const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const { +const int *SIRegisterInfo::getRegUnitPressureSets(MCRegUnit RegUnit) const { static const int Empty[] = { -1 }; if (RegPressureIgnoredUnits[RegUnit]) diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 7aac424bef897..1d846321d3073 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -369,7 +369,7 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const override; - const int *getRegUnitPressureSets(unsigned RegUnit) const override; + const int *getRegUnitPressureSets(MCRegUnit RegUnit) const override; MCRegister getReturnAddressReg(const MachineFunction &MF) const; diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 36b99087e0a32..2d2e62c80c702 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -97,7 +97,8 @@ void ARMAsmPrinter::emitXXStructor(const DataLayout &DL, const Constant *CV) { const MCExpr *E = MCSymbolRefExpr::create( GetARMGVSymbol(GV, ARMII::MO_NO_FLAG), - (Subtarget->isTargetELF() ? 
ARM::S_TARGET1 : ARM::S_None), OutContext); + (TM.getTargetTriple().isOSBinFormatELF() ? ARM::S_TARGET1 : ARM::S_None), + OutContext); OutStreamer->emitValue(E, Size); } @@ -595,8 +596,7 @@ void ARMAsmPrinter::emitEndOfAsmFile(Module &M) { ARMTargetStreamer &ATS = static_cast(TS); if (OptimizationGoals > 0 && - (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() || - Subtarget->isTargetMuslAEABI())) + (TT.isTargetAEABI() || TT.isTargetGNUAEABI() || TT.isTargetMuslAEABI())) ATS.emitAttribute(ARMBuildAttrs::ABI_optimization_goals, OptimizationGoals); OptimizationGoals = -1; @@ -884,9 +884,10 @@ static uint8_t getModifierSpecifier(ARMCP::ARMCPModifier Modifier) { MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV, unsigned char TargetFlags) { - if (Subtarget->isTargetMachO()) { + const Triple &TT = TM.getTargetTriple(); + if (TT.isOSBinFormatMachO()) { bool IsIndirect = - (TargetFlags & ARMII::MO_NONLAZY) && Subtarget->isGVIndirectSymbol(GV); + (TargetFlags & ARMII::MO_NONLAZY) && getTM().isGVIndirectSymbol(GV); if (!IsIndirect) return getSymbol(GV); @@ -903,9 +904,8 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV, StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); return MCSym; - } else if (Subtarget->isTargetCOFF()) { - assert(Subtarget->isTargetWindows() && - "Windows is the only supported COFF target"); + } else if (TT.isOSBinFormatCOFF()) { + assert(TT.isOSWindows() && "Windows is the only supported COFF target"); bool IsIndirect = (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB)); @@ -932,7 +932,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV, } return MCSym; - } else if (Subtarget->isTargetELF()) { + } else if (TT.isOSBinFormatELF()) { return getSymbolPreferLocal(*GV); } llvm_unreachable("unexpected target"); @@ -978,7 +978,8 @@ void ARMAsmPrinter::emitMachineConstantPoolValue( // On Darwin, const-pool entries may get the "FOO$non_lazy_ptr" mangling, so // flag 
the global as MO_NONLAZY. - unsigned char TF = Subtarget->isTargetMachO() ? ARMII::MO_NONLAZY : 0; + unsigned char TF = + TM.getTargetTriple().isOSBinFormatMachO() ? ARMII::MO_NONLAZY : 0; MCSym = GetARMGVSymbol(GV, TF); } else if (ACPV->isMachineBasicBlock()) { const MachineBasicBlock *MBB = cast(ACPV)->getMBB(); diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index 58bc338b25856..7ec232ae9bac5 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -318,17 +318,7 @@ bool ARMSubtarget::isRWPI() const { } bool ARMSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const { - if (!TM.shouldAssumeDSOLocal(GV)) - return true; - - // 32 bit macho has no relocation for a-b if a is undefined, even if b is in - // the section that is being relocated. This means we have to use o load even - // for GVs that are known to be local to the dso. - if (isTargetMachO() && TM.isPositionIndependent() && - (GV->isDeclarationForLinker() || GV->hasCommonLinkage())) - return true; - - return false; + return TM.isGVIndirectSymbol(GV); } bool ARMSubtarget::isGVInGOT(const GlobalValue *GV) const { diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 34baa3108402c..4a0883cc662e7 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -377,7 +377,6 @@ class ARMSubtarget : public ARMGenSubtargetInfo { bool isRWPI() const; bool useMachineScheduler() const { return UseMISched; } - bool enableTerminalRule() const override { return true; } bool useMachinePipeliner() const { return UseMIPipeliner; } bool hasMinSize() const { return OptMinSize; } bool isThumb1Only() const { return isThumb() && !hasThumb2(); } diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.h b/llvm/lib/Target/ARM/ARMTargetMachine.h index c417c4c8bae65..1f74e9fdd1dc9 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -98,6 +98,20 
@@ class ARMBaseTargetMachine : public CodeGenTargetMachineImpl { return true; } + bool isGVIndirectSymbol(const GlobalValue *GV) const { + if (!shouldAssumeDSOLocal(GV)) + return true; + + // 32 bit macho has no relocation for a-b if a is undefined, even if b is in + // the section that is being relocated. This means we have to use o load + // even for GVs that are known to be local to the dso. + if (getTargetTriple().isOSBinFormatMachO() && isPositionIndependent() && + (GV->isDeclarationForLinker() || GV->hasCommonLinkage())) + return true; + + return false; + } + yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override; yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.h b/llvm/lib/Target/DirectX/DXILShaderFlags.h index f94f7997436ac..a0820572e5fed 100644 --- a/llvm/lib/Target/DirectX/DXILShaderFlags.h +++ b/llvm/lib/Target/DirectX/DXILShaderFlags.h @@ -22,7 +22,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include -#include namespace llvm { class Module; diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h index 7dfede249c63c..30794f61218a1 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -294,8 +294,6 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { bool useBSBScheduling() const { return UseBSBScheduling; } bool enableMachineScheduler() const override; - bool enableTerminalRule() const override { return true; } - // Always use the TargetLowering default scheduler. // FIXME: This will use the vliw scheduler which is probably just hurting // compiler time and will be removed eventually anyway. 
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h index c5e57d0df22a7..712bdbe2af187 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h @@ -21,7 +21,6 @@ #include "llvm/TargetParser/SubtargetFeature.h" #include #include -#include namespace llvm { diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index f588e56f2ea18..6b28531764db9 100644 --- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -151,7 +151,7 @@ class MipsAsmParser : public MCTargetAsmParser { bool IsCpRestoreSet; bool CurForbiddenSlotAttr; int CpRestoreOffset; - unsigned GPReg; + MCRegister GPReg; unsigned CpSaveLocation; /// If true, then CpSaveLocation is a register, otherwise it's an offset. bool CpSaveLocationIsRegister; @@ -823,7 +823,7 @@ class MipsOperand : public MCParsedAsmOperand { }; struct RegListOp { - SmallVector *List; + SmallVector *List; }; union { @@ -1377,15 +1377,15 @@ class MipsOperand : public MCParsedAsmOperand { if (Size < 2 || Size > 5) return false; - unsigned R0 = RegList.List->front(); - unsigned R1 = RegList.List->back(); + MCRegister R0 = RegList.List->front(); + MCRegister R1 = RegList.List->back(); if (!((R0 == Mips::S0 && R1 == Mips::RA) || (R0 == Mips::S0_64 && R1 == Mips::RA_64))) return false; - int PrevReg = *RegList.List->begin(); + MCRegister PrevReg = RegList.List->front(); for (int i = 1; i < Size - 1; i++) { - int Reg = (*(RegList.List))[i]; + MCRegister Reg = (*(RegList.List))[i]; if ( Reg != PrevReg + 1) return false; PrevReg = Reg; @@ -1447,7 +1447,7 @@ class MipsOperand : public MCParsedAsmOperand { return static_cast(getMemOff())->getValue(); } - const SmallVectorImpl &getRegList() const { + const SmallVectorImpl &getRegList() const { assert((Kind == k_RegList) && 
"Invalid access!"); return *(RegList.List); } @@ -1548,12 +1548,13 @@ class MipsOperand : public MCParsedAsmOperand { } static std::unique_ptr - CreateRegList(SmallVectorImpl &Regs, SMLoc StartLoc, SMLoc EndLoc, + CreateRegList(SmallVectorImpl &Regs, SMLoc StartLoc, SMLoc EndLoc, MipsAsmParser &Parser) { - assert(Regs.size() > 0 && "Empty list not allowed"); + assert(!Regs.empty() && "Empty list not allowed"); auto Op = std::make_unique(k_RegList, Parser); - Op->RegList.List = new SmallVector(Regs.begin(), Regs.end()); + Op->RegList.List = + new SmallVector(Regs.begin(), Regs.end()); Op->StartLoc = StartLoc; Op->EndLoc = EndLoc; return Op; @@ -1684,7 +1685,7 @@ class MipsOperand : public MCParsedAsmOperand { case k_RegList: OS << "RegList< "; for (auto Reg : (*RegList.List)) - OS << Reg << " "; + OS << Reg.id() << " "; OS << ">"; break; } @@ -6848,9 +6849,9 @@ ParseStatus MipsAsmParser::parseInvNum(OperandVector &Operands) { ParseStatus MipsAsmParser::parseRegisterList(OperandVector &Operands) { MCAsmParser &Parser = getParser(); - SmallVector Regs; - unsigned RegNo; - unsigned PrevReg = Mips::NoRegister; + SmallVector Regs; + MCRegister Reg; + MCRegister PrevReg; bool RegRange = false; SmallVector, 8> TmpOperands; @@ -6860,46 +6861,47 @@ ParseStatus MipsAsmParser::parseRegisterList(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); while (parseAnyRegister(TmpOperands).isSuccess()) { SMLoc E = getLexer().getLoc(); - MipsOperand &Reg = static_cast(*TmpOperands.back()); - RegNo = isGP64bit() ? Reg.getGPR64Reg() : Reg.getGPR32Reg(); + MipsOperand &RegOpnd = static_cast(*TmpOperands.back()); + Reg = isGP64bit() ? RegOpnd.getGPR64Reg() : RegOpnd.getGPR32Reg(); if (RegRange) { // Remove last register operand because registers from register range // should be inserted first. 
- if ((isGP64bit() && RegNo == Mips::RA_64) || - (!isGP64bit() && RegNo == Mips::RA)) { - Regs.push_back(RegNo); + if ((isGP64bit() && Reg == Mips::RA_64) || + (!isGP64bit() && Reg == Mips::RA)) { + Regs.push_back(Reg); } else { - unsigned TmpReg = PrevReg + 1; - while (TmpReg <= RegNo) { + MCRegister TmpReg = PrevReg + 1; + while (TmpReg <= Reg) { if ((((TmpReg < Mips::S0) || (TmpReg > Mips::S7)) && !isGP64bit()) || (((TmpReg < Mips::S0_64) || (TmpReg > Mips::S7_64)) && isGP64bit())) return Error(E, "invalid register operand"); PrevReg = TmpReg; - Regs.push_back(TmpReg++); + Regs.push_back(TmpReg); + TmpReg = TmpReg.id() + 1; } } RegRange = false; } else { - if ((PrevReg == Mips::NoRegister) && - ((isGP64bit() && (RegNo != Mips::S0_64) && (RegNo != Mips::RA_64)) || - (!isGP64bit() && (RegNo != Mips::S0) && (RegNo != Mips::RA)))) + if (!PrevReg.isValid() && + ((isGP64bit() && (Reg != Mips::S0_64) && (Reg != Mips::RA_64)) || + (!isGP64bit() && (Reg != Mips::S0) && (Reg != Mips::RA)))) return Error(E, "$16 or $31 expected"); - if (!(((RegNo == Mips::FP || RegNo == Mips::RA || - (RegNo >= Mips::S0 && RegNo <= Mips::S7)) && + if (!(((Reg == Mips::FP || Reg == Mips::RA || + (Reg >= Mips::S0 && Reg <= Mips::S7)) && !isGP64bit()) || - ((RegNo == Mips::FP_64 || RegNo == Mips::RA_64 || - (RegNo >= Mips::S0_64 && RegNo <= Mips::S7_64)) && + ((Reg == Mips::FP_64 || Reg == Mips::RA_64 || + (Reg >= Mips::S0_64 && Reg <= Mips::S7_64)) && isGP64bit()))) return Error(E, "invalid register operand"); - if ((PrevReg != Mips::NoRegister) && (RegNo != PrevReg + 1) && - ((RegNo != Mips::FP && RegNo != Mips::RA && !isGP64bit()) || - (RegNo != Mips::FP_64 && RegNo != Mips::RA_64 && isGP64bit()))) + if (PrevReg.isValid() && (Reg != PrevReg + 1) && + ((Reg != Mips::FP && Reg != Mips::RA && !isGP64bit()) || + (Reg != Mips::FP_64 && Reg != Mips::RA_64 && isGP64bit()))) return Error(E, "consecutive register numbers expected"); - Regs.push_back(RegNo); + Regs.push_back(Reg); } if 
(Parser.getTok().is(AsmToken::Minus)) @@ -6913,7 +6915,7 @@ ParseStatus MipsAsmParser::parseRegisterList(OperandVector &Operands) { if (Parser.getTok().isNot(AsmToken::Dollar)) break; - PrevReg = RegNo; + PrevReg = Reg; } SMLoc E = Parser.getTok().getLoc(); @@ -7780,7 +7782,7 @@ bool MipsAsmParser::parseDirectiveCpLocal(SMLoc Loc) { } getParser().Lex(); // Consume the EndOfStatement. - unsigned NewReg = RegOpnd.getGPR32Reg(); + MCRegister NewReg = RegOpnd.getGPR32Reg(); if (IsPicEnabled) GPReg = NewReg; @@ -7835,7 +7837,6 @@ bool MipsAsmParser::parseDirectiveCpRestore(SMLoc Loc) { bool MipsAsmParser::parseDirectiveCPSetup() { MCAsmParser &Parser = getParser(); - unsigned FuncReg; unsigned Save; bool SaveIsReg = true; @@ -7852,7 +7853,7 @@ bool MipsAsmParser::parseDirectiveCPSetup() { return false; } - FuncReg = FuncRegOpnd.getGPR32Reg(); + MCRegister FuncReg = FuncRegOpnd.getGPR32Reg(); TmpReg.clear(); if (!eatComma("unexpected token, expected comma")) @@ -7878,7 +7879,7 @@ bool MipsAsmParser::parseDirectiveCPSetup() { reportParseError(SaveOpnd.getStartLoc(), "invalid register"); return false; } - Save = SaveOpnd.getGPR32Reg(); + Save = SaveOpnd.getGPR32Reg().id(); } if (!eatComma("unexpected token, expected comma")) @@ -8696,7 +8697,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { "expected general purpose register"); return false; } - unsigned StackReg = StackRegOpnd.getGPR32Reg(); + MCRegister StackReg = StackRegOpnd.getGPR32Reg(); if (Parser.getTok().is(AsmToken::Comma)) Parser.Lex(); diff --git a/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 12e31c07aa15a..fd9eb9b8fe9a3 100644 --- a/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -103,7 +103,7 @@ LLVMInitializeMipsDisassembler() { createMipselDisassembler); } -static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo) { +static 
MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo) { const MCRegisterInfo *RegInfo = D->getContext().getRegisterInfo(); return RegInfo->getRegClass(RC).getRegister(RegNo); } @@ -123,7 +123,7 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 30 || RegNo % 2) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::AFGR64RegClassID, RegNo / 2); + MCRegister Reg = getReg(Decoder, Mips::AFGR64RegClassID, RegNo / 2); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -134,7 +134,7 @@ static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo >= 4) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::ACC64DSPRegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::ACC64DSPRegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -145,7 +145,7 @@ static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo >= 4) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::HI32DSPRegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::HI32DSPRegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -156,7 +156,7 @@ static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo >= 4) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::LO32DSPRegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::LO32DSPRegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -167,7 +167,7 @@ static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::MSA128BRegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::MSA128BRegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -178,7 
+178,7 @@ static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::MSA128HRegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::MSA128HRegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -189,7 +189,7 @@ static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::MSA128WRegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::MSA128WRegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -200,7 +200,7 @@ static DecodeStatus DecodeMSA128DRegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::MSA128DRegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::MSA128DRegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -211,7 +211,7 @@ static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 7) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::MSACtrlRegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::MSACtrlRegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -222,7 +222,7 @@ static DecodeStatus DecodeCOP0RegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::COP0RegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::COP0RegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -233,7 +233,7 @@ static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::COP2RegClassID, RegNo); + MCRegister Reg = getReg(Decoder, 
Mips::COP2RegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -881,7 +881,7 @@ static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::GPR64RegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::GPR64RegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -891,7 +891,7 @@ static DecodeStatus DecodeGPRMM16RegisterClass(MCInst &Inst, unsigned RegNo, const MCDisassembler *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::GPRMM16RegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::GPRMM16RegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -901,7 +901,7 @@ DecodeGPRMM16ZeroRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const MCDisassembler *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::GPRMM16ZeroRegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::GPRMM16ZeroRegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -911,7 +911,7 @@ DecodeGPRMM16MovePRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const MCDisassembler *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::GPRMM16MovePRegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::GPRMM16MovePRegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -948,7 +948,7 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::GPR32RegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::GPR32RegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return 
MCDisassembler::Success; } @@ -974,7 +974,7 @@ static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::FGR64RegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::FGR64RegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -985,7 +985,7 @@ static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::FGR32RegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::FGR32RegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -995,7 +995,7 @@ static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst, unsigned RegNo, const MCDisassembler *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::CCRRegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::CCRRegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -1005,7 +1005,7 @@ static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst, unsigned RegNo, const MCDisassembler *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::FCCRegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::FCCRegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -1016,7 +1016,7 @@ static DecodeStatus DecodeFGRCCRegisterClass(MCInst &Inst, unsigned RegNo, if (RegNo > 31) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::FGRCCRegClassID, RegNo); + MCRegister Reg = getReg(Decoder, Mips::FGRCCRegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -1024,11 +1024,11 @@ static DecodeStatus DecodeFGRCCRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeMem(MCInst &Inst, unsigned Insn, uint64_t Address, 
const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); - unsigned Reg = fieldFromInstruction(Insn, 16, 5); - unsigned Base = fieldFromInstruction(Insn, 21, 5); + unsigned RegNo = fieldFromInstruction(Insn, 16, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 21, 5); - Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Reg = getReg(Decoder, Mips::GPR32RegClassID, RegNo); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); if (Inst.getOpcode() == Mips::SC || Inst.getOpcode() == Mips::SC64 || Inst.getOpcode() == Mips::SCD) @@ -1044,14 +1044,14 @@ static DecodeStatus DecodeMem(MCInst &Inst, unsigned Insn, uint64_t Address, static DecodeStatus DecodeMemEVA(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int Offset = SignExtend32<9>(Insn >> 7); - unsigned Reg = fieldFromInstruction(Insn, 16, 5); - unsigned Base = fieldFromInstruction(Insn, 21, 5); + unsigned RegNo = fieldFromInstruction(Insn, 16, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 21, 5); - Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Reg = getReg(Decoder, Mips::GPR32RegClassID, RegNo); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); - if (Inst.getOpcode() == Mips::SCE) - Inst.addOperand(MCOperand::createReg(Reg)); + if (Inst.getOpcode() == Mips::SCE) + Inst.addOperand(MCOperand::createReg(Reg)); Inst.addOperand(MCOperand::createReg(Reg)); Inst.addOperand(MCOperand::createReg(Base)); @@ -1064,11 +1064,11 @@ static DecodeStatus DecodeLoadByte15(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); - unsigned Base = fieldFromInstruction(Insn, 16, 5); - unsigned Reg = fieldFromInstruction(Insn, 21, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 16, 5); + unsigned RegNo = fieldFromInstruction(Insn, 21, 
5); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); - Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); + MCRegister Reg = getReg(Decoder, Mips::GPR32RegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); Inst.addOperand(MCOperand::createReg(Base)); @@ -1081,9 +1081,9 @@ static DecodeStatus DecodeCacheOp(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); unsigned Hint = fieldFromInstruction(Insn, 16, 5); - unsigned Base = fieldFromInstruction(Insn, 21, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 21, 5); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); Inst.addOperand(MCOperand::createReg(Base)); Inst.addOperand(MCOperand::createImm(Offset)); @@ -1096,10 +1096,10 @@ static DecodeStatus DecodeCacheOpMM(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int Offset = SignExtend32<12>(Insn & 0xfff); - unsigned Base = fieldFromInstruction(Insn, 16, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 16, 5); unsigned Hint = fieldFromInstruction(Insn, 21, 5); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); Inst.addOperand(MCOperand::createReg(Base)); Inst.addOperand(MCOperand::createImm(Offset)); @@ -1112,10 +1112,10 @@ static DecodeStatus DecodePrefeOpMM(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int Offset = SignExtend32<9>(Insn & 0x1ff); - unsigned Base = fieldFromInstruction(Insn, 16, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 16, 5); unsigned Hint = fieldFromInstruction(Insn, 21, 5); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); Inst.addOperand(MCOperand::createReg(Base)); 
Inst.addOperand(MCOperand::createImm(Offset)); @@ -1129,9 +1129,9 @@ static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst, unsigned Insn, const MCDisassembler *Decoder) { int Offset = SignExtend32<9>(Insn >> 7); unsigned Hint = fieldFromInstruction(Insn, 16, 5); - unsigned Base = fieldFromInstruction(Insn, 21, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 21, 5); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); Inst.addOperand(MCOperand::createReg(Base)); Inst.addOperand(MCOperand::createImm(Offset)); @@ -1143,9 +1143,9 @@ static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeSyncI(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); - unsigned Base = fieldFromInstruction(Insn, 21, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 21, 5); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); Inst.addOperand(MCOperand::createReg(Base)); Inst.addOperand(MCOperand::createImm(Offset)); @@ -1157,9 +1157,9 @@ static DecodeStatus DecodeSyncI_MM(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); - unsigned Base = fieldFromInstruction(Insn, 16, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 16, 5); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); Inst.addOperand(MCOperand::createReg(Base)); Inst.addOperand(MCOperand::createImm(Offset)); @@ -1170,9 +1170,9 @@ static DecodeStatus DecodeSyncI_MM(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeSynciR6(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int Immediate = SignExtend32<16>(Insn & 0xffff); - unsigned Base = fieldFromInstruction(Insn, 16, 5); + unsigned BaseNo = 
fieldFromInstruction(Insn, 16, 5); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); Inst.addOperand(MCOperand::createReg(Base)); Inst.addOperand(MCOperand::createImm(Immediate)); @@ -1184,11 +1184,11 @@ static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 16, 10)); - unsigned Reg = fieldFromInstruction(Insn, 6, 5); - unsigned Base = fieldFromInstruction(Insn, 11, 5); + unsigned RegNo = fieldFromInstruction(Insn, 6, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 11, 5); - Reg = getReg(Decoder, Mips::MSA128BRegClassID, Reg); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Reg = getReg(Decoder, Mips::MSA128BRegClassID, RegNo); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); Inst.addOperand(MCOperand::createReg(Reg)); Inst.addOperand(MCOperand::createReg(Base)); @@ -1288,9 +1288,9 @@ static DecodeStatus DecodeMemMMSPImm5Lsl2(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { unsigned Offset = Insn & 0x1F; - unsigned Reg = fieldFromInstruction(Insn, 5, 5); + unsigned RegNo = fieldFromInstruction(Insn, 5, 5); - Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); + MCRegister Reg = getReg(Decoder, Mips::GPR32RegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); Inst.addOperand(MCOperand::createReg(Mips::SP)); @@ -1303,9 +1303,9 @@ static DecodeStatus DecodeMemMMGPImm7Lsl2(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { unsigned Offset = Insn & 0x7F; - unsigned Reg = fieldFromInstruction(Insn, 7, 3); + unsigned RegNo = fieldFromInstruction(Insn, 7, 3); - Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); + MCRegister Reg = getReg(Decoder, Mips::GPR32RegClassID, RegNo); Inst.addOperand(MCOperand::createReg(Reg)); Inst.addOperand(MCOperand::createReg(Mips::GP)); @@ 
-1342,11 +1342,11 @@ static DecodeStatus DecodeMemMMImm9(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int Offset = SignExtend32<9>(Insn & 0x1ff); - unsigned Reg = fieldFromInstruction(Insn, 21, 5); - unsigned Base = fieldFromInstruction(Insn, 16, 5); + unsigned RegNo = fieldFromInstruction(Insn, 21, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 16, 5); - Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Reg = getReg(Decoder, Mips::GPR32RegClassID, RegNo); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); if (Inst.getOpcode() == Mips::SCE_MM || Inst.getOpcode() == Mips::SC_MMR6) Inst.addOperand(MCOperand::createReg(Reg)); @@ -1362,11 +1362,11 @@ static DecodeStatus DecodeMemMMImm12(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int Offset = SignExtend32<12>(Insn & 0x0fff); - unsigned Reg = fieldFromInstruction(Insn, 21, 5); - unsigned Base = fieldFromInstruction(Insn, 16, 5); + unsigned RegNo = fieldFromInstruction(Insn, 21, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 16, 5); - Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Reg = getReg(Decoder, Mips::GPR32RegClassID, RegNo); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); switch (Inst.getOpcode()) { case Mips::SWM32_MM: @@ -1396,11 +1396,11 @@ static DecodeStatus DecodeMemMMImm16(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); - unsigned Reg = fieldFromInstruction(Insn, 21, 5); - unsigned Base = fieldFromInstruction(Insn, 16, 5); + unsigned RegNo = fieldFromInstruction(Insn, 21, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 16, 5); - Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Reg = getReg(Decoder, 
Mips::GPR32RegClassID, RegNo); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); Inst.addOperand(MCOperand::createReg(Reg)); Inst.addOperand(MCOperand::createReg(Base)); @@ -1412,11 +1412,11 @@ static DecodeStatus DecodeMemMMImm16(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); - unsigned Reg = fieldFromInstruction(Insn, 16, 5); - unsigned Base = fieldFromInstruction(Insn, 21, 5); + unsigned RegNo = fieldFromInstruction(Insn, 16, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 21, 5); - Reg = getReg(Decoder, Mips::FGR64RegClassID, Reg); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Reg = getReg(Decoder, Mips::FGR64RegClassID, RegNo); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); Inst.addOperand(MCOperand::createReg(Reg)); Inst.addOperand(MCOperand::createReg(Base)); @@ -1431,11 +1431,11 @@ static DecodeStatus DecodeFMemMMR2(MCInst &Inst, unsigned Insn, // This function is the same as DecodeFMem but with the Reg and Base fields // swapped according to microMIPS spec. 
int Offset = SignExtend32<16>(Insn & 0xffff); - unsigned Base = fieldFromInstruction(Insn, 16, 5); - unsigned Reg = fieldFromInstruction(Insn, 21, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 16, 5); + unsigned RegNo = fieldFromInstruction(Insn, 21, 5); - Reg = getReg(Decoder, Mips::FGR64RegClassID, Reg); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Reg = getReg(Decoder, Mips::FGR64RegClassID, RegNo); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); Inst.addOperand(MCOperand::createReg(Reg)); Inst.addOperand(MCOperand::createReg(Base)); @@ -1447,11 +1447,11 @@ static DecodeStatus DecodeFMemMMR2(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeFMem2(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); - unsigned Reg = fieldFromInstruction(Insn, 16, 5); - unsigned Base = fieldFromInstruction(Insn, 21, 5); + unsigned RegNo = fieldFromInstruction(Insn, 16, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 21, 5); - Reg = getReg(Decoder, Mips::COP2RegClassID, Reg); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Reg = getReg(Decoder, Mips::COP2RegClassID, RegNo); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); Inst.addOperand(MCOperand::createReg(Reg)); Inst.addOperand(MCOperand::createReg(Base)); @@ -1463,11 +1463,11 @@ static DecodeStatus DecodeFMem2(MCInst &Inst, unsigned Insn, uint64_t Address, static DecodeStatus DecodeFMem3(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); - unsigned Reg = fieldFromInstruction(Insn, 16, 5); - unsigned Base = fieldFromInstruction(Insn, 21, 5); + unsigned RegNo = fieldFromInstruction(Insn, 16, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 21, 5); - Reg = getReg(Decoder, Mips::COP3RegClassID, Reg); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Reg = getReg(Decoder, 
Mips::COP3RegClassID, RegNo); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); Inst.addOperand(MCOperand::createReg(Reg)); Inst.addOperand(MCOperand::createReg(Base)); @@ -1480,11 +1480,11 @@ static DecodeStatus DecodeFMemCop2R6(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int Offset = SignExtend32<11>(Insn & 0x07ff); - unsigned Reg = fieldFromInstruction(Insn, 16, 5); - unsigned Base = fieldFromInstruction(Insn, 11, 5); + unsigned RegNo = fieldFromInstruction(Insn, 16, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 11, 5); - Reg = getReg(Decoder, Mips::COP2RegClassID, Reg); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Reg = getReg(Decoder, Mips::COP2RegClassID, RegNo); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); Inst.addOperand(MCOperand::createReg(Reg)); Inst.addOperand(MCOperand::createReg(Base)); @@ -1497,11 +1497,11 @@ static DecodeStatus DecodeFMemCop2MMR6(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int Offset = SignExtend32<11>(Insn & 0x07ff); - unsigned Reg = fieldFromInstruction(Insn, 21, 5); - unsigned Base = fieldFromInstruction(Insn, 16, 5); + unsigned RegNo = fieldFromInstruction(Insn, 21, 5); + unsigned BaseNo = fieldFromInstruction(Insn, 16, 5); - Reg = getReg(Decoder, Mips::COP2RegClassID, Reg); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Reg = getReg(Decoder, Mips::COP2RegClassID, RegNo); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); Inst.addOperand(MCOperand::createReg(Reg)); Inst.addOperand(MCOperand::createReg(Base)); @@ -1514,11 +1514,11 @@ static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst, unsigned Insn, uint64_t Address, const MCDisassembler *Decoder) { int64_t Offset = SignExtend64<9>((Insn >> 7) & 0x1ff); - unsigned Rt = fieldFromInstruction(Insn, 16, 5); - unsigned Base = fieldFromInstruction(Insn, 21, 5); + unsigned RtNo = fieldFromInstruction(Insn, 16, 
5); + unsigned BaseNo = fieldFromInstruction(Insn, 21, 5); - Rt = getReg(Decoder, Mips::GPR32RegClassID, Rt); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + MCRegister Rt = getReg(Decoder, Mips::GPR32RegClassID, RtNo); + MCRegister Base = getReg(Decoder, Mips::GPR32RegClassID, BaseNo); if(Inst.getOpcode() == Mips::SC_R6 || Inst.getOpcode() == Mips::SCD_R6){ Inst.addOperand(MCOperand::createReg(Rt)); diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp index 6b013de274772..fd8eb33e20b26 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp @@ -67,7 +67,7 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() { Streamer->popSection(); } -void MipsRegInfoRecord::SetPhysRegUsed(unsigned Reg, +void MipsRegInfoRecord::SetPhysRegUsed(MCRegister Reg, const MCRegisterInfo *MCRegInfo) { unsigned Value = 0; diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 1e1b9703d8062..01f18acf050d7 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -126,9 +126,9 @@ void MipsTargetStreamer::emitDirectiveSetDspr2() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips3D() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoMips3D() { forbidModuleDirective(); } -void MipsTargetStreamer::emitDirectiveCpAdd(unsigned RegNo) {} -void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {} -void MipsTargetStreamer::emitDirectiveCpLocal(unsigned RegNo) { +void MipsTargetStreamer::emitDirectiveCpAdd(MCRegister Reg) {} +void MipsTargetStreamer::emitDirectiveCpLoad(MCRegister Reg) {} +void MipsTargetStreamer::emitDirectiveCpLocal(MCRegister Reg) { // .cplocal $reg 
// This directive forces to use the alternate register for context pointer. // For example @@ -141,17 +141,17 @@ void MipsTargetStreamer::emitDirectiveCpLocal(unsigned RegNo) { if (!getABI().IsN32() && !getABI().IsN64()) return; - GPReg = RegNo; + GPReg = Reg; forbidModuleDirective(); } bool MipsTargetStreamer::emitDirectiveCpRestore( - int Offset, function_ref GetATReg, SMLoc IDLoc, + int Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI) { forbidModuleDirective(); return true; } -void MipsTargetStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, +void MipsTargetStreamer::emitDirectiveCpsetup(MCRegister Reg, int RegOrOffset, const MCSymbol &Sym, bool IsReg) { } void MipsTargetStreamer::emitDirectiveCpreturn(unsigned SaveLocation, @@ -324,7 +324,7 @@ void MipsTargetStreamer::emitGPRestore(int Offset, SMLoc IDLoc, /// Emit a store instruction with an immediate offset. void MipsTargetStreamer::emitStoreWithImmOffset( unsigned Opcode, MCRegister SrcReg, MCRegister BaseReg, int64_t Offset, - function_ref GetATReg, SMLoc IDLoc, + function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI) { if (isInt<16>(Offset)) { emitRRI(Opcode, SrcReg, BaseReg, Offset, IDLoc, STI); @@ -729,38 +729,38 @@ void MipsTargetAsmStreamer::emitFMask(unsigned FPUBitmask, OS << "," << FPUTopSavedRegOff << '\n'; } -void MipsTargetAsmStreamer::emitDirectiveCpAdd(unsigned RegNo) { +void MipsTargetAsmStreamer::emitDirectiveCpAdd(MCRegister Reg) { OS << "\t.cpadd\t$" - << StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << "\n"; + << StringRef(MipsInstPrinter::getRegisterName(Reg)).lower() << "\n"; forbidModuleDirective(); } -void MipsTargetAsmStreamer::emitDirectiveCpLoad(unsigned RegNo) { +void MipsTargetAsmStreamer::emitDirectiveCpLoad(MCRegister Reg) { OS << "\t.cpload\t$" - << StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << "\n"; + << StringRef(MipsInstPrinter::getRegisterName(Reg)).lower() << "\n"; forbidModuleDirective(); } -void 
MipsTargetAsmStreamer::emitDirectiveCpLocal(unsigned RegNo) { +void MipsTargetAsmStreamer::emitDirectiveCpLocal(MCRegister Reg) { OS << "\t.cplocal\t$" - << StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << "\n"; - MipsTargetStreamer::emitDirectiveCpLocal(RegNo); + << StringRef(MipsInstPrinter::getRegisterName(Reg)).lower() << "\n"; + MipsTargetStreamer::emitDirectiveCpLocal(Reg); } bool MipsTargetAsmStreamer::emitDirectiveCpRestore( - int Offset, function_ref GetATReg, SMLoc IDLoc, + int Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI) { MipsTargetStreamer::emitDirectiveCpRestore(Offset, GetATReg, IDLoc, STI); OS << "\t.cprestore\t" << Offset << "\n"; return true; } -void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo, +void MipsTargetAsmStreamer::emitDirectiveCpsetup(MCRegister Reg, int RegOrOffset, const MCSymbol &Sym, bool IsReg) { OS << "\t.cpsetup\t$" - << StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << ", "; + << StringRef(MipsInstPrinter::getRegisterName(Reg)).lower() << ", "; if (IsReg) OS << "$" @@ -1229,18 +1229,18 @@ void MipsTargetELFStreamer::emitFMask(unsigned FPUBitmask, FPROffset = FPUTopSavedRegOff; } -void MipsTargetELFStreamer::emitDirectiveCpAdd(unsigned RegNo) { +void MipsTargetELFStreamer::emitDirectiveCpAdd(MCRegister Reg) { // .cpadd $reg // This directive inserts code to add $gp to the argument's register // when support for position independent code is enabled. 
if (!Pic) return; - emitAddu(RegNo, RegNo, GPReg, getABI().IsN64(), &STI); + emitAddu(Reg, Reg, GPReg, getABI().IsN64(), &STI); forbidModuleDirective(); } -void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) { +void MipsTargetELFStreamer::emitDirectiveCpLoad(MCRegister Reg) { // .cpload $reg // This directive expands to: // lui $gp, %hi(_gp_disp) @@ -1283,19 +1283,19 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) { TmpInst.setOpcode(Mips::ADDu); TmpInst.addOperand(MCOperand::createReg(GPReg)); TmpInst.addOperand(MCOperand::createReg(GPReg)); - TmpInst.addOperand(MCOperand::createReg(RegNo)); + TmpInst.addOperand(MCOperand::createReg(Reg)); getStreamer().emitInstruction(TmpInst, STI); forbidModuleDirective(); } -void MipsTargetELFStreamer::emitDirectiveCpLocal(unsigned RegNo) { +void MipsTargetELFStreamer::emitDirectiveCpLocal(MCRegister Reg) { if (Pic) - MipsTargetStreamer::emitDirectiveCpLocal(RegNo); + MipsTargetStreamer::emitDirectiveCpLocal(Reg); } bool MipsTargetELFStreamer::emitDirectiveCpRestore( - int Offset, function_ref GetATReg, SMLoc IDLoc, + int Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI) { MipsTargetStreamer::emitDirectiveCpRestore(Offset, GetATReg, IDLoc, STI); // .cprestore offset @@ -1315,7 +1315,7 @@ bool MipsTargetELFStreamer::emitDirectiveCpRestore( return true; } -void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, +void MipsTargetELFStreamer::emitDirectiveCpsetup(MCRegister Reg, int RegOrOffset, const MCSymbol &Sym, bool IsReg) { @@ -1353,9 +1353,9 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, // (d)addu $gp, $gp, $funcreg if (getABI().IsN32()) - emitRRR(Mips::ADDu, GPReg, GPReg, RegNo, SMLoc(), &STI); + emitRRR(Mips::ADDu, GPReg, GPReg, Reg, SMLoc(), &STI); else - emitRRR(Mips::DADDu, GPReg, GPReg, RegNo, SMLoc(), &STI); + emitRRR(Mips::DADDu, GPReg, GPReg, Reg, SMLoc(), &STI); } void MipsTargetELFStreamer::emitDirectiveCpreturn(unsigned 
SaveLocation, diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.h index b726a80ce6b72..71b5d165a9cb3 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.h +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.h @@ -98,13 +98,13 @@ class MipsTargetStreamer : public MCTargetStreamer { virtual void emitDirectiveSetHardFloat(); // PIC support - virtual void emitDirectiveCpAdd(unsigned RegNo); - virtual void emitDirectiveCpLoad(unsigned RegNo); - virtual void emitDirectiveCpLocal(unsigned RegNo); + virtual void emitDirectiveCpAdd(MCRegister Reg); + virtual void emitDirectiveCpLoad(MCRegister Reg); + virtual void emitDirectiveCpLocal(MCRegister Reg); virtual bool emitDirectiveCpRestore(int Offset, - function_ref GetATReg, + function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI); - virtual void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, + virtual void emitDirectiveCpsetup(MCRegister Reg, int RegOrOffset, const MCSymbol &Sym, bool IsReg); virtual void emitDirectiveCpreturn(unsigned SaveLocation, bool SaveLocationIsRegister); @@ -164,7 +164,7 @@ class MipsTargetStreamer : public MCTargetStreamer { /// by reporting an error). 
void emitStoreWithImmOffset(unsigned Opcode, MCRegister SrcReg, MCRegister BaseReg, int64_t Offset, - function_ref GetATReg, SMLoc IDLoc, + function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI); void emitLoadWithImmOffset(unsigned Opcode, MCRegister DstReg, MCRegister BaseReg, int64_t Offset, @@ -205,7 +205,7 @@ class MipsTargetStreamer : public MCTargetStreamer { bool FrameInfoSet; int FrameOffset; unsigned FrameReg; - unsigned GPReg; + MCRegister GPReg; unsigned ReturnReg; private: @@ -290,9 +290,9 @@ class MipsTargetAsmStreamer : public MipsTargetStreamer { void emitDirectiveSetHardFloat() override; // PIC support - void emitDirectiveCpAdd(unsigned RegNo) override; - void emitDirectiveCpLoad(unsigned RegNo) override; - void emitDirectiveCpLocal(unsigned RegNo) override; + void emitDirectiveCpAdd(MCRegister Reg) override; + void emitDirectiveCpLoad(MCRegister Reg) override; + void emitDirectiveCpLocal(MCRegister Reg) override; /// Emit a .cprestore directive. If the offset is out of range then it will /// be synthesized using the assembler temporary. @@ -301,9 +301,9 @@ class MipsTargetAsmStreamer : public MipsTargetStreamer { /// temporary and is only called when the assembler temporary is required. It /// must handle the case where no assembler temporary is available (typically /// by reporting an error). 
- bool emitDirectiveCpRestore(int Offset, function_ref GetATReg, + bool emitDirectiveCpRestore(int Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI) override; - void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, + void emitDirectiveCpsetup(MCRegister Reg, int RegOrOffset, const MCSymbol &Sym, bool IsReg) override; void emitDirectiveCpreturn(unsigned SaveLocation, bool SaveLocationIsRegister) override; @@ -370,12 +370,12 @@ class MipsTargetELFStreamer : public MipsTargetStreamer { void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) override; // PIC support - void emitDirectiveCpAdd(unsigned RegNo) override; - void emitDirectiveCpLoad(unsigned RegNo) override; - void emitDirectiveCpLocal(unsigned RegNo) override; - bool emitDirectiveCpRestore(int Offset, function_ref GetATReg, + void emitDirectiveCpAdd(MCRegister Reg) override; + void emitDirectiveCpLoad(MCRegister Reg) override; + void emitDirectiveCpLocal(MCRegister Reg) override; + bool emitDirectiveCpRestore(int Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI) override; - void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, + void emitDirectiveCpsetup(MCRegister Reg, int RegOrOffset, const MCSymbol &Sym, bool IsReg) override; void emitDirectiveCpreturn(unsigned SaveLocation, bool SaveLocationIsRegister) override; diff --git a/llvm/lib/Target/Mips/MipsOptionRecord.h b/llvm/lib/Target/Mips/MipsOptionRecord.h index 7897095ef8941..2107baf9f14e5 100644 --- a/llvm/lib/Target/Mips/MipsOptionRecord.h +++ b/llvm/lib/Target/Mips/MipsOptionRecord.h @@ -58,7 +58,7 @@ class MipsRegInfoRecord : public MipsOptionRecord { ~MipsRegInfoRecord() override = default; void EmitMipsOptionRecord() override; - void SetPhysRegUsed(unsigned Reg, const MCRegisterInfo *MCRegInfo); + void SetPhysRegUsed(MCRegister Reg, const MCRegisterInfo *MCRegInfo); private: MipsELFStreamer *Streamer; diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td 
b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index 23d6d8853800f..fe1eea2b33615 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -889,6 +889,7 @@ def : Pat<(v16i8 (rotl v16i8:$vA, v16i8:$vB)), (v16i8 (VRLB v16i8:$vA, v16i8:$vB))>; def : Pat<(v8i16 (rotl v8i16:$vA, v8i16:$vB)), (v8i16 (VRLH v8i16:$vA, v8i16:$vB))>; +let Predicates = [IsNotISAFuture] in def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)), (v4i32 (VRLW v4i32:$vA, v4i32:$vB))>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td index dfbbba0116f25..e417ffe6d3677 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td @@ -420,8 +420,10 @@ let Predicates = [HasVSX, IsISAFuture] in { : VXForm_VRTAB5<323, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), "vucmprlh $VRT, $VRA, $VRB", []>; - def XVRLW: XX3Form_XTAB6<60, 184, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), - "xvrlw $XT, $XA, $XB", []>; + def XVRLW : XX3Form_XTAB6<60, 184, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvrlw $XT, $XA, $XB", + [(set v4i32:$XT, (int_ppc_vsx_xvrlw v4i32:$XA, + v4i32:$XB))]>; // AES Acceleration Instructions def XXAESENCP : XX3Form_XTABp5_M2<194, (outs vsrprc:$XTp), @@ -550,6 +552,10 @@ def : Pat<(int_ppc_vsx_stxvprl v256i1:$XTp, addr:$RA, i64:$RB), (STXVPRL $XTp, $RA, $RB)>; def : Pat<(int_ppc_vsx_stxvprll v256i1:$XTp, addr:$RA, i64:$RB), (STXVPRLL $XTp, $RA, $RB)>; +let Predicates = [HasVSX, IsISAFuture] in { + def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)), (v4i32 (XVRLW v4i32:$vA, + v4i32:$vB))>; +} //---------------------------- Instruction aliases ---------------------------// // Predicate combinations available: diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 637f1943b8511..5a081d54d0726 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -16878,12 
+16878,23 @@ static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG, break; } - // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X) int ShX; if (int ShY = isShifted359(MulAmt - 1, ShX)) { assert(ShX != 0 && "MulAmt=4,6,10 handled before"); + // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X) if (ShX <= 3) return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true, Shift); + // 2^N * 3/5/9 + 1 -> (add (shYadd (shl X, N), (shl X, N)), X) + if (Shift == 0) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + SDValue X = N->getOperand(0); + SDValue Shl = + DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShX, DL, VT)); + SDValue ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl, + DAG.getTargetConstant(ShY, DL, VT), Shl); + return DAG.getNode(ISD::ADD, DL, VT, ShlAdd, X); + } } return SDValue(); } @@ -16944,7 +16955,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, DAG.getTargetConstant(Shift, DL, VT), Shift1); } - // TODO: 2^(C1>3) * 3,5,9 +/- 1 + // TODO: 2^(C1>3) * 3/5/9 - 1 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X)) if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) { diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index 41071b29e5c9e..4271a6816e05b 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -750,39 +750,82 @@ foreach mx = SchedMxList in { } // 16. 
Vector Permutation Instructions +// Slide foreach mx = SchedMxList in { defvar IsWorstCase = SMX60IsWorstCaseMX.c; - defm "" : LMULWriteResMX<"WriteVSlideI", [SMX60_VIEU], mx, IsWorstCase>; + // Latency for slide up: 4/4/8/16, ReleaseAtCycles is 2/4/8/16 + defvar VSlideUpLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c; + defvar VSlideUpOcc = ConstOneUntilMF2ThenDouble.c; + let Latency = VSlideUpLat, ReleaseAtCycles =[VSlideUpOcc] in { + defm "" : LMULWriteResMX<"WriteVSlideUpX", [SMX60_VIEU], mx, IsWorstCase>; + } - defm "" : LMULWriteResMX<"WriteVISlide1X", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVFSlide1F", [SMX60_VFP], mx, IsWorstCase>; + // Latency for slide down: 4/5/9/17, ReleaseAtCycles is 3/5/9/17 + defvar VSlideDownLat = GetLMULValue<[4, 4, 4, 4, 5, 9, 17], mx>.c; + defvar VSlideDownOcc = GetLMULValue<[1, 1, 1, 3, 5, 9, 17], mx>.c; + let Latency = VSlideDownLat, ReleaseAtCycles =[VSlideDownOcc] in { + defm "" : LMULWriteResMX<"WriteVSlideDownX", [SMX60_VIEU], mx, IsWorstCase>; + } + // The following group slide up and down together, so we use the worst-case + // (slide down) for all. 
+ let Latency = VSlideDownLat, ReleaseAtCycles =[VSlideDownOcc] in { + defm "" : LMULWriteResMX<"WriteVSlideI", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVISlide1X", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSlideUpX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVSlideDownX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFSlide1F", [SMX60_VFP], mx, IsWorstCase>; + } } -def : WriteRes; -def : WriteRes; - -def : WriteRes; -def : WriteRes; +// ReleaseAtCycles is 2/2/2/2/2/3/6, but we can't set based on MX for now +// TODO: Split this into separate WriteRes for each MX +let Latency = 6, ReleaseAtCycles = [6] in { + def : WriteRes; +} -// Gather and Compress -foreach mx = SchedMxList in { - foreach sew = SchedSEWSet.val in { - defvar IsWorstCase = SMX60IsWorstCaseMXSEW.c; - defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SMX60_VIEU], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SMX60_VIEU], mx, sew, IsWorstCase>; - defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SMX60_VIEU], mx, sew, IsWorstCase>; - } +// ReleaseAtCycles is 1/1/1/1/1/2/4, but we can't set based on MX for now +// TODO: Split this into separate WriteRes for each MX +let Latency = 4, ReleaseAtCycles = [4] in { + def : WriteRes; + def : WriteRes; + def : WriteRes; } +// Integer LMUL Gather and Compress foreach mx = SchedMxList in { defvar IsWorstCase = SMX60IsWorstCaseMX.c; - defm "" : LMULWriteResMX<"WriteVRGatherVX", [SMX60_VIEU], mx, IsWorstCase>; - defm "" : LMULWriteResMX<"WriteVRGatherVI", [SMX60_VIEU], mx, IsWorstCase>; + defvar VRGatherLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c; + let Latency = VRGatherLat, ReleaseAtCycles = [ConstOneUntilMF2ThenDouble.c] in { + defm "" : LMULWriteResMX<"WriteVRGatherVX", [SMX60_VIEU], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVRGatherVI", [SMX60_VIEU], mx, IsWorstCase>; + } + + foreach sew = SchedSEWSet.val in { + defvar 
IsWorstCaseSEW = SMX60IsWorstCaseMXSEW.c; + + defvar VRGatherVVLat = GetLMULValue<[4, 4, 4, 4, 16, 64, 256], mx>.c; + defvar VRGatherVVOcc = GetLMULValue<[1, 1, 1, 4, 16, 64, 256], mx>.c; + let Latency = VRGatherVVLat, ReleaseAtCycles = [VRGatherVVOcc] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>; + } + // For sew == 8, latency is half of the other cases, except for the fractional LMULs (const 4 cycles) + defvar VRGatherEI16Lat = !if(!eq(sew, 8), + GetLMULValue<[4, 4, 4, 8, 32, 128, 256], mx>.c, + GetLMULValue<[4, 4, 4, 4, 16, 64, 256], mx>.c); + defvar VRGatherEI16Occ = !if(!eq(sew, 8), + GetLMULValue<[1, 1, 2, 8, 32, 128, 256], mx>.c, + GetLMULValue<[1, 1, 1, 4, 16, 64, 256], mx>.c); + let Latency = VRGatherEI16Lat, ReleaseAtCycles = [VRGatherEI16Occ] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>; + } + + defvar VCompressVLat = GetLMULValue<[4, 4, 4, 4, 10, 36, 136], mx>.c; + defvar VCompressVOcc = GetLMULValue<[1, 1, 1, 3, 10, 36, 136], mx>.c; + let Latency = VCompressVLat, ReleaseAtCycles = [VCompressVOcc] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>; + } + } } // Others diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index f05115dbeb8cb..29df53c6c9893 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -146,7 +146,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { } bool enableMachineScheduler() const override { return true; } - bool enableTerminalRule() const override { return true; } bool enablePostRAScheduler() const override { return UsePostRAScheduler; } diff --git a/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.cpp b/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.cpp index 3e4a58a20f942..0798483462e18 100644 --- 
a/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.cpp @@ -21,6 +21,7 @@ #include "llvm/Transforms/Utils/LoopSimplify.h" #include #include +#include #define DEBUG_TYPE "spirv-convergence-region-analysis" diff --git a/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.h b/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.h index ed0a1e10562a8..7f4e1a1791e9e 100644 --- a/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.h +++ b/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.h @@ -20,7 +20,6 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Dominators.h" #include -#include namespace llvm { class IntrinsicInst; diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h index f9ba5e2d55cba..d36453a4f078d 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h @@ -15,7 +15,6 @@ #include "llvm/Support/DataTypes.h" #include -#include namespace llvm { class MCAsmBackend; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 74de51c7eb1cc..e67b138afafec 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -1391,7 +1391,7 @@ class DarwinX86AsmBackend : public X86AsmBackend { return CU::UNWIND_MODE_DWARF; MCRegister Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true); - SavedRegs[SavedRegIdx++] = Reg; + SavedRegs[SavedRegIdx++] = Reg.id(); StackAdjust += OffsetSize; MinAbsOffset = std::min(MinAbsOffset, std::abs(Inst.getOffset())); InstrOffset += PushInstrSize(Reg); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp index 759d95e5a18ea..88dd5431f586b 100644 --- 
a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp @@ -451,7 +451,7 @@ void X86InstPrinterCommon::printVKPair(const MCInst *MI, unsigned OpNo, // the assembly would look something like: // "vp2intersect %zmm5, %zmm7, {%k2, %k3}" // but this can work too. - switch (MI->getOperand(OpNo).getReg()) { + switch (MI->getOperand(OpNo).getReg().id()) { case X86::K0_K1: printRegName(OS, X86::K0); return; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index af5a69899844c..0c874b7e6d674 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -535,7 +535,7 @@ bool X86MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI, const MCRegisterClass &VR128XRC = MRI.getRegClass(X86::VR128XRegClassID); const MCRegisterClass &VR256XRC = MRI.getRegClass(X86::VR256XRegClassID); - auto ClearsSuperReg = [=](unsigned RegID) { + auto ClearsSuperReg = [=](MCRegister RegID) { // On X86-64, a general purpose integer register is viewed as a 64-bit // register internal to the processor. // An update to the lower 32 bits of a 64 bit integer register is diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp index 9c442319c220f..b722964a571b3 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp @@ -55,6 +55,7 @@ struct FPOInstruction { StackAlign, SetFrame, } Op; + // FIXME: This should be a union of MCRegister and unsigned. 
unsigned RegOrOffset; }; @@ -215,7 +216,7 @@ bool X86WinCOFFTargetStreamer::emitFPOSetFrame(MCRegister Reg, SMLoc L) { FPOInstruction Inst; Inst.Label = emitFPOLabel(); Inst.Op = FPOInstruction::SetFrame; - Inst.RegOrOffset = Reg; + Inst.RegOrOffset = Reg.id(); CurFPOData->Instructions.push_back(Inst); return false; } @@ -226,7 +227,7 @@ bool X86WinCOFFTargetStreamer::emitFPOPushReg(MCRegister Reg, SMLoc L) { FPOInstruction Inst; Inst.Label = emitFPOLabel(); Inst.Op = FPOInstruction::PushReg; - Inst.RegOrOffset = Reg; + Inst.RegOrOffset = Reg.id(); CurFPOData->Instructions.push_back(Inst); return false; } diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 2b83d575ace91..200ca80adb232 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -84,7 +84,14 @@ FunctionPass *createX86AvoidStoreForwardingBlocks(); FunctionPass *createX86FlagsCopyLoweringPass(); /// Return a pass that expands DynAlloca pseudo-instructions. -FunctionPass *createX86DynAllocaExpander(); +class X86DynAllocaExpanderPass + : public PassInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); +}; + +FunctionPass *createX86DynAllocaExpanderLegacyPass(); /// Return a pass that config the tile registers. 
FunctionPass *createX86TileConfigPass(); @@ -108,7 +115,6 @@ FunctionPass *createX86LowerTileCopyPass(); class X86AvoidTrailingCallPass : public PassInfoMixin { public: - X86AvoidTrailingCallPass() = default; PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM); static bool isRequired() { return true; } @@ -237,7 +243,7 @@ void initializeX86CallFrameOptimizationPass(PassRegistry &); void initializeX86CmovConverterPassPass(PassRegistry &); void initializeX86DAGToDAGISelLegacyPass(PassRegistry &); void initializeX86DomainReassignmentPass(PassRegistry &); -void initializeX86DynAllocaExpanderPass(PassRegistry &); +void initializeX86DynAllocaExpanderLegacyPass(PassRegistry &); void initializeX86ExecutionDomainFixPass(PassRegistry &); void initializeX86ExpandPseudoPass(PassRegistry &); void initializeX86FastPreTileConfigPass(PassRegistry &); diff --git a/llvm/lib/Target/X86/X86DynAllocaExpander.cpp b/llvm/lib/Target/X86/X86DynAllocaExpander.cpp index c2a06efd4d46e..10f46f71bbbbd 100644 --- a/llvm/lib/Target/X86/X86DynAllocaExpander.cpp +++ b/llvm/lib/Target/X86/X86DynAllocaExpander.cpp @@ -20,22 +20,22 @@ #include "X86Subtarget.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/MachineFunctionAnalysisManager.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/IR/Analysis.h" #include "llvm/IR/Function.h" using namespace llvm; namespace { -class X86DynAllocaExpander : public MachineFunctionPass { +class X86DynAllocaExpander { public: - X86DynAllocaExpander() : MachineFunctionPass(ID) {} - - bool runOnMachineFunction(MachineFunction &MF) override; + bool run(MachineFunction &MF); private: /// Strategies for lowering a DynAlloca. 
@@ -61,22 +61,30 @@ class X86DynAllocaExpander : public MachineFunctionPass { unsigned SlotSize = 0; int64_t StackProbeSize = 0; bool NoStackArgProbe = false; +}; + +class X86DynAllocaExpanderLegacy : public MachineFunctionPass { +public: + X86DynAllocaExpanderLegacy() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; +private: StringRef getPassName() const override { return "X86 DynAlloca Expander"; } public: static char ID; }; -char X86DynAllocaExpander::ID = 0; +char X86DynAllocaExpanderLegacy::ID = 0; } // end anonymous namespace -INITIALIZE_PASS(X86DynAllocaExpander, "x86-dyn-alloca-expander", +INITIALIZE_PASS(X86DynAllocaExpanderLegacy, "x86-dyn-alloca-expander", "X86 DynAlloca Expander", false, false) -FunctionPass *llvm::createX86DynAllocaExpander() { - return new X86DynAllocaExpander(); +FunctionPass *llvm::createX86DynAllocaExpanderLegacyPass() { + return new X86DynAllocaExpanderLegacy(); } /// Return the allocation amount for a DynAlloca instruction, or -1 if unknown. 
@@ -277,7 +285,7 @@ void X86DynAllocaExpander::lower(MachineInstr *MI, Lowering L) { AmountDef->eraseFromParent(); } -bool X86DynAllocaExpander::runOnMachineFunction(MachineFunction &MF) { +bool X86DynAllocaExpander::run(MachineFunction &MF) { if (!MF.getInfo()->hasDynAlloca()) return false; @@ -299,3 +307,19 @@ bool X86DynAllocaExpander::runOnMachineFunction(MachineFunction &MF) { return true; } + +bool X86DynAllocaExpanderLegacy::runOnMachineFunction(MachineFunction &MF) { + return X86DynAllocaExpander().run(MF); +} + +PreservedAnalyses +X86DynAllocaExpanderPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + bool Changed = X86DynAllocaExpander().run(MF); + if (!Changed) + return PreservedAnalyses::all(); + + PreservedAnalyses PA = PreservedAnalyses::none(); + PA.preserveSet(); + return PA; +} diff --git a/llvm/lib/Target/X86/X86PassRegistry.def b/llvm/lib/Target/X86/X86PassRegistry.def index 52463622026d7..0d7095b18daa8 100644 --- a/llvm/lib/Target/X86/X86PassRegistry.def +++ b/llvm/lib/Target/X86/X86PassRegistry.def @@ -30,6 +30,7 @@ DUMMY_FUNCTION_PASS("x86-winehstate", WinEHStatePass()) #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) #endif MACHINE_FUNCTION_PASS("x86-avoid-trailing-call", X86AvoidTrailingCallPass()) +MACHINE_FUNCTION_PASS("x86-dyn-alloca-expander", X86DynAllocaExpanderPass()) MACHINE_FUNCTION_PASS("x86-isel", X86ISelDAGToDAGPass(*this)) #undef MACHINE_FUNCTION_PASS @@ -42,7 +43,6 @@ DUMMY_MACHINE_FUNCTION_PASS("x86-cmov-conversion", X86CmovConverterPass()) DUMMY_MACHINE_FUNCTION_PASS("x86-codege", FPS()) DUMMY_MACHINE_FUNCTION_PASS("x86-compress-evex", CompressEVEXPass()) DUMMY_MACHINE_FUNCTION_PASS("x86-domain-reassignment", X86DomainReassignment()) -DUMMY_MACHINE_FUNCTION_PASS("x86-dyn-alloca-expander", X86DynAllocaExpander()) DUMMY_MACHINE_FUNCTION_PASS("x86-execution-domain-fix", X86ExecutionDomainFix()) DUMMY_MACHINE_FUNCTION_PASS("fastpretileconfig", X86FastPreTileConfig()) 
DUMMY_MACHINE_FUNCTION_PASS("fasttileconfig", X86FastTileConfig()) diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 4f5aadca361fe..868f41375b96b 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -419,8 +419,6 @@ class X86Subtarget final : public X86GenSubtargetInfo { /// Enable the MachineScheduler pass for all X86 subtargets. bool enableMachineScheduler() const override { return true; } - bool enableTerminalRule() const override { return true; } - bool enableEarlyIfConversion() const override; void getPostRAMutations(std::vector> diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index d4ad98af9b30c..c1214149dfa1d 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -104,7 +104,7 @@ extern "C" LLVM_C_ABI void LLVMInitializeX86Target() { initializeX86AsmPrinterPass(PR); initializeX86FixupInstTuningPassPass(PR); initializeX86FixupVectorConstantsPassPass(PR); - initializeX86DynAllocaExpanderPass(PR); + initializeX86DynAllocaExpanderLegacyPass(PR); initializeX86SuppressAPXForRelocationPassPass(PR); initializeX86WinEHUnwindV2Pass(PR); } @@ -516,7 +516,7 @@ void X86PassConfig::addPreRegAlloc() { addPass(createX86SpeculativeLoadHardeningPass()); addPass(createX86FlagsCopyLoweringPass()); - addPass(createX86DynAllocaExpander()); + addPass(createX86DynAllocaExpanderLegacyPass()); if (getOptLevel() != CodeGenOptLevel::None) addPass(createX86PreTileConfigPass()); diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index aa1346d9ee56a..94663ff928a0b 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -78,7 +78,6 @@ #include #include #include -#include #include #include #include diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp index 
70b8614826826..b9fb7a3ae4b5b 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp @@ -18,6 +18,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/LongestCommonSequence.h" +#include + using namespace llvm; using namespace sampleprof; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index bb517aad31f19..e8fea6851dae5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1431,11 +1431,11 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) { if (auto *Store = dyn_cast(U)) { // VPWidenStore doesn't have users, and stores are always - // profitable to widen: hence, permitting single-scalar stored - // values is an important leaf condition. The assert must hold as - // we checked the RepOrWidenR operand against - // vputils::isSingleScalar. - assert(RepOrWidenR == Store->getAddr() || + // profitable to widen: hence, permitting address and mask + // operands, and single-scalar stored values is an important leaf + // condition. The assert must hold as we checked the RepOrWidenR + // operand against vputils::isSingleScalar. 
+ assert(RepOrWidenR != Store->getStoredValue() || vputils::isSingleScalar(Store->getStoredValue())); return true; } diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index ed3a0a0ab023d..f1890e4f5fb95 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -129,7 +129,9 @@ class VectorCombine { bool foldExtractedCmps(Instruction &I); bool foldBinopOfReductions(Instruction &I); bool foldSingleElementStore(Instruction &I); - bool scalarizeLoadExtract(Instruction &I); + bool scalarizeLoad(Instruction &I); + bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy, Value *Ptr); + bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy, Value *Ptr); bool scalarizeExtExtract(Instruction &I); bool foldConcatOfBoolMasks(Instruction &I); bool foldPermuteOfBinops(Instruction &I); @@ -1852,11 +1854,9 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) { return false; } -/// Try to scalarize vector loads feeding extractelement instructions. -bool VectorCombine::scalarizeLoadExtract(Instruction &I) { - if (!TTI.allowVectorElementIndexingUsingGEP()) - return false; - +/// Try to scalarize vector loads feeding extractelement or bitcast +/// instructions. 
+bool VectorCombine::scalarizeLoad(Instruction &I) { Value *Ptr; if (!match(&I, m_Load(m_Value(Ptr)))) return false; @@ -1866,35 +1866,30 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) { if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType())) return false; - InstructionCost OriginalCost = - TTI.getMemoryOpCost(Instruction::Load, VecTy, LI->getAlign(), - LI->getPointerAddressSpace(), CostKind); - InstructionCost ScalarizedCost = 0; - + bool AllExtracts = true; + bool AllBitcasts = true; Instruction *LastCheckedInst = LI; unsigned NumInstChecked = 0; - DenseMap NeedFreeze; - auto FailureGuard = make_scope_exit([&]() { - // If the transform is aborted, discard the ScalarizationResults. - for (auto &Pair : NeedFreeze) - Pair.second.discard(); - }); - // Check if all users of the load are extracts with no memory modifications - // between the load and the extract. Compute the cost of both the original - // code and the scalarized version. + // Check what type of users we have (must either all be extracts or + // bitcasts) and ensure no memory modifications between the load and + // its users. for (User *U : LI->users()) { - auto *UI = dyn_cast(U); + auto *UI = dyn_cast(U); if (!UI || UI->getParent() != LI->getParent()) return false; - // If any extract is waiting to be erased, then bail out as this will + // If any user is waiting to be erased, then bail out as this will // distort the cost calculation and possibly lead to infinite loops. if (UI->use_empty()) return false; - // Check if any instruction between the load and the extract may modify - // memory. + if (!isa(UI)) + AllExtracts = false; + if (!isa(UI)) + AllBitcasts = false; + + // Check if any instruction between the load and the user may modify memory. 
if (LastCheckedInst->comesBefore(UI)) { for (Instruction &I : make_range(std::next(LI->getIterator()), UI->getIterator())) { @@ -1906,6 +1901,35 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) { } LastCheckedInst = UI; } + } + + if (AllExtracts) + return scalarizeLoadExtract(LI, VecTy, Ptr); + if (AllBitcasts) + return scalarizeLoadBitcast(LI, VecTy, Ptr); + return false; +} + +/// Try to scalarize vector loads feeding extractelement instructions. +bool VectorCombine::scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy, + Value *Ptr) { + if (!TTI.allowVectorElementIndexingUsingGEP()) + return false; + + DenseMap NeedFreeze; + auto FailureGuard = make_scope_exit([&]() { + // If the transform is aborted, discard the ScalarizationResults. + for (auto &Pair : NeedFreeze) + Pair.second.discard(); + }); + + InstructionCost OriginalCost = + TTI.getMemoryOpCost(Instruction::Load, VecTy, LI->getAlign(), + LI->getPointerAddressSpace(), CostKind); + InstructionCost ScalarizedCost = 0; + + for (User *U : LI->users()) { + auto *UI = cast(U); auto ScalarIdx = canScalarizeAccess(VecTy, UI->getIndexOperand(), LI, AC, DT); @@ -1927,7 +1951,7 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) { nullptr, nullptr, CostKind); } - LLVM_DEBUG(dbgs() << "Found all extractions of a vector load: " << I + LLVM_DEBUG(dbgs() << "Found all extractions of a vector load: " << *LI << "\n LoadExtractCost: " << OriginalCost << " vs ScalarizedCost: " << ScalarizedCost << "\n"); @@ -1973,6 +1997,72 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) { return true; } +/// Try to scalarize vector loads feeding bitcast instructions. 
+bool VectorCombine::scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy, + Value *Ptr) { + InstructionCost OriginalCost = + TTI.getMemoryOpCost(Instruction::Load, VecTy, LI->getAlign(), + LI->getPointerAddressSpace(), CostKind); + + Type *TargetScalarType = nullptr; + unsigned VecBitWidth = DL->getTypeSizeInBits(VecTy); + + for (User *U : LI->users()) { + auto *BC = cast(U); + + Type *DestTy = BC->getDestTy(); + if (!DestTy->isIntegerTy() && !DestTy->isFloatingPointTy()) + return false; + + unsigned DestBitWidth = DL->getTypeSizeInBits(DestTy); + if (DestBitWidth != VecBitWidth) + return false; + + // All bitcasts must target the same scalar type. + if (!TargetScalarType) + TargetScalarType = DestTy; + else if (TargetScalarType != DestTy) + return false; + + OriginalCost += + TTI.getCastInstrCost(Instruction::BitCast, TargetScalarType, VecTy, + TTI.getCastContextHint(BC), CostKind, BC); + } + + if (!TargetScalarType) + return false; + + assert(!LI->user_empty() && "Unexpected load without bitcast users"); + InstructionCost ScalarizedCost = + TTI.getMemoryOpCost(Instruction::Load, TargetScalarType, LI->getAlign(), + LI->getPointerAddressSpace(), CostKind); + + LLVM_DEBUG(dbgs() << "Found vector load feeding only bitcasts: " << *LI + << "\n OriginalCost: " << OriginalCost + << " vs ScalarizedCost: " << ScalarizedCost << "\n"); + + if (ScalarizedCost >= OriginalCost) + return false; + + // Ensure we add the load back to the worklist BEFORE its users so they can + // be erased in the correct order. + Worklist.push(LI); + + Builder.SetInsertPoint(LI); + auto *ScalarLoad = + Builder.CreateLoad(TargetScalarType, Ptr, LI->getName() + ".scalar"); + ScalarLoad->setAlignment(LI->getAlign()); + ScalarLoad->copyMetadata(*LI); + + // Replace all bitcast users with the scalar load.
+ for (User *U : LI->users()) { + auto *BC = cast(U); + replaceValue(*BC, *ScalarLoad, false); + } + + return true; +} + bool VectorCombine::scalarizeExtExtract(Instruction &I) { if (!TTI.allowVectorElementIndexingUsingGEP()) return false; @@ -4585,7 +4675,7 @@ bool VectorCombine::run() { if (IsVectorType) { if (scalarizeOpOrCmp(I)) return true; - if (scalarizeLoadExtract(I)) + if (scalarizeLoad(I)) return true; if (scalarizeExtExtract(I)) return true; diff --git a/llvm/test/CodeGen/AArch64/frem-power2.ll b/llvm/test/CodeGen/AArch64/frem-power2.ll index e1bc7426ad63e..179df026e25d6 100644 --- a/llvm/test/CodeGen/AArch64/frem-power2.ll +++ b/llvm/test/CodeGen/AArch64/frem-power2.ll @@ -85,6 +85,84 @@ entry: ret float %fmod } +define float @frem2_exp(float %x) #0 { +; CHECK-SD-LABEL: frem2_exp: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-SD-NEXT: bl expf +; CHECK-SD-NEXT: fmov s1, #0.50000000 +; CHECK-SD-NEXT: fmov s2, #-2.00000000 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 +; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: frem2_exp: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: bl expf +; CHECK-GI-NEXT: fmov s1, #2.00000000 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: b fmodf +entry: + %a = tail call float @llvm.exp.f32(float %x) + %fmod = frem float %a, 2.0 + ret float %fmod +} + +define float @frem2_exp2(float %x) #0 { +; CHECK-SD-LABEL: frem2_exp2: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-SD-NEXT: bl exp2f +; CHECK-SD-NEXT: fmov s1, #0.50000000 +; CHECK-SD-NEXT: fmov s2, #-2.00000000 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 +; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: frem2_exp2: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: bl exp2f +; CHECK-GI-NEXT: fmov s1, #2.00000000 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: b fmodf +entry: + %a = tail call float @llvm.exp2.f32(float %x) + %fmod = frem float %a, 2.0 + ret float %fmod +} + +define float @frem2_exp10(float %x) #0 { +; CHECK-SD-LABEL: frem2_exp10: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-SD-NEXT: bl exp10f +; CHECK-SD-NEXT: fmov s1, #0.50000000 +; CHECK-SD-NEXT: fmov s2, #-2.00000000 +; CHECK-SD-NEXT: fmul s1, s0, s1 +; CHECK-SD-NEXT: frintz s1, s1 +; CHECK-SD-NEXT: fmadd s0, s1, s2, s0 +; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: frem2_exp10: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: bl exp10f +; CHECK-GI-NEXT: fmov s1, #2.00000000 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: b fmodf +entry: + %a = tail call float @llvm.exp10.f32(float %x) + %fmod = frem float %a, 2.0 + ret float %fmod +} + define half @hrem2_nsz(half %x) { ; CHECK-SD-LABEL: hrem2_nsz: ; CHECK-SD: // %bb.0: // %entry @@ -630,3 +708,5 @@ entry: %fmod = frem float -12.50, %y ret float %fmod } + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AArch64/fsh.ll b/llvm/test/CodeGen/AArch64/fsh.ll index 7f07ef476b8aa..1db776ea6f616 100644 --- a/llvm/test/CodeGen/AArch64/fsh.ll +++ b/llvm/test/CodeGen/AArch64/fsh.ll @@ -3537,27 +3537,22 @@ define <7 x i32> @rotl_v7i32_c(<7 x i32> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fmov s0, w0 ; CHECK-SD-NEXT: fmov s1, w4 -; CHECK-SD-NEXT: adrp x8, .LCPI108_0 -; CHECK-SD-NEXT: adrp x9, .LCPI108_1 -; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI108_0] -; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI108_1] ; CHECK-SD-NEXT: mov v0.s[1], w1 ; CHECK-SD-NEXT: mov v1.s[1], w5 ; CHECK-SD-NEXT: mov v0.s[2], w2 ; CHECK-SD-NEXT: mov v1.s[2], w6 ; CHECK-SD-NEXT: mov v0.s[3], w3 -; CHECK-SD-NEXT: ushl v2.4s, v1.4s, v2.4s -; CHECK-SD-NEXT: ushl v1.4s, v1.4s, v3.4s -; CHECK-SD-NEXT: shl v4.4s, v0.4s, #3 -; CHECK-SD-NEXT: usra v4.4s, v0.4s, #29 -; CHECK-SD-NEXT: orr v0.16b, v1.16b, v2.16b -; CHECK-SD-NEXT: mov w1, v4.s[1] -; CHECK-SD-NEXT: mov w2, v4.s[2] -; CHECK-SD-NEXT: mov w3, v4.s[3] -; CHECK-SD-NEXT: mov w5, v0.s[1] -; CHECK-SD-NEXT: mov w6, v0.s[2] -; CHECK-SD-NEXT: fmov w0, s4 -; CHECK-SD-NEXT: fmov w4, s0 +; CHECK-SD-NEXT: shl v3.4s, v1.4s, #3 +; CHECK-SD-NEXT: usra v3.4s, v1.4s, #29 +; CHECK-SD-NEXT: shl v2.4s, v0.4s, #3 +; CHECK-SD-NEXT: mov w5, v3.s[1] +; CHECK-SD-NEXT: mov w6, v3.s[2] +; CHECK-SD-NEXT: fmov w4, s3 +; CHECK-SD-NEXT: usra v2.4s, v0.4s, #29 +; CHECK-SD-NEXT: mov w1, v2.s[1] +; CHECK-SD-NEXT: mov w2, v2.s[2] +; CHECK-SD-NEXT: mov w3, v2.s[3] +; 
CHECK-SD-NEXT: fmov w0, s2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: rotl_v7i32_c: @@ -3614,27 +3609,22 @@ define <7 x i32> @rotr_v7i32_c(<7 x i32> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fmov s0, w0 ; CHECK-SD-NEXT: fmov s1, w4 -; CHECK-SD-NEXT: adrp x8, .LCPI109_0 -; CHECK-SD-NEXT: adrp x9, .LCPI109_1 -; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI109_0] -; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI109_1] ; CHECK-SD-NEXT: mov v0.s[1], w1 ; CHECK-SD-NEXT: mov v1.s[1], w5 ; CHECK-SD-NEXT: mov v0.s[2], w2 ; CHECK-SD-NEXT: mov v1.s[2], w6 ; CHECK-SD-NEXT: mov v0.s[3], w3 -; CHECK-SD-NEXT: ushl v2.4s, v1.4s, v2.4s -; CHECK-SD-NEXT: ushl v1.4s, v1.4s, v3.4s -; CHECK-SD-NEXT: shl v4.4s, v0.4s, #29 -; CHECK-SD-NEXT: usra v4.4s, v0.4s, #3 -; CHECK-SD-NEXT: orr v0.16b, v1.16b, v2.16b -; CHECK-SD-NEXT: mov w1, v4.s[1] -; CHECK-SD-NEXT: mov w2, v4.s[2] -; CHECK-SD-NEXT: mov w3, v4.s[3] -; CHECK-SD-NEXT: mov w5, v0.s[1] -; CHECK-SD-NEXT: mov w6, v0.s[2] -; CHECK-SD-NEXT: fmov w0, s4 -; CHECK-SD-NEXT: fmov w4, s0 +; CHECK-SD-NEXT: shl v3.4s, v1.4s, #29 +; CHECK-SD-NEXT: usra v3.4s, v1.4s, #3 +; CHECK-SD-NEXT: shl v2.4s, v0.4s, #29 +; CHECK-SD-NEXT: mov w5, v3.s[1] +; CHECK-SD-NEXT: mov w6, v3.s[2] +; CHECK-SD-NEXT: fmov w4, s3 +; CHECK-SD-NEXT: usra v2.4s, v0.4s, #3 +; CHECK-SD-NEXT: mov w1, v2.s[1] +; CHECK-SD-NEXT: mov w2, v2.s[2] +; CHECK-SD-NEXT: mov w3, v2.s[3] +; CHECK-SD-NEXT: fmov w0, s2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: rotr_v7i32_c: @@ -4132,36 +4122,31 @@ define <7 x i32> @fshl_v7i32_c(<7 x i32> %a, <7 x i32> %b) { ; CHECK-SD-LABEL: fshl_v7i32_c: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fmov s0, w0 -; CHECK-SD-NEXT: fmov s2, w4 -; CHECK-SD-NEXT: ldr s1, [sp, #24] -; CHECK-SD-NEXT: fmov s3, w7 +; CHECK-SD-NEXT: fmov s1, w4 ; CHECK-SD-NEXT: mov x8, sp +; CHECK-SD-NEXT: fmov s2, w7 +; CHECK-SD-NEXT: ldr s3, [sp, #24] ; CHECK-SD-NEXT: add x9, sp, #32 -; CHECK-SD-NEXT: ld1 { v1.s }[1], [x9] -; CHECK-SD-NEXT: add x9, sp, #40 -; CHECK-SD-NEXT: adrp 
x10, .LCPI134_1 ; CHECK-SD-NEXT: mov v0.s[1], w1 -; CHECK-SD-NEXT: mov v2.s[1], w5 -; CHECK-SD-NEXT: ldr q5, [x10, :lo12:.LCPI134_1] -; CHECK-SD-NEXT: ld1 { v3.s }[1], [x8] +; CHECK-SD-NEXT: mov v1.s[1], w5 +; CHECK-SD-NEXT: ld1 { v3.s }[1], [x9] +; CHECK-SD-NEXT: ld1 { v2.s }[1], [x8] ; CHECK-SD-NEXT: add x8, sp, #8 -; CHECK-SD-NEXT: ld1 { v1.s }[2], [x9] -; CHECK-SD-NEXT: add x9, sp, #16 +; CHECK-SD-NEXT: add x9, sp, #40 +; CHECK-SD-NEXT: ld1 { v3.s }[2], [x9] ; CHECK-SD-NEXT: mov v0.s[2], w2 -; CHECK-SD-NEXT: mov v2.s[2], w6 -; CHECK-SD-NEXT: ld1 { v3.s }[2], [x8] -; CHECK-SD-NEXT: adrp x8, .LCPI134_0 -; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI134_0] -; CHECK-SD-NEXT: ld1 { v3.s }[3], [x9] +; CHECK-SD-NEXT: mov v1.s[2], w6 +; CHECK-SD-NEXT: ld1 { v2.s }[2], [x8] +; CHECK-SD-NEXT: add x8, sp, #16 +; CHECK-SD-NEXT: ld1 { v2.s }[3], [x8] ; CHECK-SD-NEXT: mov v0.s[3], w3 -; CHECK-SD-NEXT: ushl v1.4s, v1.4s, v4.4s -; CHECK-SD-NEXT: ushl v2.4s, v2.4s, v5.4s -; CHECK-SD-NEXT: orr v1.16b, v2.16b, v1.16b +; CHECK-SD-NEXT: shl v1.4s, v1.4s, #3 +; CHECK-SD-NEXT: usra v1.4s, v3.4s, #29 ; CHECK-SD-NEXT: shl v0.4s, v0.4s, #3 ; CHECK-SD-NEXT: mov w5, v1.s[1] ; CHECK-SD-NEXT: mov w6, v1.s[2] ; CHECK-SD-NEXT: fmov w4, s1 -; CHECK-SD-NEXT: usra v0.4s, v3.4s, #29 +; CHECK-SD-NEXT: usra v0.4s, v2.4s, #29 ; CHECK-SD-NEXT: mov w1, v0.s[1] ; CHECK-SD-NEXT: mov w2, v0.s[2] ; CHECK-SD-NEXT: mov w3, v0.s[3] @@ -4225,36 +4210,31 @@ define <7 x i32> @fshr_v7i32_c(<7 x i32> %a, <7 x i32> %b) { ; CHECK-SD-LABEL: fshr_v7i32_c: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fmov s0, w0 -; CHECK-SD-NEXT: fmov s2, w4 -; CHECK-SD-NEXT: ldr s1, [sp, #24] -; CHECK-SD-NEXT: fmov s3, w7 +; CHECK-SD-NEXT: fmov s1, w4 ; CHECK-SD-NEXT: mov x8, sp +; CHECK-SD-NEXT: fmov s2, w7 +; CHECK-SD-NEXT: ldr s3, [sp, #24] ; CHECK-SD-NEXT: add x9, sp, #32 -; CHECK-SD-NEXT: ld1 { v1.s }[1], [x9] -; CHECK-SD-NEXT: add x9, sp, #40 -; CHECK-SD-NEXT: adrp x10, .LCPI135_1 ; CHECK-SD-NEXT: mov v0.s[1], w1 -; 
CHECK-SD-NEXT: mov v2.s[1], w5 -; CHECK-SD-NEXT: ldr q5, [x10, :lo12:.LCPI135_1] -; CHECK-SD-NEXT: ld1 { v3.s }[1], [x8] +; CHECK-SD-NEXT: mov v1.s[1], w5 +; CHECK-SD-NEXT: ld1 { v3.s }[1], [x9] +; CHECK-SD-NEXT: ld1 { v2.s }[1], [x8] ; CHECK-SD-NEXT: add x8, sp, #8 -; CHECK-SD-NEXT: ld1 { v1.s }[2], [x9] -; CHECK-SD-NEXT: add x9, sp, #16 +; CHECK-SD-NEXT: add x9, sp, #40 +; CHECK-SD-NEXT: ld1 { v3.s }[2], [x9] ; CHECK-SD-NEXT: mov v0.s[2], w2 -; CHECK-SD-NEXT: mov v2.s[2], w6 -; CHECK-SD-NEXT: ld1 { v3.s }[2], [x8] -; CHECK-SD-NEXT: adrp x8, .LCPI135_0 -; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI135_0] -; CHECK-SD-NEXT: ld1 { v3.s }[3], [x9] +; CHECK-SD-NEXT: mov v1.s[2], w6 +; CHECK-SD-NEXT: ld1 { v2.s }[2], [x8] +; CHECK-SD-NEXT: add x8, sp, #16 +; CHECK-SD-NEXT: ld1 { v2.s }[3], [x8] ; CHECK-SD-NEXT: mov v0.s[3], w3 -; CHECK-SD-NEXT: ushl v1.4s, v1.4s, v4.4s -; CHECK-SD-NEXT: ushl v2.4s, v2.4s, v5.4s -; CHECK-SD-NEXT: orr v1.16b, v2.16b, v1.16b +; CHECK-SD-NEXT: shl v1.4s, v1.4s, #29 +; CHECK-SD-NEXT: usra v1.4s, v3.4s, #3 ; CHECK-SD-NEXT: shl v0.4s, v0.4s, #29 ; CHECK-SD-NEXT: mov w5, v1.s[1] ; CHECK-SD-NEXT: mov w6, v1.s[2] ; CHECK-SD-NEXT: fmov w4, s1 -; CHECK-SD-NEXT: usra v0.4s, v3.4s, #3 +; CHECK-SD-NEXT: usra v0.4s, v2.4s, #3 ; CHECK-SD-NEXT: mov w1, v0.s[1] ; CHECK-SD-NEXT: mov w2, v0.s[2] ; CHECK-SD-NEXT: mov w3, v0.s[3] diff --git a/llvm/test/CodeGen/AMDGPU/nofpclass-call.ll b/llvm/test/CodeGen/AMDGPU/nofpclass-call.ll index 5eb9c9f4ed3ae..702e5e782582f 100644 --- a/llvm/test/CodeGen/AMDGPU/nofpclass-call.ll +++ b/llvm/test/CodeGen/AMDGPU/nofpclass-call.ll @@ -189,3 +189,150 @@ define <2 x half> @call_nofpclass_intrinsic_v2f16(float %x, float %y, float %z, %min = select nsz <2 x i1> %lt, <2 x half> %call0, <2 x half> %call1 ret <2 x half> %min } + +define nofpclass(nan inf) { double, double } @aggregate() { +; CHECK-LABEL: aggregate: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s16, s33 +; 
CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b64 exec, s[18:19] +; CHECK-NEXT: v_writelane_b32 v40, s16, 2 +; CHECK-NEXT: v_writelane_b32 v40, s30, 0 +; CHECK-NEXT: s_addk_i32 s32, 0x400 +; CHECK-NEXT: v_writelane_b32 v40, s31, 1 +; CHECK-NEXT: s_getpc_b64 s[16:17] +; CHECK-NEXT: s_add_u32 s16, s16, aggregate@gotpcrel32@lo+4 +; CHECK-NEXT: s_addc_u32 s17, s17, aggregate@gotpcrel32@hi+12 +; CHECK-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] +; CHECK-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-NEXT: v_readlane_b32 s31, v40, 1 +; CHECK-NEXT: s_mov_b32 s32, s33 +; CHECK-NEXT: v_readlane_b32 s4, v40, 2 +; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 +; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[6:7] +; CHECK-NEXT: s_mov_b32 s33, s4 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] +entry: + %call.i.i = call { double, double } @aggregate() + ret { double, double } %call.i.i +} + +declare hidden nofpclass(nan inf) { float, float } @aggregate_f32() + +define { float, float } @aggregate_use(float %z) { +; CHECK-LABEL: aggregate_use: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s16, s33 +; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 +; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b64 exec, s[18:19] +; CHECK-NEXT: v_writelane_b32 v41, s16, 2 +; CHECK-NEXT: s_addk_i32 s32, 0x400 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: v_writelane_b32 v41, s30, 0 +; CHECK-NEXT: v_writelane_b32 v41, s31, 1 +; CHECK-NEXT: s_getpc_b64 s[16:17] +; CHECK-NEXT: s_add_u32 s16, s16, aggregate_f32@rel32@lo+4 +; 
CHECK-NEXT: s_addc_u32 s17, s17, aggregate_f32@rel32@hi+12 +; CHECK-NEXT: v_mov_b32_e32 v40, v0 +; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] +; CHECK-NEXT: v_max_f32_e32 v2, v40, v40 +; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; CHECK-NEXT: v_readlane_b32 s30, v41, 0 +; CHECK-NEXT: v_min_f32_e32 v0, v0, v2 +; CHECK-NEXT: v_min_f32_e32 v1, v1, v2 +; CHECK-NEXT: v_readlane_b32 s31, v41, 1 +; CHECK-NEXT: s_mov_b32 s32, s33 +; CHECK-NEXT: v_readlane_b32 s4, v41, 2 +; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 +; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[6:7] +; CHECK-NEXT: s_mov_b32 s33, s4 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %call = call nofpclass(nan inf) { float, float } @aggregate_f32() + %i = extractvalue { float, float } %call, 0 + %i1 = extractvalue { float, float } %call, 1 + %min0 = call float @llvm.minnum.f32(float %i, float %z) + %min1 = call float @llvm.minnum.f32(float %i1, float %z) + %insert.0 = insertvalue { float, float } poison, float %min0, 0 + %insert.1 = insertvalue { float, float } %insert.0, float %min1, 1 + ret { float, float } %insert.1 +} + +define internal <5 x double> @func_v5f64(ptr addrspace(1) %ptr) { +; CHECK-LABEL: func_v5f64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v11, v1 +; CHECK-NEXT: v_mov_b32_e32 v10, v0 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v[10:11], off glc +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_load_dwordx4 v[4:7], v[10:11], off offset:16 glc +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_load_dwordx2 v[8:9], v[10:11], off offset:32 glc +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %ld = load volatile <5 x double>, ptr addrspace(1) %ptr + ret <5 x double> %ld +} + +define <5 x double> @call_nofpclass_funcs_v5f64_non_mvt_vector(ptr addrspace(1) %ptr) { +; CHECK-LABEL: 
call_nofpclass_funcs_v5f64_non_mvt_vector: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s18, s33 +; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1 +; CHECK-NEXT: buffer_store_dword v24, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b64 exec, s[16:17] +; CHECK-NEXT: v_writelane_b32 v24, s30, 0 +; CHECK-NEXT: s_addk_i32 s32, 0x400 +; CHECK-NEXT: v_writelane_b32 v24, s31, 1 +; CHECK-NEXT: s_getpc_b64 s[16:17] +; CHECK-NEXT: s_add_u32 s16, s16, func_v5f64@rel32@lo+4 +; CHECK-NEXT: s_addc_u32 s17, s17, func_v5f64@rel32@hi+12 +; CHECK-NEXT: v_mov_b32_e32 v22, v1 +; CHECK-NEXT: v_mov_b32_e32 v23, v0 +; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] +; CHECK-NEXT: v_mov_b32_e32 v12, v0 +; CHECK-NEXT: v_mov_b32_e32 v13, v1 +; CHECK-NEXT: v_mov_b32_e32 v0, v23 +; CHECK-NEXT: v_mov_b32_e32 v1, v22 +; CHECK-NEXT: v_mov_b32_e32 v14, v2 +; CHECK-NEXT: v_mov_b32_e32 v15, v3 +; CHECK-NEXT: v_mov_b32_e32 v16, v4 +; CHECK-NEXT: v_mov_b32_e32 v17, v5 +; CHECK-NEXT: v_mov_b32_e32 v18, v6 +; CHECK-NEXT: v_mov_b32_e32 v19, v7 +; CHECK-NEXT: v_mov_b32_e32 v20, v8 +; CHECK-NEXT: v_mov_b32_e32 v21, v9 +; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] +; CHECK-NEXT: v_readlane_b32 s30, v24, 0 +; CHECK-NEXT: v_min_f64 v[0:1], v[12:13], v[0:1] +; CHECK-NEXT: v_min_f64 v[2:3], v[14:15], v[2:3] +; CHECK-NEXT: v_min_f64 v[4:5], v[16:17], v[4:5] +; CHECK-NEXT: v_min_f64 v[6:7], v[18:19], v[6:7] +; CHECK-NEXT: v_min_f64 v[8:9], v[20:21], v[8:9] +; CHECK-NEXT: v_readlane_b32 s31, v24, 1 +; CHECK-NEXT: s_mov_b32 s32, s33 +; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: buffer_load_dword v24, off, s[0:3], s33 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[4:5] +; CHECK-NEXT: s_mov_b32 s33, s18 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %call0 = call nofpclass(nan) <5 x double> @func_v5f64(ptr addrspace(1) %ptr) + %call1 = call nofpclass(nan) <5 x double> 
@func_v5f64(ptr addrspace(1) %ptr) + %min = call <5 x double> @llvm.minnum.v5f64(<5 x double> %call0, <5 x double> %call1) + ret <5 x double> %min +} diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-no-unclustered-regions.mir b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-no-unclustered-regions.mir new file mode 100644 index 0000000000000..f08facb503f24 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-no-unclustered-regions.mir @@ -0,0 +1,56 @@ +# REQUIRES: asserts +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -start-before=machine-scheduler -stop-after=greedy,2 -stress-regalloc=4 -debug-only=machine-scheduler %s -o - 2>&1 | FileCheck %s + +--- | + define amdgpu_kernel void @no_sched_metric_due_to_spills() #0 { + ret void + } + + attributes #0 = { "amdgpu-flat-work-group-size"="1,256" } +... + +# This test checks for the following scenario: Unclustered high-RP-reschedule +# stage raises the occupancy target temporarily but no region gets scheduled +# because of constraints. Then, DAG and MFI min-occupancy should not be changed +# at the end of the unclustered schedule stage. +# CHECK: Retrying function scheduling without clustering. Aggressively try to reduce register pressure to achieve occupancy 5. +# CHECK: Unclustered High Register Pressure Reschedule: No regions scheduled, min occupancy stays at 4, MFI occupancy stays at 4. 
+ +--- +name: no_sched_metric_due_to_spills +tracksRegLiveness: true +machineFunctionInfo: + stackPtrOffsetReg: '$sgpr32' + occupancy: 4 +body: | + bb.0: + liveins: $vgpr0, $sgpr0_sgpr1, $sgpr15 + + %0:sgpr_32 = COPY $sgpr15 + %1:sgpr_64 = COPY $sgpr0_sgpr1 + %2:vgpr_32 = COPY $vgpr0 + %3:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 0, 0 :: (dereferenceable invariant load (s128), addrspace 4) + undef %4.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 16, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) + %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1, 32, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4) + %6:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1, 64, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4) + %7:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1, 84, 0 :: (dereferenceable invariant load (s32), addrspace 4) + %8:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1, 112, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4) + %9:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1, 128, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4) + %10:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1, 176, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4) + %11:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1, 192, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4) + %12:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1, 216, 0 :: (dereferenceable invariant load (s64), addrspace 4) + %13:sreg_32 = S_ADD_I32 %12.sub0, 127, implicit-def dead $scc + %14:sreg_32 = S_ASHR_I32 %13, 31, implicit-def dead $scc + %15:sreg_32 = S_LSHR_B32 %14, 25, implicit-def dead $scc + %16:sreg_32 = S_ADD_I32 %13, %15, implicit-def dead $scc + %17:sreg_32 = S_ASHR_I32 %16, 7, implicit-def dead $scc + %18:sreg_32 = S_ADD_I32 %12.sub1, 255, implicit-def dead $scc + %19:sreg_32 = S_ASHR_I32 %18, 31, implicit-def dead $scc + %20:sreg_32 = S_LSHR_B32 %19, 24, implicit-def dead $scc + %21:sreg_32 = S_ADD_I32 %18, %20, implicit-def dead $scc + %22:sreg_32 = S_ASHR_I32 %21, 
8, implicit-def dead $scc + %23:sreg_32 = nsw S_MUL_I32 %22, %17 + %24:sreg_32 = S_ASHR_I32 %0, 31, implicit-def dead $scc + S_ENDPGM 0 + +... diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir index 696962a88c8b8..8ae50d8e0e071 100644 --- a/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir @@ -31,7 +31,7 @@ body: | ... -# This test is an example where conversion to three-address form would be beneficial. +# This test is an example where conversion to three-address form is beneficial. --- name: test_fmac_reuse_bundle body: | @@ -41,11 +41,10 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GCN-NEXT: BUNDLE implicit-def [[COPY1]], implicit [[DEF]], implicit [[DEF1]], implicit [[COPY1]](tied-def 0), implicit $mode, implicit $exec { - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e32 killed [[DEF]], killed [[DEF1]], killed [[COPY1]], implicit $mode, implicit $exec + ; GCN-NEXT: BUNDLE implicit-def %3, implicit [[DEF]], implicit [[DEF1]], implicit [[COPY]], implicit $mode, implicit $exec { + ; GCN-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F32_e64 0, killed [[DEF]], 0, killed [[DEF1]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: } - ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[COPY]], 0, implicit $exec + ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FMA_F32_e64_]], [[COPY]], 0, implicit $exec %2:vgpr_32 = COPY $vgpr0 %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll index b85cb3a4f191c..6fff0d9b155ef 100644 --- a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll +++ 
b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll @@ -450,7 +450,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { ; ARM7-NEXT: .short 9 @ 0x9 ; ARM7-NEXT: .short 10 @ 0xa ; ARM7-NEXT: .short 10 @ 0xa -; ARM7-NEXT: .short 10 @ 0xa +; ARM7-NEXT: .short 0 @ 0x0 ; ARM7-NEXT: .LCPI4_4: ; ARM7-NEXT: .short 341 @ 0x155 ; ARM7-NEXT: .short 292 @ 0x124 @@ -502,7 +502,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { ; ARM8-NEXT: .short 9 @ 0x9 ; ARM8-NEXT: .short 10 @ 0xa ; ARM8-NEXT: .short 10 @ 0xa -; ARM8-NEXT: .short 10 @ 0xa +; ARM8-NEXT: .short 0 @ 0x0 ; ARM8-NEXT: .LCPI4_4: ; ARM8-NEXT: .short 341 @ 0x155 ; ARM8-NEXT: .short 292 @ 0x124 @@ -554,7 +554,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { ; NEON7-NEXT: .short 9 @ 0x9 ; NEON7-NEXT: .short 10 @ 0xa ; NEON7-NEXT: .short 10 @ 0xa -; NEON7-NEXT: .short 10 @ 0xa +; NEON7-NEXT: .short 0 @ 0x0 ; NEON7-NEXT: .LCPI4_4: ; NEON7-NEXT: .short 341 @ 0x155 ; NEON7-NEXT: .short 292 @ 0x124 @@ -606,7 +606,7 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { ; NEON8-NEXT: .short 9 @ 0x9 ; NEON8-NEXT: .short 10 @ 0xa ; NEON8-NEXT: .short 10 @ 0xa -; NEON8-NEXT: .short 10 @ 0xa +; NEON8-NEXT: .short 0 @ 0x0 ; NEON8-NEXT: .LCPI4_4: ; NEON8-NEXT: .short 341 @ 0x155 ; NEON8-NEXT: .short 292 @ 0x124 diff --git a/llvm/test/CodeGen/ARM/xxstructor-nodef.ll b/llvm/test/CodeGen/ARM/xxstructor-nodef.ll new file mode 100644 index 0000000000000..db17b2b1c21ab --- /dev/null +++ b/llvm/test/CodeGen/ARM/xxstructor-nodef.ll @@ -0,0 +1,7 @@ +; RUN: llc -mtriple=arm-unknown-linux-gnueabihf < %s | FileCheck %s + +; This test contains a llvm.global_ctors with no other definitions. Make sure we do not crash in that case. 
+; CHECK: .section .init_array,"aw",%init_array + +declare ccc void @ghczmbignum_GHCziNumziBackendziSelected_init__prof_init() +@llvm.global_ctors = appending global [1 x {i32, void ()*, i8* }] [{i32, void ()*, i8* }{i32 65535, void ()* @ghczmbignum_GHCziNumziBackendziSelected_init__prof_init, i8* null } ] diff --git a/llvm/test/CodeGen/BPF/objdump_cond_op_2.ll b/llvm/test/CodeGen/BPF/objdump_cond_op_2.ll index 895b68b5a9145..ce40085feb0d0 100644 --- a/llvm/test/CodeGen/BPF/objdump_cond_op_2.ll +++ b/llvm/test/CodeGen/BPF/objdump_cond_op_2.ll @@ -25,8 +25,7 @@ define i32 @test(i32, i32) local_unnamed_addr #0 { %11 = sub nsw i32 %7, %9 %12 = icmp slt i32 %10, %11 br i1 %12, label %5, label %13 -; CHECK: r1 = r3 -; CHECK: if r2 s> r3 goto -10 +; CHECK: if r2 s> r1 goto -10 ;