[CIR][CodeGen] Inline assembly: store the results (llvm#512)

This PR adds storing of the results of an inline assembly operation. This is the **final** step (I hope :) ) from my side to support inline assembly. Some features remain unimplemented, but the basic things should work now. For example, we can do an addition and get the result - I explicitly added several tests for that, so you can try them for real. For instance, the following program, when compiled with CIR, should give you 7 as the result:

```
int add(int x, int y) {
  int a;
  __asm__("addl %[y], %[x]"
          : "=r" (a)
          : [x] "r" (x), [y] "r" (y)
  );
  return a;
}

int main() {
  printf("run %d\n", add(3, 4));
  return 0;
}
```

So, the main thing that remains is pretty printing. As I said, I added several examples, and maybe it will become clearer how to print better. I also added several tests from the original codegen in order to check that we don't fail. I can add some checks there as well once we arrive at a better solution for printing.
pysuxing · Apr 17, 2024 · 730ffda · 730ffda
1 parent 2db13d8
commit 730ffda
Show file tree

Hide file tree

Showing 8 changed files with 796 additions and 82 deletions.
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -31,6 +31,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/IR/BuiltinAttributeInterfaces.td"
 include "mlir/IR/EnumAttr.td"
 include "mlir/IR/SymbolInterfaces.td"
+include "mlir/IR/CommonAttrConstraints.td"
 
 //===----------------------------------------------------------------------===//
 // CIR Ops
@@ -3329,9 +3330,13 @@ def CIR_InlineAsmOp : CIR_Op<"asm", [RecursiveMemoryEffects]> {
     - the output variable index referenced by the input operands.
     - the index of early-clobber operand
 
-    Operand attributes is a storage of attributes, where each element corresponds
-    to the operand with the same index. The first index relates to the operation
-    result.
+    Operand attributes is a storage, where each element corresponds to the operand with
+    the same index. The first index relates to the operation result (if any). 
+    Note, the operands themselves are stored as VariadicOfVariadic in the next order:
+    output, input and then in/out operands.
+
+    Note, when several output operands are present, the result type may be represented as
+    an anon struct type.
 
     Example:
     ```C++
@@ -3341,38 +3346,58 @@ def CIR_InlineAsmOp : CIR_Op<"asm", [RecursiveMemoryEffects]> {
     ```
 
     ```mlir
+    !ty_22anon2E022 = !cir.struct<struct "anon.0" {!cir.int<s, 32>, !cir.int<s, 32>}>
+    !ty_22anon2E122 = !cir.struct<struct "anon.1" {!cir.int<s, 32>, !cir.int<s, 32>}>
+    ...
     %0 = cir.alloca !s32i, cir.ptr <!s32i>, ["x", init] 
     %1 = cir.alloca !s32i, cir.ptr <!s32i>, ["y", init]
     ... 
     %2 = cir.load %0 : cir.ptr <!s32i>, !s32i
     %3 = cir.load %1 : cir.ptr <!s32i>, !s32i
-    cir.asm(x86_att, {"foo" "~{dirflag},~{fpsr},~{flags}"}) side_effects : () -> ()
-    cir.asm(x86_att, {"bar $$42 $0" "=r,=&r,1,~{dirflag},~{fpsr},~{flags}"}) %2 : (!s32i) -> ()
-    cir.asm(x86_att, {"baz $$42 $0" "=r,=&r,0,1,~{dirflag},~{fpsr},~{flags}"}) %3, %2 : (!s32i, !s32i) -> ()
+
+    cir.asm(x86_att,
+      out = [],
+      in = [],
+      in_out = [],
+      {"foo" "~{dirflag},~{fpsr},~{flags}"}) side_effects
+
+    cir.asm(x86_att,
+      out = [],
+      in = [],
+      in_out = [%2 : !s32i],
+      {"bar $$42 $0" "=r,=&r,1,~{dirflag},~{fpsr},~{flags}"}) -> !ty_22anon2E022
+
+    cir.asm(x86_att,
+      out = [],
+      in = [%3 : !s32i],
+      in_out = [%2 : !s32i],
+      {"baz $$42 $0" "=r,=&r,0,1,~{dirflag},~{fpsr},~{flags}"}) -> !ty_22anon2E122
     ```
   }];
 
   let results = (outs Optional<CIR_AnyType>:$res);
 
   let arguments = (
-    ins Variadic<AnyType>:$operands,
+    ins VariadicOfVariadic<AnyType, "operands_segments">:$operands,
         StrAttr:$asm_string,
         StrAttr:$constraints,
         UnitAttr:$side_effects,
         AsmFlavor:$asm_flavor,
-        OptionalAttr<ArrayAttr>:$operand_attrs);
-
-  let assemblyFormat = [{
-    `(`
-    $asm_flavor`,`
-    `{` $asm_string $constraints `}`
-    `)`
-    (`operand_attrs` `=` $operand_attrs^)?
-    (`side_effects` $side_effects^)?
-    attr-dict
-    operands `:` functional-type(operands, results)
-   }];
+        ArrayAttr:$operand_attrs,
+        DenseI32ArrayAttr:$operands_segments
+        );
 
+  let builders = [OpBuilder<(ins
+    "ArrayRef<ValueRange>":$operands,
+    "StringRef":$asm_string,
+    "StringRef":$constraints,
+    "bool":$side_effects,
+    "AsmFlavor":$asm_flavor,
+    "ArrayRef<Attribute>":$operand_attrs
+  )>
+  ];
+
+  let hasCustomAssemblyFormat = 1;
 }
 
 //===----------------------------------------------------------------------===//

diff --git a/clang/lib/CIR/CodeGen/CIRAsm.cpp b/clang/lib/CIR/CodeGen/CIRAsm.cpp
@@ -251,15 +251,96 @@ CIRGenFunction::buildAsmInput(const TargetInfo::ConstraintInfo &Info,
 
   if (Info.allowsRegister() || !Info.allowsMemory())
     if (CIRGenFunction::hasScalarEvaluationKind(InputExpr->getType()))
-      return {buildScalarExpr(InputExpr), nullptr};
+      return {buildScalarExpr(InputExpr), mlir::Type()};
   if (InputExpr->getStmtClass() == Expr::CXXThisExprClass)
-    return {buildScalarExpr(InputExpr), nullptr};
+    return {buildScalarExpr(InputExpr), mlir::Type()};
   InputExpr = InputExpr->IgnoreParenNoopCasts(getContext());
   LValue Dest = buildLValue(InputExpr);
   return buildAsmInputLValue(Info, Dest, InputExpr->getType(), ConstraintStr,
                              InputExpr->getExprLoc());
 }
 
+static void buildAsmStores(CIRGenFunction &CGF, const AsmStmt &S,
+                           const llvm::ArrayRef<mlir::Value> RegResults,
+                           const llvm::ArrayRef<mlir::Type> ResultRegTypes,
+                           const llvm::ArrayRef<mlir::Type> ResultTruncRegTypes,
+                           const llvm::ArrayRef<LValue> ResultRegDests,
+                           const llvm::ArrayRef<QualType> ResultRegQualTys,
+                           const llvm::BitVector &ResultTypeRequiresCast,
+                           const llvm::BitVector &ResultRegIsFlagReg) {
+  CIRGenBuilderTy &Builder = CGF.getBuilder();
+  CIRGenModule &CGM = CGF.CGM;
+  auto CTX = Builder.getContext();
+
+  assert(RegResults.size() == ResultRegTypes.size());
+  assert(RegResults.size() == ResultTruncRegTypes.size());
+  assert(RegResults.size() == ResultRegDests.size());
+  // ResultRegDests can be also populated by addReturnRegisterOutputs() above,
+  // in which case its size may grow.
+  assert(ResultTypeRequiresCast.size() <= ResultRegDests.size());
+  assert(ResultRegIsFlagReg.size() <= ResultRegDests.size());
+
+  for (unsigned i = 0, e = RegResults.size(); i != e; ++i) {
+    mlir::Value Tmp = RegResults[i];
+    mlir::Type TruncTy = ResultTruncRegTypes[i];
+
+    if ((i < ResultRegIsFlagReg.size()) && ResultRegIsFlagReg[i]) {
+      assert(!UnimplementedFeature::asm_llvm_assume());
+    }
+
+    // If the result type of the LLVM IR asm doesn't match the result type of
+    // the expression, do the conversion.
+    if (ResultRegTypes[i] != TruncTy) {
+
+      // Truncate the integer result to the right size, note that TruncTy can be
+      // a pointer.
+      if (TruncTy.isa<mlir::FloatType>())
+        Tmp = Builder.createFloatingCast(Tmp, TruncTy);
+      else if (isa<mlir::cir::PointerType>(TruncTy) &&
+               isa<mlir::cir::IntType>(Tmp.getType())) {
+        uint64_t ResSize = CGM.getDataLayout().getTypeSizeInBits(TruncTy);
+        Tmp = Builder.createIntCast(
+            Tmp, mlir::cir::IntType::get(CTX, (unsigned)ResSize, false));
+        Tmp = Builder.createIntToPtr(Tmp, TruncTy);
+      } else if (isa<mlir::cir::PointerType>(Tmp.getType()) &&
+                 isa<mlir::cir::IntType>(TruncTy)) {
+        uint64_t TmpSize = CGM.getDataLayout().getTypeSizeInBits(Tmp.getType());
+        Tmp = Builder.createPtrToInt(
+            Tmp, mlir::cir::IntType::get(CTX, (unsigned)TmpSize, false));
+        Tmp = Builder.createIntCast(Tmp, TruncTy);
+      } else if (isa<mlir::cir::IntType>(TruncTy)) {
+        Tmp = Builder.createIntCast(Tmp, TruncTy);
+      } else if (false /*TruncTy->isVectorTy()*/) {
+        assert(!UnimplementedFeature::asm_vector_type());
+      }
+    }
+
+    LValue Dest = ResultRegDests[i];
+    // ResultTypeRequiresCast elements correspond to the first
+    // ResultTypeRequiresCast.size() elements of RegResults.
+    if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) {
+      unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]);
+      Address A = Dest.getAddress().withElementType(ResultRegTypes[i]);
+      if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) {
+        Builder.createStore(CGF.getLoc(S.getAsmLoc()), Tmp, A);
+        continue;
+      }
+
+      QualType Ty =
+          CGF.getContext().getIntTypeForBitwidth(Size, /*Signed=*/false);
+      if (Ty.isNull()) {
+        const Expr *OutExpr = S.getOutputExpr(i);
+        CGM.getDiags().Report(OutExpr->getExprLoc(),
+                              diag::err_store_value_to_reg);
+        return;
+      }
+      Dest = CGF.makeAddrLValue(A, Ty);
+    }
+
+    CGF.buildStoreThroughLValue(RValue::get(Tmp), Dest);
+  }
+}
+
 mlir::LogicalResult CIRGenFunction::buildAsmStmt(const AsmStmt &S) {
   // Assemble the final asm string.
   std::string AsmString = S.generateAsmString(getContext());
@@ -277,19 +358,24 @@ mlir::LogicalResult CIRGenFunction::buildAsmStmt(const AsmStmt &S) {
   std::vector<mlir::Type> ResultTruncRegTypes;
   std::vector<mlir::Type> ArgTypes;
   std::vector<mlir::Type> ArgElemTypes;
+  std::vector<mlir::Value> OutArgs;
+  std::vector<mlir::Value> InArgs;
+  std::vector<mlir::Value> InOutArgs;
   std::vector<mlir::Value> Args;
   llvm::BitVector ResultTypeRequiresCast;
   llvm::BitVector ResultRegIsFlagReg;
 
   // Keep track of input constraints.
   std::string InOutConstraints;
-  std::vector<mlir::Value> InOutArgs;
   std::vector<mlir::Type> InOutArgTypes;
   std::vector<mlir::Type> InOutArgElemTypes;
 
   // Keep track of out constraints for tied input operand.
   std::vector<std::string> OutputConstraints;
 
+  // Keep track of defined physregs.
+  llvm::SmallSet<std::string, 8> PhysRegOutputs;
+
   // An inline asm can be marked readonly if it meets the following conditions:
   //  - it doesn't have any sideeffects
   //  - it doesn't clobber memory
@@ -314,6 +400,10 @@ mlir::LogicalResult CIRGenFunction::buildAsmStmt(const AsmStmt &S) {
         AddVariableConstraints(OutputConstraint, *OutExpr, getTarget(), CGM, S,
                                Info.earlyClobber(), &GCCReg);
 
+    // Give an error on multiple outputs to same physreg.
+    if (!GCCReg.empty() && !PhysRegOutputs.insert(GCCReg).second)
+      CGM.Error(S.getAsmLoc(), "multiple outputs to hard register: " + GCCReg);
+
     OutputConstraints.push_back(OutputConstraint);
     LValue Dest = buildLValue(OutExpr);
 
@@ -392,6 +482,7 @@ mlir::LogicalResult CIRGenFunction::buildAsmStmt(const AsmStmt &S) {
 
       ArgTypes.push_back(DestAddr.getType());
       ArgElemTypes.push_back(DestAddr.getElementType());
+      OutArgs.push_back(DestAddr.getPointer());
       Args.push_back(DestAddr.getPointer());
       Constraints += "=*";
       Constraints += OutputConstraint;
@@ -412,6 +503,9 @@ mlir::LogicalResult CIRGenFunction::buildAsmStmt(const AsmStmt &S) {
               *this, OutputConstraint, Arg.getType()))
         Arg = builder.createBitcast(Arg, AdjTy);
 
+      // Update largest vector width for any vector types.
+      assert(!UnimplementedFeature::asm_vector_type());
+
       // Only tie earlyclobber physregs.
       if (Info.allowsRegister() && (GCCReg.empty() || Info.earlyClobber()))
         InOutConstraints += llvm::utostr(i);
@@ -424,11 +518,28 @@ mlir::LogicalResult CIRGenFunction::buildAsmStmt(const AsmStmt &S) {
     }
   } // iterate over output operands
 
+  // If this is a Microsoft-style asm blob, store the return registers (EAX:EDX)
+  // to the return value slot. Only do this when returning in registers.
+  if (isa<MSAsmStmt>(&S)) {
+    const ABIArgInfo &RetAI = CurFnInfo->getReturnInfo();
+    if (RetAI.isDirect() || RetAI.isExtend()) {
+      // Make a fake lvalue for the return value slot.
+      LValue ReturnSlot = makeAddrLValue(ReturnValue, FnRetTy);
+      CGM.getTargetCIRGenInfo().addReturnRegisterOutputs(
+          *this, ReturnSlot, Constraints, ResultRegTypes, ResultTruncRegTypes,
+          ResultRegDests, AsmString, S.getNumOutputs());
+      SawAsmBlock = true;
+    }
+  }
+
   for (unsigned i = 0, e = S.getNumInputs(); i != e; i++) {
     const Expr *InputExpr = S.getInputExpr(i);
 
     TargetInfo::ConstraintInfo &Info = InputConstraintInfos[i];
 
+    if (Info.allowsMemory())
+      ReadNone = false;
+
     if (!Constraints.empty())
       Constraints += ',';
 
@@ -481,17 +592,21 @@ mlir::LogicalResult CIRGenFunction::buildAsmStmt(const AsmStmt &S) {
       CGM.getDiags().Report(S.getAsmLoc(), diag::err_asm_invalid_type_in_input)
           << InputExpr->getType() << InputConstraint;
 
+    // Update largest vector width for any vector types.
+    assert(!UnimplementedFeature::asm_vector_type());
+
     ArgTypes.push_back(Arg.getType());
     ArgElemTypes.push_back(ArgElemType);
+    InArgs.push_back(Arg);
     Args.push_back(Arg);
     Constraints += InputConstraint;
   } // iterate over input operands
 
   // Append the "input" part of inout constraints.
   for (unsigned i = 0, e = InOutArgs.size(); i != e; i++) {
+    Args.push_back(InOutArgs[i]);
     ArgTypes.push_back(InOutArgTypes[i]);
     ArgElemTypes.push_back(InOutArgElemTypes[i]);
-    Args.push_back(InOutArgs[i]);
   }
   Constraints += InOutConstraints;
 
@@ -509,9 +624,15 @@ mlir::LogicalResult CIRGenFunction::buildAsmStmt(const AsmStmt &S) {
   }
 
   bool HasSideEffect = S.isVolatile() || S.getNumOutputs() == 0;
+  std::vector<mlir::Value> RegResults;
+
+  llvm::SmallVector<mlir::ValueRange, 8> operands;
+  operands.push_back(OutArgs);
+  operands.push_back(InArgs);
+  operands.push_back(InOutArgs);
 
   auto IA = builder.create<mlir::cir::InlineAsmOp>(
-      getLoc(S.getAsmLoc()), ResultType, Args, AsmString, Constraints,
+      getLoc(S.getAsmLoc()), ResultType, operands, AsmString, Constraints,
       HasSideEffect, inferFlavor(CGM, S), mlir::ArrayAttr());
 
   if (false /*IsGCCAsmGoto*/) {
@@ -525,28 +646,55 @@ mlir::LogicalResult CIRGenFunction::buildAsmStmt(const AsmStmt &S) {
     if (IA.getNumResults())
       result = IA.getResult(0);
 
-    std::vector<mlir::Attribute> operandAttrs;
-
-    // this is for the lowering to LLVM from LLVm dialect. Otherwise, if we
-    // don't have the result (i.e. void type as a result of operation), the
-    // element type attribute will be attached to the whole instruction, but not
-    // to the operand
-    if (!IA.getNumResults())
-      operandAttrs.push_back(OptNoneAttr::get(builder.getContext()));
+    llvm::SmallVector<mlir::Attribute> operandAttrs;
 
+    int i = 0;
     for (auto typ : ArgElemTypes) {
       if (typ) {
-        operandAttrs.push_back(mlir::TypeAttr::get(typ));
+        auto op = Args[i++];
+        assert(op.getType().isa<mlir::cir::PointerType>() &&
+               "pointer type expected");
+        assert(cast<mlir::cir::PointerType>(op.getType()).getPointee() == typ &&
+               "element type differs from pointee type!");
+
+        operandAttrs.push_back(mlir::UnitAttr::get(builder.getContext()));
       } else {
         // We need to add an attribute for every arg since later, during
         // the lowering to LLVM IR the attributes will be assigned to the
         // CallInsn argument by index, i.e. we can't skip null type here
-        operandAttrs.push_back(OptNoneAttr::get(builder.getContext()));
+        operandAttrs.push_back(mlir::Attribute());
       }
     }
 
+    assert(Args.size() == operandAttrs.size() &&
+           "The number of attributes is not even with the number of operands");
+
     IA.setOperandAttrsAttr(builder.getArrayAttr(operandAttrs));
+
+    if (ResultRegTypes.size() == 1) {
+      RegResults.push_back(result);
+    } else if (ResultRegTypes.size() > 1) {
+      auto alignment = CharUnits::One();
+      auto sname = cast<mlir::cir::StructType>(ResultType).getName();
+      auto dest = buildAlloca(sname, ResultType, getLoc(S.getAsmLoc()),
+                              alignment, false);
+      auto addr = Address(dest, alignment);
+      builder.createStore(getLoc(S.getAsmLoc()), result, addr);
+
+      for (unsigned i = 0, e = ResultRegTypes.size(); i != e; ++i) {
+        auto typ = builder.getPointerTo(ResultRegTypes[i]);
+        auto ptr =
+            builder.createGetMember(getLoc(S.getAsmLoc()), typ, dest, "", i);
+        auto tmp =
+            builder.createLoad(getLoc(S.getAsmLoc()), Address(ptr, alignment));
+        RegResults.push_back(tmp);
+      }
+    }
   }
 
+  buildAsmStores(*this, S, RegResults, ResultRegTypes, ResultTruncRegTypes,
+                 ResultRegDests, ResultRegQualTys, ResultTypeRequiresCast,
+                 ResultRegIsFlagReg);
+
   return mlir::success();
 }