Revert "[lld] Support thumb PLTs" (#93631)

Reverts #86223: Windows pre-merge is broken.
llvm · May 29, 2024 · 7832769 · 7832769
1 parent bd135c3
commit 7832769
Show file tree

Hide file tree

Showing 4 changed files with 53 additions and 262 deletions.
diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp
@@ -231,71 +231,36 @@ static void writePltHeaderLong(uint8_t *buf) {
 // The default PLT header requires the .got.plt to be within 128 Mb of the
 // .plt in the positive direction.
 void ARM::writePltHeader(uint8_t *buf) const {
-  if (config->armThumbPLTs) {
-    // The instruction sequence for thumb:
-    //
-    // 0: b500          push    {lr}
-    // 2: f8df e008     ldr.w   lr, [pc, #0x8]          @ 0xe <func+0xe>
-    // 6: 44fe          add     lr, pc
-    // 8: f85e ff08     ldr     pc, [lr, #8]!
-    // e:               .word   .got.plt - .plt - 16
-    //
-    // At 0x8, we want to jump to .got.plt, the -16 accounts for 8 bytes from
-    // `pc` in the add instruction and 8 bytes for the `lr` adjustment.
-    //
-    uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 16;
-    assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");
-    write16(buf + 0, 0xb500);
-    // Split into two halves to support endianness correctly.
-    write16(buf + 2, 0xf8df);
-    write16(buf + 4, 0xe008);
-    write16(buf + 6, 0x44fe);
-    // Split into two halves to support endianness correctly.
-    write16(buf + 8, 0xf85e);
-    write16(buf + 10, 0xff08);
-    write32(buf + 12, offset);
-
-    memcpy(buf + 16, trapInstr.data(), 4);  // Pad to 32-byte boundary
-    memcpy(buf + 20, trapInstr.data(), 4);
-    memcpy(buf + 24, trapInstr.data(), 4);
-    memcpy(buf + 28, trapInstr.data(), 4);
-  } else {
-    // Use a similar sequence to that in writePlt(), the difference is the
-    // calling conventions mean we use lr instead of ip. The PLT entry is
-    // responsible for saving lr on the stack, the dynamic loader is responsible
-    // for reloading it.
-    const uint32_t pltData[] = {
-        0xe52de004, // L1: str lr, [sp,#-4]!
-        0xe28fe600, //     add lr, pc,  #0x0NN00000 &(.got.plt - L1 - 4)
-        0xe28eea00, //     add lr, lr,  #0x000NN000 &(.got.plt - L1 - 4)
-        0xe5bef000, //     ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
-    };
-
-    uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4;
-    if (!llvm::isUInt<27>(offset)) {
-      // We cannot encode the Offset, use the long form.
-      writePltHeaderLong(buf);
-      return;
-    }
-    write32(buf + 0, pltData[0]);
-    write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff));
-    write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff));
-    write32(buf + 12, pltData[3] | (offset & 0xfff));
-    memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
-    memcpy(buf + 20, trapInstr.data(), 4);
-    memcpy(buf + 24, trapInstr.data(), 4);
-    memcpy(buf + 28, trapInstr.data(), 4);
+  // Use a similar sequence to that in writePlt(), the difference is the calling
+  // conventions mean we use lr instead of ip. The PLT entry is responsible for
+  // saving lr on the stack, the dynamic loader is responsible for reloading
+  // it.
+  const uint32_t pltData[] = {
+      0xe52de004, // L1: str lr, [sp,#-4]!
+      0xe28fe600, //     add lr, pc,  #0x0NN00000 &(.got.plt - L1 - 4)
+      0xe28eea00, //     add lr, lr,  #0x000NN000 &(.got.plt - L1 - 4)
+      0xe5bef000, //     ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
+  };
+
+  uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4;
+  if (!llvm::isUInt<27>(offset)) {
+    // We cannot encode the Offset, use the long form.
+    writePltHeaderLong(buf);
+    return;
   }
+  write32(buf + 0, pltData[0]);
+  write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff));
+  write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff));
+  write32(buf + 12, pltData[3] | (offset & 0xfff));
+  memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
+  memcpy(buf + 20, trapInstr.data(), 4);
+  memcpy(buf + 24, trapInstr.data(), 4);
+  memcpy(buf + 28, trapInstr.data(), 4);
 }
 
 void ARM::addPltHeaderSymbols(InputSection &isec) const {
-  if (config->armThumbPLTs) {
-    addSyntheticLocal("$t", STT_NOTYPE, 0, 0, isec);
-    addSyntheticLocal("$d", STT_NOTYPE, 12, 0, isec);
-  } else {
-    addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec);
-    addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec);
-  }
+  addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec);
+  addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec);
 }
 
 // Long form PLT entries that do not have any restrictions on the displacement
@@ -314,65 +279,32 @@ static void writePltLong(uint8_t *buf, uint64_t gotPltEntryAddr,
 // .plt in the positive direction.
 void ARM::writePlt(uint8_t *buf, const Symbol &sym,
                    uint64_t pltEntryAddr) const {
+  // The PLT entry is similar to the example given in Appendix A of ELF for
+  // the Arm Architecture. Instead of using the Group Relocations to find the
+  // optimal rotation for the 8-bit immediate used in the add instructions we
+  // hard code the most compact rotations for simplicity. This saves a load
+  // instruction over the long plt sequences.
+  const uint32_t pltData[] = {
+      0xe28fc600, // L1: add ip, pc,  #0x0NN00000  Offset(&(.got.plt) - L1 - 8
+      0xe28cca00, //     add ip, ip,  #0x000NN000  Offset(&(.got.plt) - L1 - 8
+      0xe5bcf000, //     ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8
+  };
 
-  if (!config->armThumbPLTs) {
-    uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8;
-
-    // The PLT entry is similar to the example given in Appendix A of ELF for
-    // the Arm Architecture. Instead of using the Group Relocations to find the
-    // optimal rotation for the 8-bit immediate used in the add instructions we
-    // hard code the most compact rotations for simplicity. This saves a load
-    // instruction over the long plt sequences.
-    const uint32_t pltData[] = {
-        0xe28fc600, // L1: add ip, pc,  #0x0NN00000  Offset(&(.got.plt) - L1 - 8
-        0xe28cca00, //     add ip, ip,  #0x000NN000  Offset(&(.got.plt) - L1 - 8
-        0xe5bcf000, //     ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8
-    };
-    if (!llvm::isUInt<27>(offset)) {
-      // We cannot encode the Offset, use the long form.
-      writePltLong(buf, sym.getGotPltVA(), pltEntryAddr);
-      return;
-    }
-    write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff));
-    write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff));
-    write32(buf + 8, pltData[2] | (offset & 0xfff));
-    memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary
-  } else {
-    uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 12;
-    assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");
-
-    // A PLT entry will be:
-    //
-    //       movw ip, #<lower 16 bits>
-    //       movt ip, #<upper 16 bits>
-    //       add ip, pc
-    //   L1: ldr.w pc, [ip]
-    //       b L1
-    //
-    // where ip = r12 = 0xc
-
-    // movw ip, #<lower 16 bits>
-    write16(buf + 2, 0x0c00); // use `ip`
-    relocateNoSym(buf, R_ARM_THM_MOVW_ABS_NC, offset);
-
-    // movt ip, #<upper 16 bits>
-    write16(buf + 6, 0x0c00); // use `ip`
-    relocateNoSym(buf + 4, R_ARM_THM_MOVT_ABS, offset);
-
-    write16(buf + 8, 0x44fc);       // add ip, pc
-    write16(buf + 10, 0xf8dc);      // ldr.w   pc, [ip] (bottom half)
-    write16(buf + 12, 0xf000);      // ldr.w   pc, [ip] (upper half)
-    write16(buf + 14, 0xe7fc);      // Branch to previous instruction
+  uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8;
+  if (!llvm::isUInt<27>(offset)) {
+    // We cannot encode the Offset, use the long form.
+    writePltLong(buf, sym.getGotPltVA(), pltEntryAddr);
+    return;
   }
+  write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff));
+  write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff));
+  write32(buf + 8, pltData[2] | (offset & 0xfff));
+  memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary
 }
 
 void ARM::addPltSymbols(InputSection &isec, uint64_t off) const {
-  if (config->armThumbPLTs) {
-    addSyntheticLocal("$t", STT_NOTYPE, off, 0, isec);
-  } else {
-    addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec);
-    addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec);
-  }
+  addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec);
+  addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec);
 }
 
 bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
@@ -393,8 +325,6 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
   case R_ARM_JUMP24:
     // Source is ARM, all PLT entries are ARM so no interworking required.
     // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb).
-    assert(!config->armThumbPLTs &&
-           "If the source is ARM, we should not need Thumb PLTs");
     if (s.isFunc() && expr == R_PC && (s.getVA() & 1))
       return true;
     [[fallthrough]];
@@ -405,9 +335,9 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
   }
   case R_ARM_THM_JUMP19:
   case R_ARM_THM_JUMP24:
-    // Source is Thumb, when all PLT entries are ARM interworking is required.
+    // Source is Thumb, all PLT entries are ARM so interworking is required.
     // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM).
-    if ((expr == R_PLT_PC && !config->armThumbPLTs) || (s.isFunc() && (s.getVA() & 1) == 0))
+    if (expr == R_PLT_PC || (s.isFunc() && (s.getVA() & 1) == 0))
       return true;
     [[fallthrough]];
   case R_ARM_THM_CALL: {
@@ -617,6 +547,7 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
     // STT_FUNC we choose whether to write a BL or BLX depending on the
     // value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is
     // not of type STT_FUNC then we must preserve the original instruction.
+    // PLT entries are always ARM state so we know we don't need to interwork.
     assert(rel.sym); // R_ARM_CALL is always reached via relocate().
     bool bit0Thumb = val & 1;
     bool isBlx = (read32(loc) & 0xfe000000) == 0xfa000000;
@@ -675,13 +606,12 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
     // PLT entries are always ARM state so we know we need to interwork.
     assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate().
     bool bit0Thumb = val & 1;
-    bool useThumb = bit0Thumb || config->armThumbPLTs;
     bool isBlx = (read16(loc + 2) & 0x1000) == 0;
     // lld 10.0 and before always used bit0Thumb when deciding to write a BLX
-    // even when type not STT_FUNC.
-    if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == useThumb)
+    // even when type not STT_FUNC. PLT entries generated by LLD are always ARM.
+    if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == bit0Thumb)
       stateChangeWarning(loc, rel.type, *rel.sym);
-    if ((rel.sym->isFunc() || rel.sym->isInPlt()) ? !useThumb : isBlx) {
+    if (rel.sym->isFunc() || rel.sym->isInPlt() ? !bit0Thumb : isBlx) {
       // We are writing a BLX. Ensure BLX destination is 4-byte aligned. As
       // the BLX instruction may only be two byte aligned. This must be done
       // before overflow check.

diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
@@ -217,7 +217,6 @@ struct Config {
   bool allowMultipleDefinition;
   bool fatLTOObjects;
   bool androidPackDynRelocs = false;
-  bool armThumbPLTs = false;
   bool armHasBlx = false;
   bool armHasMovtMovw = false;
   bool armJ1J2BranchEncoding = false;

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
@@ -194,18 +194,6 @@ static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) {
   if (arch >= ARMBuildAttrs::CPUArch::v8_M_Base &&
       profile == ARMBuildAttrs::MicroControllerProfile)
     config->armCMSESupport = true;
-
-  // The thumb PLT entries require Thumb2 which can be used on multiple archs.
-  // For now, let's limit it to ones where ARM isn't available and we know have
-  // Thumb2.
-  std::optional<unsigned> armISA =
-      attributes.getAttributeValue(ARMBuildAttrs::ARM_ISA_use);
-  std::optional<unsigned> thumb =
-      attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use);
-  bool noArmISA = !armISA || *armISA == ARMBuildAttrs::Not_Allowed;
-  bool hasThumb2 = thumb && *thumb >= ARMBuildAttrs::AllowThumb32;
-  if (noArmISA && hasThumb2)
-    config->armThumbPLTs = true;
 }
 
 InputFile::InputFile(Kind k, MemoryBufferRef m)

diff --git a/lld/test/ELF/armv8-thumb-plt-reloc.s b/lld/test/ELF/armv8-thumb-plt-reloc.s