From: Sylvestre Ledru Date: Wed, 14 Dec 2016 08:22:02 +0000 (+0000) Subject: Import llvm-toolchain-3.9_3.9.1.orig-lld.tar.bz2 X-Git-Tag: archive/raspbian/1%3.9.1-5+rpi1~51^5 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=fce3050c856710b5bf35fab0124f74bd5cd7fd23;p=llvm-toolchain-3.9.git Import llvm-toolchain-3.9_3.9.1.orig-lld.tar.bz2 [dgit import orig llvm-toolchain-3.9_3.9.1.orig-lld.tar.bz2] --- fce3050c856710b5bf35fab0124f74bd5cd7fd23 diff --git a/.arcconfig b/.arcconfig new file mode 100644 index 00000000..ebf4a4a6 --- /dev/null +++ b/.arcconfig @@ -0,0 +1,4 @@ +{ + "project_id" : "lld", + "conduit_uri" : "https://reviews.llvm.org/" +} diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..9b3aa8b7 --- /dev/null +++ b/.clang-format @@ -0,0 +1 @@ +BasedOnStyle: LLVM diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..0a288ee8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,24 @@ +#==============================================================================# +# This file specifies intentionally untracked files that git should ignore. +# See: http://www.kernel.org/pub/software/scm/git/docs/gitignore.html +#==============================================================================# + +#==============================================================================# +# File extensions to be ignored anywhere in the tree. +#==============================================================================# +# Temp files created by most text editors. +*~ +# Merge files created by git. +*.orig +# Byte compiled python modules. +*.pyc +# vim swap files +.*.swp +# Mac OS X Finder layout info +.DS_Store + +#==============================================================================# +# Directories to be ignored. +#==============================================================================# +# Sphinx build files. 
+docs/_build diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..46ca748f --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,106 @@ +set(LLD_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(LLD_INCLUDE_DIR ${LLD_SOURCE_DIR}/include ) +set(LLD_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) + +# Compute the LLD version from the LLVM version. +string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" LLD_VERSION + ${PACKAGE_VERSION}) +message(STATUS "LLD version: ${LLD_VERSION}") + +string(REGEX REPLACE "([0-9]+)\\.[0-9]+(\\.[0-9]+)?" "\\1" LLD_VERSION_MAJOR + ${LLD_VERSION}) +string(REGEX REPLACE "[0-9]+\\.([0-9]+)(\\.[0-9]+)?" "\\1" LLD_VERSION_MINOR + ${LLD_VERSION}) + +# Determine LLD revision and repository. +# TODO: Figure out a way to get the revision and the repository on windows. +if ( NOT CMAKE_SYSTEM_NAME MATCHES "Windows" ) + execute_process(COMMAND ${CMAKE_SOURCE_DIR}/utils/GetSourceVersion ${LLD_SOURCE_DIR} + OUTPUT_VARIABLE LLD_REVISION) + + execute_process(COMMAND ${CMAKE_SOURCE_DIR}/utils/GetRepositoryPath ${LLD_SOURCE_DIR} + OUTPUT_VARIABLE LLD_REPOSITORY) + if ( LLD_REPOSITORY ) + # Replace newline characters with spaces + string(REGEX REPLACE "(\r?\n)+" " " LLD_REPOSITORY ${LLD_REPOSITORY}) + # Remove leading spaces + STRING(REGEX REPLACE "^[ \t\r\n]+" "" LLD_REPOSITORY "${LLD_REPOSITORY}" ) + # Remove trailing spaces + string(REGEX REPLACE "(\ )+$" "" LLD_REPOSITORY ${LLD_REPOSITORY}) + endif() + + if ( LLD_REVISION ) + # Replace newline characters with spaces + string(REGEX REPLACE "(\r?\n)+" " " LLD_REVISION ${LLD_REVISION}) + # Remove leading spaces + STRING(REGEX REPLACE "^[ \t\r\n]+" "" LLD_REVISION "${LLD_REVISION}" ) + # Remove trailing spaces + string(REGEX REPLACE "(\ )+$" "" LLD_REVISION ${LLD_REVISION}) + endif() +endif () + +# Configure the Version.inc file. 
+configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/include/lld/Config/Version.inc.in + ${CMAKE_CURRENT_BINARY_DIR}/include/lld/Config/Version.inc) + + +if (CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR) + message(FATAL_ERROR "In-source builds are not allowed. CMake would overwrite " +"the makefiles distributed with LLVM. Please create a directory and run cmake " +"from there, passing the path to this source directory as the last argument. " +"This process created the file `CMakeCache.txt' and the directory " +"`CMakeFiles'. Please delete them.") +endif() + +list (APPEND CMAKE_MODULE_PATH "${LLD_SOURCE_DIR}/cmake/modules") + +option(LLD_USE_VTUNE + "Enable VTune user task tracking." + OFF) +if (LLD_USE_VTUNE) + find_package(VTune) + if (VTUNE_FOUND) + include_directories(${VTune_INCLUDE_DIRS}) + list(APPEND LLVM_COMMON_LIBS ${VTune_LIBRARIES}) + add_definitions(-DLLD_HAS_VTUNE) + endif() +endif() + + +if (MSVC) + add_definitions(-wd4530) # Suppress 'warning C4530: C++ exception handler used, but unwind semantics are not enabled.' + add_definitions(-wd4062) # Suppress 'warning C4062: enumerator X in switch of enum Y is not handled' from system header. 
+endif() + +include_directories(BEFORE + ${CMAKE_CURRENT_BINARY_DIR}/include + ${CMAKE_CURRENT_SOURCE_DIR}/include + ) + +if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) + install(DIRECTORY include/ + DESTINATION include + FILES_MATCHING + PATTERN "*.h" + PATTERN ".svn" EXCLUDE + ) +endif() + +macro(add_lld_library name) + add_llvm_library(${name} ${ARGN}) + set_target_properties(${name} PROPERTIES FOLDER "lld libraries") +endmacro(add_lld_library) + + +add_subdirectory(lib) +add_subdirectory(tools/lld) + +if (LLVM_INCLUDE_TESTS) + add_subdirectory(test) + add_subdirectory(unittests) +endif() + +add_subdirectory(docs) +add_subdirectory(COFF) +add_subdirectory(ELF) diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT new file mode 100644 index 00000000..292967e5 --- /dev/null +++ b/CODE_OWNERS.TXT @@ -0,0 +1,19 @@ +This file is a list of the people responsible for ensuring that patches for a +particular part of LLD are reviewed, either by themself or by someone else. +They are also the gatekeepers for their part of LLD, with the final word on +what goes in or not. + +The list is sorted by surname and formatted to allow easy grepping and +beautification by scripts. The fields are: name (N), email (E), web-address +(W), PGP key ID and fingerprint (P), description (D), and snail-mail address +(S). Each entry should contain at least the (N), (E) and (D) fields. 
+ + +N: Rui Ueyama +E: ruiu@google.com +D: COFF, ELF backends (COFF/* ELF/*) + +N: Lang Hames, Nick Kledzik +E: lhames@gmail.com, kledzik@apple.com +D: Mach-O backend + diff --git a/COFF/CMakeLists.txt b/COFF/CMakeLists.txt new file mode 100644 index 00000000..ad5b6fda --- /dev/null +++ b/COFF/CMakeLists.txt @@ -0,0 +1,36 @@ +set(LLVM_TARGET_DEFINITIONS Options.td) +tablegen(LLVM Options.inc -gen-opt-parser-defs) +add_public_tablegen_target(COFFOptionsTableGen) + +add_lld_library(lldCOFF + Chunks.cpp + DLL.cpp + Driver.cpp + DriverUtils.cpp + Error.cpp + ICF.cpp + InputFiles.cpp + Librarian.cpp + MarkLive.cpp + ModuleDef.cpp + PDB.cpp + SymbolTable.cpp + Symbols.cpp + Writer.cpp + + LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + Core + LTO + LibDriver + Object + MC + MCDisassembler + Target + Option + Support + + LINK_LIBS ${PTHREAD_LIB} + ) + +add_dependencies(lldCOFF COFFOptionsTableGen intrinsics_gen) diff --git a/COFF/Chunks.cpp b/COFF/Chunks.cpp new file mode 100644 index 00000000..1c1b1817 --- /dev/null +++ b/COFF/Chunks.cpp @@ -0,0 +1,337 @@ +//===- Chunks.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Chunks.h" +#include "Error.h" +#include "InputFiles.h" +#include "Symbols.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::support::endian; +using namespace llvm::COFF; +using llvm::support::ulittle32_t; + +namespace lld { +namespace coff { + +SectionChunk::SectionChunk(ObjectFile *F, const coff_section *H) + : Chunk(SectionKind), Repl(this), File(F), Header(H), + Relocs(File->getCOFFObj()->getRelocations(Header)), + NumRelocs(std::distance(Relocs.begin(), Relocs.end())) { + // Initialize SectionName. + File->getCOFFObj()->getSectionName(Header, SectionName); + + Align = Header->getAlignment(); + + // Only COMDAT sections are subject of dead-stripping. + Live = !isCOMDAT(); +} + +static void add16(uint8_t *P, int16_t V) { write16le(P, read16le(P) + V); } +static void add32(uint8_t *P, int32_t V) { write32le(P, read32le(P) + V); } +static void add64(uint8_t *P, int64_t V) { write64le(P, read64le(P) + V); } +static void or16(uint8_t *P, uint16_t V) { write16le(P, read16le(P) | V); } + +void SectionChunk::applyRelX64(uint8_t *Off, uint16_t Type, Defined *Sym, + uint64_t P) const { + uint64_t S = Sym->getRVA(); + switch (Type) { + case IMAGE_REL_AMD64_ADDR32: add32(Off, S + Config->ImageBase); break; + case IMAGE_REL_AMD64_ADDR64: add64(Off, S + Config->ImageBase); break; + case IMAGE_REL_AMD64_ADDR32NB: add32(Off, S); break; + case IMAGE_REL_AMD64_REL32: add32(Off, S - P - 4); break; + case IMAGE_REL_AMD64_REL32_1: add32(Off, S - P - 5); break; + case IMAGE_REL_AMD64_REL32_2: add32(Off, S - P - 6); break; + case IMAGE_REL_AMD64_REL32_3: add32(Off, S - P - 7); break; + case IMAGE_REL_AMD64_REL32_4: add32(Off, S - P - 8); break; + case IMAGE_REL_AMD64_REL32_5: 
add32(Off, S - P - 9); break; + case IMAGE_REL_AMD64_SECTION: add16(Off, Sym->getSectionIndex()); break; + case IMAGE_REL_AMD64_SECREL: add32(Off, Sym->getSecrel()); break; + default: + fatal("unsupported relocation type"); + } +} + +void SectionChunk::applyRelX86(uint8_t *Off, uint16_t Type, Defined *Sym, + uint64_t P) const { + uint64_t S = Sym->getRVA(); + switch (Type) { + case IMAGE_REL_I386_ABSOLUTE: break; + case IMAGE_REL_I386_DIR32: add32(Off, S + Config->ImageBase); break; + case IMAGE_REL_I386_DIR32NB: add32(Off, S); break; + case IMAGE_REL_I386_REL32: add32(Off, S - P - 4); break; + case IMAGE_REL_I386_SECTION: add16(Off, Sym->getSectionIndex()); break; + case IMAGE_REL_I386_SECREL: add32(Off, Sym->getSecrel()); break; + default: + fatal("unsupported relocation type"); + } +} + +static void applyMOV(uint8_t *Off, uint16_t V) { + or16(Off, ((V & 0x800) >> 1) | ((V >> 12) & 0xf)); + or16(Off + 2, ((V & 0x700) << 4) | (V & 0xff)); +} + +static void applyMOV32T(uint8_t *Off, uint32_t V) { + applyMOV(Off, V); // set MOVW operand + applyMOV(Off + 4, V >> 16); // set MOVT operand +} + +static void applyBranch20T(uint8_t *Off, int32_t V) { + uint32_t S = V < 0 ? 1 : 0; + uint32_t J1 = (V >> 19) & 1; + uint32_t J2 = (V >> 18) & 1; + or16(Off, (S << 10) | ((V >> 12) & 0x3f)); + or16(Off + 2, (J1 << 13) | (J2 << 11) | ((V >> 1) & 0x7ff)); +} + +static void applyBranch24T(uint8_t *Off, int32_t V) { + uint32_t S = V < 0 ? 1 : 0; + uint32_t J1 = ((~V >> 23) & 1) ^ S; + uint32_t J2 = ((~V >> 22) & 1) ^ S; + or16(Off, (S << 10) | ((V >> 12) & 0x3ff)); + or16(Off + 2, (J1 << 13) | (J2 << 11) | ((V >> 1) & 0x7ff)); +} + +void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, Defined *Sym, + uint64_t P) const { + uint64_t S = Sym->getRVA(); + // Pointer to thumb code must have the LSB set. 
+ if (Sym->isExecutable()) + S |= 1; + switch (Type) { + case IMAGE_REL_ARM_ADDR32: add32(Off, S + Config->ImageBase); break; + case IMAGE_REL_ARM_ADDR32NB: add32(Off, S); break; + case IMAGE_REL_ARM_MOV32T: applyMOV32T(Off, S + Config->ImageBase); break; + case IMAGE_REL_ARM_BRANCH20T: applyBranch20T(Off, S - P - 4); break; + case IMAGE_REL_ARM_BRANCH24T: applyBranch24T(Off, S - P - 4); break; + case IMAGE_REL_ARM_BLX23T: applyBranch24T(Off, S - P - 4); break; + default: + fatal("unsupported relocation type"); + } +} + +void SectionChunk::writeTo(uint8_t *Buf) const { + if (!hasData()) + return; + // Copy section contents from source object file to output file. + ArrayRef A = getContents(); + memcpy(Buf + OutputSectionOff, A.data(), A.size()); + + // Apply relocations. + for (const coff_relocation &Rel : Relocs) { + uint8_t *Off = Buf + OutputSectionOff + Rel.VirtualAddress; + SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex)->repl(); + Defined *Sym = cast(Body); + uint64_t P = RVA + Rel.VirtualAddress; + switch (Config->Machine) { + case AMD64: + applyRelX64(Off, Rel.Type, Sym, P); + break; + case I386: + applyRelX86(Off, Rel.Type, Sym, P); + break; + case ARMNT: + applyRelARM(Off, Rel.Type, Sym, P); + break; + default: + llvm_unreachable("unknown machine type"); + } + } +} + +void SectionChunk::addAssociative(SectionChunk *Child) { + AssocChildren.push_back(Child); +} + +static uint8_t getBaserelType(const coff_relocation &Rel) { + switch (Config->Machine) { + case AMD64: + if (Rel.Type == IMAGE_REL_AMD64_ADDR64) + return IMAGE_REL_BASED_DIR64; + return IMAGE_REL_BASED_ABSOLUTE; + case I386: + if (Rel.Type == IMAGE_REL_I386_DIR32) + return IMAGE_REL_BASED_HIGHLOW; + return IMAGE_REL_BASED_ABSOLUTE; + case ARMNT: + if (Rel.Type == IMAGE_REL_ARM_ADDR32) + return IMAGE_REL_BASED_HIGHLOW; + if (Rel.Type == IMAGE_REL_ARM_MOV32T) + return IMAGE_REL_BASED_ARM_MOV32T; + return IMAGE_REL_BASED_ABSOLUTE; + default: + llvm_unreachable("unknown machine type"); + 
} +} + +// Windows-specific. +// Collect all locations that contain absolute addresses, which need to be +// fixed by the loader if load-time relocation is needed. +// Only called when base relocation is enabled. +void SectionChunk::getBaserels(std::vector *Res) { + for (const coff_relocation &Rel : Relocs) { + uint8_t Ty = getBaserelType(Rel); + if (Ty == IMAGE_REL_BASED_ABSOLUTE) + continue; + SymbolBody *Body = File->getSymbolBody(Rel.SymbolTableIndex)->repl(); + if (isa(Body)) + continue; + Res->emplace_back(RVA + Rel.VirtualAddress, Ty); + } +} + +bool SectionChunk::hasData() const { + return !(Header->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA); +} + +uint32_t SectionChunk::getPermissions() const { + return Header->Characteristics & PermMask; +} + +bool SectionChunk::isCOMDAT() const { + return Header->Characteristics & IMAGE_SCN_LNK_COMDAT; +} + +void SectionChunk::printDiscardedMessage() const { + // Removed by dead-stripping. If it's removed by ICF, ICF already + // printed out the name, so don't repeat that here. + if (Sym && this == Repl) + llvm::outs() << "Discarded " << Sym->getName() << "\n"; +} + +StringRef SectionChunk::getDebugName() { + if (Sym) + return Sym->getName(); + return ""; +} + +ArrayRef SectionChunk::getContents() const { + ArrayRef A; + File->getCOFFObj()->getSectionContents(Header, A); + return A; +} + +void SectionChunk::replace(SectionChunk *Other) { + Other->Repl = Repl; + Other->Live = false; +} + +CommonChunk::CommonChunk(const COFFSymbolRef S) : Sym(S) { + // Common symbols are aligned on natural boundaries up to 32 bytes. + // This is what MSVC link.exe does. 
+ Align = std::min(uint64_t(32), NextPowerOf2(Sym.getValue())); +} + +uint32_t CommonChunk::getPermissions() const { + return IMAGE_SCN_CNT_UNINITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE; +} + +void StringChunk::writeTo(uint8_t *Buf) const { + memcpy(Buf + OutputSectionOff, Str.data(), Str.size()); +} + +ImportThunkChunkX64::ImportThunkChunkX64(Defined *S) : ImpSymbol(S) { + // Intel Optimization Manual says that all branch targets + // should be 16-byte aligned. MSVC linker does this too. + Align = 16; +} + +void ImportThunkChunkX64::writeTo(uint8_t *Buf) const { + memcpy(Buf + OutputSectionOff, ImportThunkX86, sizeof(ImportThunkX86)); + // The first two bytes is a JMP instruction. Fill its operand. + write32le(Buf + OutputSectionOff + 2, ImpSymbol->getRVA() - RVA - getSize()); +} + +void ImportThunkChunkX86::getBaserels(std::vector *Res) { + Res->emplace_back(getRVA() + 2); +} + +void ImportThunkChunkX86::writeTo(uint8_t *Buf) const { + memcpy(Buf + OutputSectionOff, ImportThunkX86, sizeof(ImportThunkX86)); + // The first two bytes is a JMP instruction. Fill its operand. + write32le(Buf + OutputSectionOff + 2, + ImpSymbol->getRVA() + Config->ImageBase); +} + +void ImportThunkChunkARM::getBaserels(std::vector *Res) { + Res->emplace_back(getRVA(), IMAGE_REL_BASED_ARM_MOV32T); +} + +void ImportThunkChunkARM::writeTo(uint8_t *Buf) const { + memcpy(Buf + OutputSectionOff, ImportThunkARM, sizeof(ImportThunkARM)); + // Fix mov.w and mov.t operands. + applyMOV32T(Buf + OutputSectionOff, ImpSymbol->getRVA() + Config->ImageBase); +} + +void LocalImportChunk::getBaserels(std::vector *Res) { + Res->emplace_back(getRVA()); +} + +size_t LocalImportChunk::getSize() const { + return Config->is64() ? 
8 : 4; +} + +void LocalImportChunk::writeTo(uint8_t *Buf) const { + if (Config->is64()) { + write64le(Buf + OutputSectionOff, Sym->getRVA() + Config->ImageBase); + } else { + write32le(Buf + OutputSectionOff, Sym->getRVA() + Config->ImageBase); + } +} + +void SEHTableChunk::writeTo(uint8_t *Buf) const { + ulittle32_t *Begin = reinterpret_cast(Buf + OutputSectionOff); + size_t Cnt = 0; + for (Defined *D : Syms) + Begin[Cnt++] = D->getRVA(); + std::sort(Begin, Begin + Cnt); +} + +// Windows-specific. +// This class represents a block in .reloc section. +BaserelChunk::BaserelChunk(uint32_t Page, Baserel *Begin, Baserel *End) { + // Block header consists of 4 byte page RVA and 4 byte block size. + // Each entry is 2 byte. Last entry may be padding. + Data.resize(alignTo((End - Begin) * 2 + 8, 4)); + uint8_t *P = Data.data(); + write32le(P, Page); + write32le(P + 4, Data.size()); + P += 8; + for (Baserel *I = Begin; I != End; ++I) { + write16le(P, (I->Type << 12) | (I->RVA - Page)); + P += 2; + } +} + +void BaserelChunk::writeTo(uint8_t *Buf) const { + memcpy(Buf + OutputSectionOff, Data.data(), Data.size()); +} + +uint8_t Baserel::getDefaultType() { + switch (Config->Machine) { + case AMD64: + return IMAGE_REL_BASED_DIR64; + case I386: + return IMAGE_REL_BASED_HIGHLOW; + default: + llvm_unreachable("unknown machine type"); + } +} + +} // namespace coff +} // namespace lld diff --git a/COFF/Chunks.h b/COFF/Chunks.h new file mode 100644 index 00000000..cd0e2e69 --- /dev/null +++ b/COFF/Chunks.h @@ -0,0 +1,332 @@ +//===- Chunks.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_CHUNKS_H +#define LLD_COFF_CHUNKS_H + +#include "Config.h" +#include "InputFiles.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Object/COFF.h" +#include +#include +#include + +namespace lld { +namespace coff { + +using llvm::COFF::ImportDirectoryTableEntry; +using llvm::object::COFFSymbolRef; +using llvm::object::SectionRef; +using llvm::object::coff_relocation; +using llvm::object::coff_section; +using llvm::sys::fs::file_magic; + +class Baserel; +class Defined; +class DefinedImportData; +class DefinedRegular; +class ObjectFile; +class OutputSection; +class SymbolBody; + +// Mask for section types (code, data, bss, disacardable, etc.) +// and permissions (writable, readable or executable). +const uint32_t PermMask = 0xFF0000F0; + +// A Chunk represents a chunk of data that will occupy space in the +// output (if the resolver chose that). It may or may not be backed by +// a section of an input file. It could be linker-created data, or +// doesn't even have actual data (if common or bss). +class Chunk { +public: + enum Kind { SectionKind, OtherKind }; + Kind kind() const { return ChunkKind; } + virtual ~Chunk() = default; + + // Returns the size of this chunk (even if this is a common or BSS.) + virtual size_t getSize() const = 0; + + // Write this chunk to a mmap'ed file, assuming Buf is pointing to + // beginning of the file. Because this function may use RVA values + // of other chunks for relocations, you need to set them properly + // before calling this function. + virtual void writeTo(uint8_t *Buf) const {} + + // The writer sets and uses the addresses. 
+ uint64_t getRVA() const { return RVA; } + uint32_t getAlign() const { return Align; } + void setRVA(uint64_t V) { RVA = V; } + void setOutputSectionOff(uint64_t V) { OutputSectionOff = V; } + + // Returns true if this has non-zero data. BSS chunks return + // false. If false is returned, the space occupied by this chunk + // will be filled with zeros. + virtual bool hasData() const { return true; } + + // Returns readable/writable/executable bits. + virtual uint32_t getPermissions() const { return 0; } + + // Returns the section name if this is a section chunk. + // It is illegal to call this function on non-section chunks. + virtual StringRef getSectionName() const { + llvm_unreachable("unimplemented getSectionName"); + } + + // An output section has pointers to chunks in the section, and each + // chunk has a back pointer to an output section. + void setOutputSection(OutputSection *O) { Out = O; } + OutputSection *getOutputSection() { return Out; } + + // Windows-specific. + // Collect all locations that contain absolute addresses for base relocations. + virtual void getBaserels(std::vector *Res) {} + + // Returns a human-readable name of this chunk. Chunks are unnamed chunks of + // bytes, so this is used only for logging or debugging. + virtual StringRef getDebugName() { return ""; } + +protected: + Chunk(Kind K = OtherKind) : ChunkKind(K) {} + const Kind ChunkKind; + + // The RVA of this chunk in the output. The writer sets a value. + uint64_t RVA = 0; + + // The offset from beginning of the output section. The writer sets a value. + uint64_t OutputSectionOff = 0; + + // The output section for this chunk. + OutputSection *Out = nullptr; + + // The alignment of this chunk. The writer uses the value. + uint32_t Align = 1; +}; + +// A chunk corresponding a section of an input file. +class SectionChunk : public Chunk { + // Identical COMDAT Folding feature accesses section internal data. 
+ friend class ICF; + +public: + class symbol_iterator : public llvm::iterator_adaptor_base< + symbol_iterator, const coff_relocation *, + std::random_access_iterator_tag, SymbolBody *> { + friend SectionChunk; + + ObjectFile *File; + + symbol_iterator(ObjectFile *File, const coff_relocation *I) + : symbol_iterator::iterator_adaptor_base(I), File(File) {} + + public: + symbol_iterator() = default; + + SymbolBody *operator*() const { + return File->getSymbolBody(I->SymbolTableIndex); + } + }; + + SectionChunk(ObjectFile *File, const coff_section *Header); + static bool classof(const Chunk *C) { return C->kind() == SectionKind; } + size_t getSize() const override { return Header->SizeOfRawData; } + ArrayRef getContents() const; + void writeTo(uint8_t *Buf) const override; + bool hasData() const override; + uint32_t getPermissions() const override; + StringRef getSectionName() const override { return SectionName; } + void getBaserels(std::vector *Res) override; + bool isCOMDAT() const; + void applyRelX64(uint8_t *Off, uint16_t Type, Defined *Sym, uint64_t P) const; + void applyRelX86(uint8_t *Off, uint16_t Type, Defined *Sym, uint64_t P) const; + void applyRelARM(uint8_t *Off, uint16_t Type, Defined *Sym, uint64_t P) const; + + // Called if the garbage collector decides to not include this chunk + // in a final output. It's supposed to print out a log message to stdout. + void printDiscardedMessage() const; + + // Adds COMDAT associative sections to this COMDAT section. A chunk + // and its children are treated as a group by the garbage collector. + void addAssociative(SectionChunk *Child); + + StringRef getDebugName() override; + void setSymbol(DefinedRegular *S) { if (!Sym) Sym = S; } + + // Used by the garbage collector. + bool isLive() { return !Config->DoGC || Live; } + void markLive() { + assert(!isLive() && "Cannot mark an already live section!"); + Live = true; + } + + // Allow iteration over the bodies of this chunk's relocated symbols. 
+ llvm::iterator_range symbols() const { + return llvm::make_range(symbol_iterator(File, Relocs.begin()), + symbol_iterator(File, Relocs.end())); + } + + // Allow iteration over the associated child chunks for this section. + ArrayRef children() const { return AssocChildren; } + + // A pointer pointing to a replacement for this chunk. + // Initially it points to "this" object. If this chunk is merged + // with other chunk by ICF, it points to another chunk, + // and this chunk is considrered as dead. + SectionChunk *Repl; + + // The CRC of the contents as described in the COFF spec 4.5.5. + // Auxiliary Format 5: Section Definitions. Used for ICF. + uint32_t Checksum = 0; + +private: + // A file this chunk was created from. + ObjectFile *File; + + const coff_section *Header; + StringRef SectionName; + std::vector AssocChildren; + llvm::iterator_range Relocs; + size_t NumRelocs; + + // Used by the garbage collector. + bool Live; + + // Used for ICF (Identical COMDAT Folding) + void replace(SectionChunk *Other); + std::atomic GroupID = { 0 }; + + // Sym points to a section symbol if this is a COMDAT chunk. + DefinedRegular *Sym = nullptr; +}; + +// A chunk for common symbols. Common chunks don't have actual data. +class CommonChunk : public Chunk { +public: + CommonChunk(const COFFSymbolRef Sym); + size_t getSize() const override { return Sym.getValue(); } + bool hasData() const override { return false; } + uint32_t getPermissions() const override; + StringRef getSectionName() const override { return ".bss"; } + +private: + const COFFSymbolRef Sym; +}; + +// A chunk for linker-created strings. 
+class StringChunk : public Chunk { +public: + explicit StringChunk(StringRef S) : Str(S) {} + size_t getSize() const override { return Str.size() + 1; } + void writeTo(uint8_t *Buf) const override; + +private: + StringRef Str; +}; + +static const uint8_t ImportThunkX86[] = { + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // JMP *0x0 +}; + +static const uint8_t ImportThunkARM[] = { + 0x40, 0xf2, 0x00, 0x0c, // mov.w ip, #0 + 0xc0, 0xf2, 0x00, 0x0c, // mov.t ip, #0 + 0xdc, 0xf8, 0x00, 0xf0, // ldr.w pc, [ip] +}; + +// Windows-specific. +// A chunk for DLL import jump table entry. In a final output, it's +// contents will be a JMP instruction to some __imp_ symbol. +class ImportThunkChunkX64 : public Chunk { +public: + explicit ImportThunkChunkX64(Defined *S); + size_t getSize() const override { return sizeof(ImportThunkX86); } + void writeTo(uint8_t *Buf) const override; + +private: + Defined *ImpSymbol; +}; + +class ImportThunkChunkX86 : public Chunk { +public: + explicit ImportThunkChunkX86(Defined *S) : ImpSymbol(S) {} + size_t getSize() const override { return sizeof(ImportThunkX86); } + void getBaserels(std::vector *Res) override; + void writeTo(uint8_t *Buf) const override; + +private: + Defined *ImpSymbol; +}; + +class ImportThunkChunkARM : public Chunk { +public: + explicit ImportThunkChunkARM(Defined *S) : ImpSymbol(S) {} + size_t getSize() const override { return sizeof(ImportThunkARM); } + void getBaserels(std::vector *Res) override; + void writeTo(uint8_t *Buf) const override; + +private: + Defined *ImpSymbol; +}; + +// Windows-specific. +// See comments for DefinedLocalImport class. +class LocalImportChunk : public Chunk { +public: + explicit LocalImportChunk(Defined *S) : Sym(S) {} + size_t getSize() const override; + void getBaserels(std::vector *Res) override; + void writeTo(uint8_t *Buf) const override; + +private: + Defined *Sym; +}; + +// Windows-specific. +// A chunk for SEH table which contains RVAs of safe exception handler +// functions. x86-only. 
+class SEHTableChunk : public Chunk { +public: + explicit SEHTableChunk(std::set S) : Syms(std::move(S)) {} + size_t getSize() const override { return Syms.size() * 4; } + void writeTo(uint8_t *Buf) const override; + +private: + std::set Syms; +}; + +// Windows-specific. +// This class represents a block in .reloc section. +// See the PE/COFF spec 5.6 for details. +class BaserelChunk : public Chunk { +public: + BaserelChunk(uint32_t Page, Baserel *Begin, Baserel *End); + size_t getSize() const override { return Data.size(); } + void writeTo(uint8_t *Buf) const override; + +private: + std::vector Data; +}; + +class Baserel { +public: + Baserel(uint32_t V, uint8_t Ty) : RVA(V), Type(Ty) {} + explicit Baserel(uint32_t V) : Baserel(V, getDefaultType()) {} + uint8_t getDefaultType(); + + uint32_t RVA; + uint8_t Type; +}; + +} // namespace coff +} // namespace lld + +#endif diff --git a/COFF/Config.h b/COFF/Config.h new file mode 100644 index 00000000..a5472e93 --- /dev/null +++ b/COFF/Config.h @@ -0,0 +1,151 @@ +//===- Config.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_CONFIG_H +#define LLD_COFF_CONFIG_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/COFF.h" +#include +#include +#include +#include + +namespace lld { +namespace coff { + +using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; +using llvm::COFF::WindowsSubsystem; +using llvm::StringRef; +class DefinedAbsolute; +class DefinedRelative; +class StringChunk; +class Undefined; + +// Short aliases. +static const auto AMD64 = llvm::COFF::IMAGE_FILE_MACHINE_AMD64; +static const auto ARMNT = llvm::COFF::IMAGE_FILE_MACHINE_ARMNT; +static const auto I386 = llvm::COFF::IMAGE_FILE_MACHINE_I386; + +// Represents an /export option. 
+struct Export { + StringRef Name; // N in /export:N or /export:E=N + StringRef ExtName; // E in /export:E=N + Undefined *Sym = nullptr; + uint16_t Ordinal = 0; + bool Noname = false; + bool Data = false; + bool Private = false; + + // If an export is a form of /export:foo=dllname.bar, that means + // that foo should be exported as an alias to bar in the DLL. + // ForwardTo is set to "dllname.bar" part. Usually empty. + StringRef ForwardTo; + StringChunk *ForwardChunk = nullptr; + + // True if this /export option was in .drectves section. + bool Directives = false; + StringRef SymbolName; + StringRef ExportName; // Name in DLL + + bool operator==(const Export &E) { + return (Name == E.Name && ExtName == E.ExtName && + Ordinal == E.Ordinal && Noname == E.Noname && + Data == E.Data && Private == E.Private); + } +}; + +// Global configuration. +struct Configuration { + enum ManifestKind { SideBySide, Embed, No }; + bool is64() { return Machine == AMD64; } + + llvm::COFF::MachineTypes Machine = IMAGE_FILE_MACHINE_UNKNOWN; + bool Verbose = false; + WindowsSubsystem Subsystem = llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN; + Undefined *Entry = nullptr; + bool NoEntry = false; + std::string OutputFile; + bool DoGC = true; + bool DoICF = true; + bool Relocatable = true; + bool Force = false; + bool Debug = false; + bool WriteSymtab = true; + + // Symbols in this set are considered as live by the garbage collector. + std::set GCRoot; + + std::set NoDefaultLibs; + bool NoDefaultLibAll = false; + + // True if we are creating a DLL. + bool DLL = false; + StringRef Implib; + std::vector Exports; + std::set DelayLoads; + std::map DLLOrder; + Undefined *DelayLoadHelper = nullptr; + + // Used for SafeSEH. + DefinedRelative *SEHTable = nullptr; + DefinedAbsolute *SEHCount = nullptr; + + // Used for /opt:lldlto=N + unsigned LTOOptLevel = 2; + + // Used for /opt:lldltojobs=N + unsigned LTOJobs = 1; + + // Used for /merge:from=to (e.g. 
/merge:.rdata=.text) + std::map Merge; + + // Used for /section=.name,{DEKPRSW} to set section attributes. + std::map Section; + + // Options for manifest files. + ManifestKind Manifest = SideBySide; + int ManifestID = 1; + StringRef ManifestDependency; + bool ManifestUAC = true; + std::vector ManifestInput; + StringRef ManifestLevel = "'asInvoker'"; + StringRef ManifestUIAccess = "'false'"; + StringRef ManifestFile; + + // Used for /failifmismatch. + std::map MustMatch; + + // Used for /alternatename. + std::map AlternateNames; + + uint64_t ImageBase = -1; + uint64_t StackReserve = 1024 * 1024; + uint64_t StackCommit = 4096; + uint64_t HeapReserve = 1024 * 1024; + uint64_t HeapCommit = 4096; + uint32_t MajorImageVersion = 0; + uint32_t MinorImageVersion = 0; + uint32_t MajorOSVersion = 6; + uint32_t MinorOSVersion = 0; + bool DynamicBase = true; + bool AllowBind = true; + bool NxCompat = true; + bool AllowIsolation = true; + bool TerminalServerAware = true; + bool LargeAddressAware = false; + bool HighEntropyVA = false; +}; + +extern Configuration *Config; + +} // namespace coff +} // namespace lld + +#endif diff --git a/COFF/DLL.cpp b/COFF/DLL.cpp new file mode 100644 index 00000000..9ac370c1 --- /dev/null +++ b/COFF/DLL.cpp @@ -0,0 +1,571 @@ +//===- DLL.cpp ------------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines various types of chunks for the DLL import or export +// descriptor tables. They are inherently Windows-specific. +// You need to read Microsoft PE/COFF spec to understand details +// about the data structures. +// +// If you are not particularly interested in linking against Windows +// DLL, you can skip this file, and you should still be able to +// understand the rest of the linker. 
+// +//===----------------------------------------------------------------------===// + +#include "Chunks.h" +#include "DLL.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Path.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::support::endian; +using namespace llvm::COFF; + +namespace lld { +namespace coff { +namespace { + +// Import table + +static int ptrSize() { return Config->is64() ? 8 : 4; } + +// A chunk for the import descriptor table. +class HintNameChunk : public Chunk { +public: + HintNameChunk(StringRef N, uint16_t H) : Name(N), Hint(H) {} + + size_t getSize() const override { + // Starts with 2 byte Hint field, followed by a null-terminated string, + // ends with 0 or 1 byte padding. + return alignTo(Name.size() + 3, 2); + } + + void writeTo(uint8_t *Buf) const override { + write16le(Buf + OutputSectionOff, Hint); + memcpy(Buf + OutputSectionOff + 2, Name.data(), Name.size()); + } + +private: + StringRef Name; + uint16_t Hint; +}; + +// A chunk for the import descriptor table. +class LookupChunk : public Chunk { +public: + explicit LookupChunk(Chunk *C) : HintName(C) {} + size_t getSize() const override { return ptrSize(); } + + void writeTo(uint8_t *Buf) const override { + write32le(Buf + OutputSectionOff, HintName->getRVA()); + } + + Chunk *HintName; +}; + +// A chunk for the import descriptor table. +// This chunk represent import-by-ordinal symbols. +// See Microsoft PE/COFF spec 7.1. Import Header for details. +class OrdinalOnlyChunk : public Chunk { +public: + explicit OrdinalOnlyChunk(uint16_t V) : Ordinal(V) {} + size_t getSize() const override { return ptrSize(); } + + void writeTo(uint8_t *Buf) const override { + // An import-by-ordinal slot has MSB 1 to indicate that + // this is import-by-ordinal (and not import-by-name). 
+ if (Config->is64()) { + write64le(Buf + OutputSectionOff, (1ULL << 63) | Ordinal); + } else { + write32le(Buf + OutputSectionOff, (1ULL << 31) | Ordinal); + } + } + + uint16_t Ordinal; +}; + +// A chunk for the import descriptor table. +class ImportDirectoryChunk : public Chunk { +public: + explicit ImportDirectoryChunk(Chunk *N) : DLLName(N) {} + size_t getSize() const override { return sizeof(ImportDirectoryTableEntry); } + + void writeTo(uint8_t *Buf) const override { + auto *E = (coff_import_directory_table_entry *)(Buf + OutputSectionOff); + E->ImportLookupTableRVA = LookupTab->getRVA(); + E->NameRVA = DLLName->getRVA(); + E->ImportAddressTableRVA = AddressTab->getRVA(); + } + + Chunk *DLLName; + Chunk *LookupTab; + Chunk *AddressTab; +}; + +// A chunk representing null terminator in the import table. +// Contents of this chunk is always null bytes. +class NullChunk : public Chunk { +public: + explicit NullChunk(size_t N) : Size(N) {} + bool hasData() const override { return false; } + size_t getSize() const override { return Size; } + void setAlign(size_t N) { Align = N; } + +private: + size_t Size; +}; + +static std::vector> +binImports(const std::vector &Imports) { + // Group DLL-imported symbols by DLL name because that's how + // symbols are layed out in the import descriptor table. + auto Less = [](const std::string &A, const std::string &B) { + return Config->DLLOrder[A] < Config->DLLOrder[B]; + }; + std::map, + bool(*)(const std::string &, const std::string &)> M(Less); + for (DefinedImportData *Sym : Imports) + M[Sym->getDLLName().lower()].push_back(Sym); + + std::vector> V; + for (auto &P : M) { + // Sort symbols by name for each group. + std::vector &Syms = P.second; + std::sort(Syms.begin(), Syms.end(), + [](DefinedImportData *A, DefinedImportData *B) { + return A->getName() < B->getName(); + }); + V.push_back(std::move(Syms)); + } + return V; +} + +// Export table +// See Microsoft PE/COFF spec 4.3 for details. 
+ +// A chunk for the delay import descriptor table entry. +class DelayDirectoryChunk : public Chunk { +public: + explicit DelayDirectoryChunk(Chunk *N) : DLLName(N) {} + + size_t getSize() const override { + return sizeof(delay_import_directory_table_entry); + } + + void writeTo(uint8_t *Buf) const override { + auto *E = (delay_import_directory_table_entry *)(Buf + OutputSectionOff); + E->Attributes = 1; + E->Name = DLLName->getRVA(); + E->ModuleHandle = ModuleHandle->getRVA(); + E->DelayImportAddressTable = AddressTab->getRVA(); + E->DelayImportNameTable = NameTab->getRVA(); + } + + Chunk *DLLName; + Chunk *ModuleHandle; + Chunk *AddressTab; + Chunk *NameTab; +}; + +// Initial contents for delay-loaded functions. +// This code calls __delayLoadHelper2 function to resolve a symbol +// and then overwrites its jump table slot with the result +// for subsequent function calls. +static const uint8_t ThunkX64[] = { + 0x51, // push rcx + 0x52, // push rdx + 0x41, 0x50, // push r8 + 0x41, 0x51, // push r9 + 0x48, 0x83, 0xEC, 0x48, // sub rsp, 48h + 0x66, 0x0F, 0x7F, 0x04, 0x24, // movdqa xmmword ptr [rsp], xmm0 + 0x66, 0x0F, 0x7F, 0x4C, 0x24, 0x10, // movdqa xmmword ptr [rsp+10h], xmm1 + 0x66, 0x0F, 0x7F, 0x54, 0x24, 0x20, // movdqa xmmword ptr [rsp+20h], xmm2 + 0x66, 0x0F, 0x7F, 0x5C, 0x24, 0x30, // movdqa xmmword ptr [rsp+30h], xmm3 + 0x48, 0x8D, 0x15, 0, 0, 0, 0, // lea rdx, [__imp_] + 0x48, 0x8D, 0x0D, 0, 0, 0, 0, // lea rcx, [___DELAY_IMPORT_...] 
+ 0xE8, 0, 0, 0, 0, // call __delayLoadHelper2 + 0x66, 0x0F, 0x6F, 0x04, 0x24, // movdqa xmm0, xmmword ptr [rsp] + 0x66, 0x0F, 0x6F, 0x4C, 0x24, 0x10, // movdqa xmm1, xmmword ptr [rsp+10h] + 0x66, 0x0F, 0x6F, 0x54, 0x24, 0x20, // movdqa xmm2, xmmword ptr [rsp+20h] + 0x66, 0x0F, 0x6F, 0x5C, 0x24, 0x30, // movdqa xmm3, xmmword ptr [rsp+30h] + 0x48, 0x83, 0xC4, 0x48, // add rsp, 48h + 0x41, 0x59, // pop r9 + 0x41, 0x58, // pop r8 + 0x5A, // pop rdx + 0x59, // pop rcx + 0xFF, 0xE0, // jmp rax +}; + +static const uint8_t ThunkX86[] = { + 0x51, // push ecx + 0x52, // push edx + 0x68, 0, 0, 0, 0, // push offset ___imp__ + 0x68, 0, 0, 0, 0, // push offset ___DELAY_IMPORT_DESCRIPTOR__dll + 0xE8, 0, 0, 0, 0, // call ___delayLoadHelper2@8 + 0x5A, // pop edx + 0x59, // pop ecx + 0xFF, 0xE0, // jmp eax +}; + +// A chunk for the delay import thunk. +class ThunkChunkX64 : public Chunk { +public: + ThunkChunkX64(Defined *I, Chunk *D, Defined *H) + : Imp(I), Desc(D), Helper(H) {} + + size_t getSize() const override { return sizeof(ThunkX64); } + + void writeTo(uint8_t *Buf) const override { + memcpy(Buf + OutputSectionOff, ThunkX64, sizeof(ThunkX64)); // Copy the template, then patch the three 32-bit operands (at +36, +43, +48) relative to the end of their instructions. + write32le(Buf + OutputSectionOff + 36, Imp->getRVA() - RVA - 40); + write32le(Buf + OutputSectionOff + 43, Desc->getRVA() - RVA - 47); + write32le(Buf + OutputSectionOff + 48, Helper->getRVA() - RVA - 52); + } + + Defined *Imp = nullptr; + Chunk *Desc = nullptr; + Defined *Helper = nullptr; +}; + +class ThunkChunkX86 : public Chunk { +public: + ThunkChunkX86(Defined *I, Chunk *D, Defined *H) + : Imp(I), Desc(D), Helper(H) {} + + size_t getSize() const override { return sizeof(ThunkX86); } + + void writeTo(uint8_t *Buf) const override { + memcpy(Buf + OutputSectionOff, ThunkX86, sizeof(ThunkX86)); // Copy the template; the operands at +3 and +8 get RVA + ImageBase, the one at +13 is relative. + write32le(Buf + OutputSectionOff + 3, Imp->getRVA() + Config->ImageBase); + write32le(Buf + OutputSectionOff + 8, Desc->getRVA() + Config->ImageBase); + write32le(Buf + OutputSectionOff + 13, Helper->getRVA() - RVA - 17); + } + + void 
getBaserels(std::vector *Res) override { + Res->emplace_back(RVA + 3); // The operands at +3 and +8 are written as RVA + ImageBase in writeTo. + Res->emplace_back(RVA + 8); + } + + Defined *Imp = nullptr; + Chunk *Desc = nullptr; + Defined *Helper = nullptr; +}; + +// A chunk for one delay import address table slot; it holds the address of its thunk. +class DelayAddressChunk : public Chunk { +public: + explicit DelayAddressChunk(Chunk *C) : Thunk(C) {} + size_t getSize() const override { return ptrSize(); } + + void writeTo(uint8_t *Buf) const override { + if (Config->is64()) { + write64le(Buf + OutputSectionOff, Thunk->getRVA() + Config->ImageBase); + } else { + write32le(Buf + OutputSectionOff, Thunk->getRVA() + Config->ImageBase); + } + } + + void getBaserels(std::vector *Res) override { + Res->emplace_back(RVA); + } + + Chunk *Thunk; +}; + +// Export table +// Read Microsoft PE/COFF spec 5.3 for details. + +// A chunk for the export descriptor table. +class ExportDirectoryChunk : public Chunk { +public: + ExportDirectoryChunk(int I, int J, Chunk *D, Chunk *A, Chunk *N, Chunk *O) + : MaxOrdinal(I), NameTabSize(J), DLLName(D), AddressTab(A), NameTab(N), + OrdinalTab(O) {} + + size_t getSize() const override { + return sizeof(export_directory_table_entry); + } + + void writeTo(uint8_t *Buf) const override { + auto *E = (export_directory_table_entry *)(Buf + OutputSectionOff); + E->NameRVA = DLLName->getRVA(); + E->OrdinalBase = 0; + E->AddressTableEntries = MaxOrdinal + 1; + E->NumberOfNamePointers = NameTabSize; + E->ExportAddressTableRVA = AddressTab->getRVA(); + E->NamePointerRVA = NameTab->getRVA(); + E->OrdinalTableRVA = OrdinalTab->getRVA(); + } + + uint16_t MaxOrdinal; + uint16_t NameTabSize; + Chunk *DLLName; + Chunk *AddressTab; + Chunk *NameTab; + Chunk *OrdinalTab; +}; + +class AddressTableChunk : public Chunk { +public: + explicit AddressTableChunk(size_t MaxOrdinal) : Size(MaxOrdinal + 1) {} + size_t getSize() const override { return Size * 4; } // One 4-byte RVA per ordinal in [0, MaxOrdinal]. + + void writeTo(uint8_t *Buf) const override { + for (Export &E : Config->Exports) { + uint8_t *P = Buf + 
OutputSectionOff + E.Ordinal * 4; + if (E.ForwardChunk) { + write32le(P, E.ForwardChunk->getRVA()); + } else { + write32le(P, cast(E.Sym->repl())->getRVA()); + } + } + } + +private: + size_t Size; +}; + +class NamePointersChunk : public Chunk { +public: + explicit NamePointersChunk(std::vector &V) : Chunks(V) {} + size_t getSize() const override { return Chunks.size() * 4; } // One 4-byte RVA per name chunk. + + void writeTo(uint8_t *Buf) const override { + uint8_t *P = Buf + OutputSectionOff; + for (Chunk *C : Chunks) { + write32le(P, C->getRVA()); + P += 4; + } + } + +private: + std::vector Chunks; +}; + +class ExportOrdinalChunk : public Chunk { +public: + explicit ExportOrdinalChunk(size_t I) : Size(I) {} + size_t getSize() const override { return Size * 2; } + + void writeTo(uint8_t *Buf) const override { + uint8_t *P = Buf + OutputSectionOff; + for (Export &E : Config->Exports) { + if (E.Noname) // Noname exports get no entry in this table. + continue; + write16le(P, E.Ordinal); + P += 2; + } + } + +private: + size_t Size; +}; + +} // anonymous namespace + +uint64_t IdataContents::getDirSize() { + return Dirs.size() * sizeof(ImportDirectoryTableEntry); +} + +uint64_t IdataContents::getIATSize() { + return Addresses.size() * ptrSize(); +} + +// Returns a list of .idata contents. +// See Microsoft PE/COFF spec 5.4 for details. +std::vector IdataContents::getChunks() { + create(); + std::vector V; + // The loader assumes a specific order of data. + // Add each type in the correct order. + for (std::unique_ptr &C : Dirs) + V.push_back(C.get()); + for (std::unique_ptr &C : Lookups) + V.push_back(C.get()); + for (std::unique_ptr &C : Addresses) + V.push_back(C.get()); + for (std::unique_ptr &C : Hints) + V.push_back(C.get()); + for (auto &P : DLLNames) { + std::unique_ptr &C = P.second; + V.push_back(C.get()); + } + return V; +} + +void IdataContents::create() { + std::vector> V = binImports(Imports); + + // Create .idata contents for each DLL. 
+ for (std::vector &Syms : V) { + StringRef Name = Syms[0]->getDLLName(); + + // Create lookup and address tables. If they have external names, + // we need to create HintName chunks to store the names. + // If they don't (if they are import-by-ordinals), we store only + // ordinal values to the table. + size_t Base = Lookups.size(); // Index of this DLL's first slot; Lookups and Addresses grow in lockstep below. + for (DefinedImportData *S : Syms) { + uint16_t Ord = S->getOrdinal(); + if (S->getExternalName().empty()) { + Lookups.push_back(make_unique(Ord)); + Addresses.push_back(make_unique(Ord)); + continue; + } + auto C = make_unique(S->getExternalName(), Ord); + Lookups.push_back(make_unique(C.get())); + Addresses.push_back(make_unique(C.get())); + Hints.push_back(std::move(C)); + } + // Terminate with null values. + Lookups.push_back(make_unique(ptrSize())); + Addresses.push_back(make_unique(ptrSize())); + + for (int I = 0, E = Syms.size(); I < E; ++I) + Syms[I]->setLocation(Addresses[Base + I].get()); + + // Create the import table header. + if (!DLLNames.count(Name)) + DLLNames[Name] = make_unique(Name); + auto Dir = make_unique(DLLNames[Name].get()); + Dir->LookupTab = Lookups[Base].get(); + Dir->AddressTab = Addresses[Base].get(); + Dirs.push_back(std::move(Dir)); + } + // Add null terminator. 
+ Dirs.push_back(make_unique(sizeof(ImportDirectoryTableEntry))); +} + +std::vector DelayLoadContents::getChunks() { + std::vector V; + for (std::unique_ptr &C : Dirs) + V.push_back(C.get()); + for (std::unique_ptr &C : Names) + V.push_back(C.get()); + for (std::unique_ptr &C : HintNames) + V.push_back(C.get()); + for (auto &P : DLLNames) { + std::unique_ptr &C = P.second; + V.push_back(C.get()); + } + return V; +} + +std::vector DelayLoadContents::getDataChunks() { + std::vector V; + for (std::unique_ptr &C : ModuleHandles) + V.push_back(C.get()); + for (std::unique_ptr &C : Addresses) + V.push_back(C.get()); + return V; +} + +uint64_t DelayLoadContents::getDirSize() { + return Dirs.size() * sizeof(delay_import_directory_table_entry); +} + +void DelayLoadContents::create(Defined *H) { + Helper = H; + std::vector> V = binImports(Imports); + + // Create .didat contents for each DLL. + for (std::vector &Syms : V) { + StringRef Name = Syms[0]->getDLLName(); + + // Create the delay import table header. + if (!DLLNames.count(Name)) + DLLNames[Name] = make_unique(Name); + auto Dir = make_unique(DLLNames[Name].get()); + + size_t Base = Addresses.size(); // Index of this DLL's first slot; Addresses and Names grow in lockstep below. + for (DefinedImportData *S : Syms) { + Chunk *T = newThunkChunk(S, Dir.get()); + auto A = make_unique(T); + Addresses.push_back(std::move(A)); + Thunks.push_back(std::unique_ptr(T)); + StringRef ExtName = S->getExternalName(); + if (ExtName.empty()) { + Names.push_back(make_unique(S->getOrdinal())); + } else { + auto C = make_unique(ExtName, 0); // The hint is always 0 here. + Names.push_back(make_unique(C.get())); + HintNames.push_back(std::move(C)); + } + } + // Terminate with null values. + Addresses.push_back(make_unique(8)); + Names.push_back(make_unique(8)); + + for (int I = 0, E = Syms.size(); I < E; ++I) + Syms[I]->setLocation(Addresses[Base + I].get()); + auto *MH = new NullChunk(8); + MH->setAlign(8); + ModuleHandles.push_back(std::unique_ptr(MH)); + + // Fill the delay import table header fields. 
+ Dir->ModuleHandle = MH; + Dir->AddressTab = Addresses[Base].get(); + Dir->NameTab = Names[Base].get(); + Dirs.push_back(std::move(Dir)); + } + // Add null terminator. + Dirs.push_back( + make_unique(sizeof(delay_import_directory_table_entry))); +} + +Chunk *DelayLoadContents::newThunkChunk(DefinedImportData *S, Chunk *Dir) { + switch (Config->Machine) { // Returns a chunk allocated with new; the caller takes ownership. + case AMD64: + return new ThunkChunkX64(S, Dir, Helper); + case I386: + return new ThunkChunkX86(S, Dir, Helper); + default: + llvm_unreachable("unsupported machine type"); + } +} + +EdataContents::EdataContents() { + uint16_t MaxOrdinal = 0; + for (Export &E : Config->Exports) + MaxOrdinal = std::max(MaxOrdinal, E.Ordinal); + + auto *DLLName = new StringChunk(sys::path::filename(Config->OutputFile)); + auto *AddressTab = new AddressTableChunk(MaxOrdinal); + std::vector Names; + for (Export &E : Config->Exports) + if (!E.Noname) + Names.push_back(new StringChunk(E.ExportName)); + + std::vector Forwards; + for (Export &E : Config->Exports) { + if (E.ForwardTo.empty()) + continue; + E.ForwardChunk = new StringChunk(E.ForwardTo); + Forwards.push_back(E.ForwardChunk); + } + + auto *NameTab = new NamePointersChunk(Names); + auto *OrdinalTab = new ExportOrdinalChunk(Names.size()); + auto *Dir = new ExportDirectoryChunk(MaxOrdinal, Names.size(), DLLName, + AddressTab, NameTab, OrdinalTab); + Chunks.push_back(std::unique_ptr(Dir)); + Chunks.push_back(std::unique_ptr(DLLName)); + Chunks.push_back(std::unique_ptr(AddressTab)); + Chunks.push_back(std::unique_ptr(NameTab)); + Chunks.push_back(std::unique_ptr(OrdinalTab)); + for (Chunk *C : Names) + Chunks.push_back(std::unique_ptr(C)); + for (Chunk *C : Forwards) + Chunks.push_back(std::unique_ptr(C)); +} + +} // namespace coff +} // namespace lld diff --git a/COFF/DLL.h b/COFF/DLL.h new file mode 100644 index 00000000..83a12df1 --- /dev/null +++ b/COFF/DLL.h @@ -0,0 +1,84 @@ +//===- DLL.h ----------------------------------------------------*- C++ -*-===// +// +// The LLVM 
Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_DLL_H +#define LLD_COFF_DLL_H + +#include "Chunks.h" +#include "Symbols.h" + +namespace lld { +namespace coff { + +// Windows-specific. +// IdataContents creates all chunks for the DLL import table. +// You are supposed to call add() to add symbols and then +// call getChunks() to get a list of chunks. +class IdataContents { +public: + void add(DefinedImportData *Sym) { Imports.push_back(Sym); } + bool empty() { return Imports.empty(); } + std::vector getChunks(); + + uint64_t getDirRVA() { return Dirs[0]->getRVA(); } // Reads Dirs[0], so Dirs must already have been filled by create(). + uint64_t getDirSize(); + uint64_t getIATRVA() { return Addresses[0]->getRVA(); } // Reads Addresses[0]; same precondition as getDirRVA(). + uint64_t getIATSize(); + +private: + void create(); + + std::vector Imports; + std::vector> Dirs; + std::vector> Lookups; + std::vector> Addresses; + std::vector> Hints; + std::map> DLLNames; +}; + +// Windows-specific. +// DelayLoadContents creates all chunks for the delay-load DLL import table. +class DelayLoadContents { +public: + void add(DefinedImportData *Sym) { Imports.push_back(Sym); } + bool empty() { return Imports.empty(); } + void create(Defined *Helper); + std::vector getChunks(); + std::vector getDataChunks(); + std::vector> &getCodeChunks() { return Thunks; } + + uint64_t getDirRVA() { return Dirs[0]->getRVA(); } // Reads Dirs[0], so create() must have run first. + uint64_t getDirSize(); + +private: + Chunk *newThunkChunk(DefinedImportData *S, Chunk *Dir); + + Defined *Helper; + std::vector Imports; + std::vector> Dirs; + std::vector> ModuleHandles; + std::vector> Addresses; + std::vector> Names; + std::vector> HintNames; + std::vector> Thunks; + std::map> DLLNames; +}; + +// Windows-specific. +// EdataContents creates all chunks for the DLL export table. 
+class EdataContents { +public: + EdataContents(); + std::vector> Chunks; +}; + +} // namespace coff +} // namespace lld + +#endif diff --git a/COFF/Driver.cpp b/COFF/Driver.cpp new file mode 100644 index 00000000..bb6a60e4 --- /dev/null +++ b/COFF/Driver.cpp @@ -0,0 +1,694 @@ +//===- Driver.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Config.h" +#include "Driver.h" +#include "Error.h" +#include "InputFiles.h" +#include "SymbolTable.h" +#include "Symbols.h" +#include "Writer.h" +#include "lld/Driver/Driver.h" +#include "llvm/ADT/Optional.h" +#include "llvm/LibDriver/LibDriver.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using namespace llvm; +using namespace llvm::COFF; +using llvm::sys::Process; +using llvm::sys::fs::OpenFlags; +using llvm::sys::fs::file_magic; +using llvm::sys::fs::identify_magic; + +namespace lld { +namespace coff { + +Configuration *Config; +LinkerDriver *Driver; + +bool link(llvm::ArrayRef Args) { + Configuration C; + LinkerDriver D; + Config = &C; // Config and Driver point at these locals for the duration of the call. + Driver = &D; + Driver->link(Args); + return true; // NOTE(review): unconditional; presumably failures are reported via fatal() instead -- confirm. +} + +// Drop directory components and replace extension with ".exe" or ".dll". +static std::string getOutputPath(StringRef Path) { + auto P = Path.find_last_of("\\/"); + StringRef S = (P == StringRef::npos) ? Path : Path.substr(P + 1); + const char* E = Config->DLL ? ".dll" : ".exe"; + return (S.substr(0, S.rfind('.')) + E).str(); +} + +// Opens a file. Path has to be resolved already. 
+// Newly created memory buffers are owned by this driver. +MemoryBufferRef LinkerDriver::openFile(StringRef Path) { + std::unique_ptr MB = + check(MemoryBuffer::getFile(Path), "could not open " + Path); + MemoryBufferRef MBRef = MB->getMemBufferRef(); + OwningMBs.push_back(std::move(MB)); // take ownership + return MBRef; +} + +static std::unique_ptr createFile(MemoryBufferRef MB) { + // File type is detected by contents, not by file extension. + file_magic Magic = identify_magic(MB.getBuffer()); + if (Magic == file_magic::archive) + return std::unique_ptr(new ArchiveFile(MB)); + if (Magic == file_magic::bitcode) + return std::unique_ptr(new BitcodeFile(MB)); + if (Config->OutputFile == "") // Side effect: the first non-archive, non-bitcode input names the output if none is set yet. + Config->OutputFile = getOutputPath(MB.getBufferIdentifier()); + return std::unique_ptr(new ObjectFile(MB)); +} + +static bool isDecorated(StringRef Sym) { + return Sym.startswith("_") || Sym.startswith("@") || Sym.startswith("?"); +} + +// Parses .drectve section contents and applies each option; files +// specified by /defaultlib are opened and added to the symbol table. 
+void LinkerDriver::parseDirectives(StringRef S) { + llvm::opt::InputArgList Args = Parser.parse(S); + + for (auto *Arg : Args) { + switch (Arg->getOption().getID()) { + case OPT_alternatename: + parseAlternateName(Arg->getValue()); + break; + case OPT_defaultlib: + if (Optional Path = findLib(Arg->getValue())) { + MemoryBufferRef MB = openFile(*Path); + Symtab.addFile(createFile(MB)); + } + break; + case OPT_export: { + Export E = parseExport(Arg->getValue()); + E.Directives = true; + Config->Exports.push_back(E); + break; + } + case OPT_failifmismatch: + checkFailIfMismatch(Arg->getValue()); + break; + case OPT_incl: + addUndefined(Arg->getValue()); + break; + case OPT_merge: + parseMerge(Arg->getValue()); + break; + case OPT_nodefaultlib: + Config->NoDefaultLibs.insert(doFindLib(Arg->getValue())); + break; + case OPT_section: + parseSection(Arg->getValue()); + break; + case OPT_editandcontinue: // These four options are accepted and ignored. + case OPT_fastfail: + case OPT_guardsym: + case OPT_throwingnew: + break; + default: + fatal(Arg->getSpelling() + " is not allowed in .drectve"); + } + } +} + +// Find file from search paths. You can omit ".obj", this function takes +// care of that. Note that the returned path is not guaranteed to exist. +StringRef LinkerDriver::doFindFile(StringRef Filename) { + bool hasPathSep = (Filename.find_first_of("/\\") != StringRef::npos); + if (hasPathSep) + return Filename; + bool hasExt = (Filename.find('.') != StringRef::npos); + for (StringRef Dir : SearchPaths) { + SmallString<128> Path = Dir; + llvm::sys::path::append(Path, Filename); + if (llvm::sys::fs::exists(Path.str())) + return Alloc.save(Path.str()); + if (!hasExt) { + Path.append(".obj"); + if (llvm::sys::fs::exists(Path.str())) + return Alloc.save(Path.str()); + } + } + return Filename; +} + +// Resolves a file path. This never returns the same path twice +// (a path that was already visited yields None). 
+Optional LinkerDriver::findFile(StringRef Filename) { + StringRef Path = doFindFile(Filename); + bool Seen = !VisitedFiles.insert(Path.lower()).second; // Paths are compared in lower case. + if (Seen) + return None; + return Path; +} + +// Find library file from search path. +StringRef LinkerDriver::doFindLib(StringRef Filename) { + // Add ".lib" to Filename if that has no file extension. + bool hasExt = (Filename.find('.') != StringRef::npos); + if (!hasExt) + Filename = Alloc.save(Filename + ".lib"); + return doFindFile(Filename); +} + +// Resolves a library path. /nodefaultlib options are taken into +// consideration. This never returns the same path twice (a path +// that was already visited yields None). +Optional LinkerDriver::findLib(StringRef Filename) { + if (Config->NoDefaultLibAll) + return None; + StringRef Path = doFindLib(Filename); + if (Config->NoDefaultLibs.count(Path)) + return None; + bool Seen = !VisitedFiles.insert(Path.lower()).second; + if (Seen) + return None; + return Path; +} + +// Parses LIB environment which contains a list of search paths. +void LinkerDriver::addLibSearchPaths() { + Optional EnvOpt = Process::GetEnv("LIB"); + if (!EnvOpt.hasValue()) + return; + StringRef Env = Alloc.save(*EnvOpt); + while (!Env.empty()) { + StringRef Path; + std::tie(Path, Env) = Env.split(';'); + SearchPaths.push_back(Path); + } +} + +Undefined *LinkerDriver::addUndefined(StringRef Name) { + Undefined *U = Symtab.addUndefined(Name); + Config->GCRoot.insert(U); // Every symbol added here is also recorded as a GC root. + return U; +} + +// Symbol names are mangled by prepending a "_" prefix on x86. +StringRef LinkerDriver::mangle(StringRef Sym) { + assert(Config->Machine != IMAGE_FILE_MACHINE_UNKNOWN); + if (Config->Machine == I386) + return Alloc.save("_" + Sym); + return Sym; +} + +// Windows specific -- find default entry point name. +StringRef LinkerDriver::findDefaultEntry() { + // User-defined main functions and their corresponding entry points. 
+ static const char *Entries[][2] = { + {"main", "mainCRTStartup"}, + {"wmain", "wmainCRTStartup"}, + {"WinMain", "WinMainCRTStartup"}, + {"wWinMain", "wWinMainCRTStartup"}, + }; + for (auto E : Entries) { + StringRef Entry = Symtab.findMangle(mangle(E[0])); + if (!Entry.empty() && !isa(Symtab.find(Entry)->Body)) + return mangle(E[1]); + } + return ""; +} + +WindowsSubsystem LinkerDriver::inferSubsystem() { + if (Config->DLL) + return IMAGE_SUBSYSTEM_WINDOWS_GUI; + if (Symtab.findUnderscore("main") || Symtab.findUnderscore("wmain")) + return IMAGE_SUBSYSTEM_WINDOWS_CUI; + if (Symtab.findUnderscore("WinMain") || Symtab.findUnderscore("wWinMain")) + return IMAGE_SUBSYSTEM_WINDOWS_GUI; + return IMAGE_SUBSYSTEM_UNKNOWN; +} + +static uint64_t getDefaultImageBase() { + if (Config->is64()) + return Config->DLL ? 0x180000000 : 0x140000000; + return Config->DLL ? 0x10000000 : 0x400000; +} + +void LinkerDriver::link(llvm::ArrayRef ArgsArr) { + // If the first command line argument is "/lib", link.exe acts like lib.exe. + // We call our own implementation of lib.exe that understands bitcode files. + if (ArgsArr.size() > 1 && StringRef(ArgsArr[1]).equals_lower("/lib")) { + if (llvm::libDriverMain(ArgsArr.slice(1)) != 0) + fatal("lib failed"); + return; + } + + // Needed for LTO. + llvm::InitializeAllTargetInfos(); + llvm::InitializeAllTargets(); + llvm::InitializeAllTargetMCs(); + llvm::InitializeAllAsmParsers(); + llvm::InitializeAllAsmPrinters(); + llvm::InitializeAllDisassemblers(); + + // Parse command line options. + llvm::opt::InputArgList Args = Parser.parseLINK(ArgsArr.slice(1)); + + // Handle /help + if (Args.hasArg(OPT_help)) { + printHelp(ArgsArr[0]); + return; + } + + if (Args.filtered_begin(OPT_INPUT) == Args.filtered_end()) + fatal("no input files"); + + // Construct search path list. 
+ SearchPaths.push_back(""); + for (auto *Arg : Args.filtered(OPT_libpath)) + SearchPaths.push_back(Arg->getValue()); + addLibSearchPaths(); + + // Handle /out + if (auto *Arg = Args.getLastArg(OPT_out)) + Config->OutputFile = Arg->getValue(); + + // Handle /verbose + if (Args.hasArg(OPT_verbose)) + Config->Verbose = true; + + // Handle /force or /force:unresolved + if (Args.hasArg(OPT_force) || Args.hasArg(OPT_force_unresolved)) + Config->Force = true; + + // Handle /debug + if (Args.hasArg(OPT_debug)) + Config->Debug = true; + + // Handle /noentry + if (Args.hasArg(OPT_noentry)) { + if (!Args.hasArg(OPT_dll)) + fatal("/noentry must be specified with /dll"); + Config->NoEntry = true; + } + + // Handle /dll + if (Args.hasArg(OPT_dll)) { + Config->DLL = true; + Config->ManifestID = 2; + } + + // Handle /fixed + if (Args.hasArg(OPT_fixed)) { + if (Args.hasArg(OPT_dynamicbase)) + fatal("/fixed must not be specified with /dynamicbase"); + Config->Relocatable = false; + Config->DynamicBase = false; + } + + // Handle /machine + if (auto *Arg = Args.getLastArg(OPT_machine)) + Config->Machine = getMachineType(Arg->getValue()); + + // Handle /nodefaultlib: + for (auto *Arg : Args.filtered(OPT_nodefaultlib)) + Config->NoDefaultLibs.insert(doFindLib(Arg->getValue())); + + // Handle /nodefaultlib + if (Args.hasArg(OPT_nodefaultlib_all)) + Config->NoDefaultLibAll = true; + + // Handle /base + if (auto *Arg = Args.getLastArg(OPT_base)) + parseNumbers(Arg->getValue(), &Config->ImageBase); + + // Handle /stack + if (auto *Arg = Args.getLastArg(OPT_stack)) + parseNumbers(Arg->getValue(), &Config->StackReserve, &Config->StackCommit); + + // Handle /heap + if (auto *Arg = Args.getLastArg(OPT_heap)) + parseNumbers(Arg->getValue(), &Config->HeapReserve, &Config->HeapCommit); + + // Handle /version + if (auto *Arg = Args.getLastArg(OPT_version)) + parseVersion(Arg->getValue(), &Config->MajorImageVersion, + &Config->MinorImageVersion); + + // Handle /subsystem + if (auto *Arg = 
Args.getLastArg(OPT_subsystem)) + parseSubsystem(Arg->getValue(), &Config->Subsystem, &Config->MajorOSVersion, + &Config->MinorOSVersion); + + // Handle /alternatename + for (auto *Arg : Args.filtered(OPT_alternatename)) + parseAlternateName(Arg->getValue()); + + // Handle /include + for (auto *Arg : Args.filtered(OPT_incl)) + addUndefined(Arg->getValue()); + + // Handle /implib + if (auto *Arg = Args.getLastArg(OPT_implib)) + Config->Implib = Arg->getValue(); + + // Handle /opt + for (auto *Arg : Args.filtered(OPT_opt)) { + std::string Str = StringRef(Arg->getValue()).lower(); + SmallVector Vec; + StringRef(Str).split(Vec, ','); + for (StringRef S : Vec) { + if (S == "noref") { + Config->DoGC = false; + Config->DoICF = false; + continue; + } + if (S == "icf" || StringRef(S).startswith("icf=")) { + Config->DoICF = true; + continue; + } + if (S == "noicf") { + Config->DoICF = false; + continue; + } + if (StringRef(S).startswith("lldlto=")) { + StringRef OptLevel = StringRef(S).substr(7); + if (OptLevel.getAsInteger(10, Config->LTOOptLevel) || + Config->LTOOptLevel > 3) + fatal("/opt:lldlto: invalid optimization level: " + OptLevel); + continue; + } + if (StringRef(S).startswith("lldltojobs=")) { + StringRef Jobs = StringRef(S).substr(11); + if (Jobs.getAsInteger(10, Config->LTOJobs) || Config->LTOJobs == 0) + fatal("/opt:lldltojobs: invalid job count: " + Jobs); + continue; + } + if (S != "ref" && S != "lbr" && S != "nolbr") + fatal("/opt: unknown option: " + S); + } + } + + // Handle /failifmismatch + for (auto *Arg : Args.filtered(OPT_failifmismatch)) + checkFailIfMismatch(Arg->getValue()); + + // Handle /merge + for (auto *Arg : Args.filtered(OPT_merge)) + parseMerge(Arg->getValue()); + + // Handle /section + for (auto *Arg : Args.filtered(OPT_section)) + parseSection(Arg->getValue()); + + // Handle /manifest + if (auto *Arg = Args.getLastArg(OPT_manifest_colon)) + parseManifest(Arg->getValue()); + + // Handle /manifestuac + if (auto *Arg = 
Args.getLastArg(OPT_manifestuac)) + parseManifestUAC(Arg->getValue()); + + // Handle /manifestdependency + if (auto *Arg = Args.getLastArg(OPT_manifestdependency)) + Config->ManifestDependency = Arg->getValue(); + + // Handle /manifestfile + if (auto *Arg = Args.getLastArg(OPT_manifestfile)) + Config->ManifestFile = Arg->getValue(); + + // Handle /manifestinput + for (auto *Arg : Args.filtered(OPT_manifestinput)) + Config->ManifestInput.push_back(Arg->getValue()); + + // Handle miscellaneous boolean flags. + if (Args.hasArg(OPT_allowbind_no)) + Config->AllowBind = false; + if (Args.hasArg(OPT_allowisolation_no)) + Config->AllowIsolation = false; + if (Args.hasArg(OPT_dynamicbase_no)) + Config->DynamicBase = false; + if (Args.hasArg(OPT_nxcompat_no)) + Config->NxCompat = false; + if (Args.hasArg(OPT_tsaware_no)) + Config->TerminalServerAware = false; + if (Args.hasArg(OPT_nosymtab)) + Config->WriteSymtab = false; + + // Create a list of input files. Files can be given as arguments + // for /defaultlib option. + std::vector Paths; + std::vector MBs; + for (auto *Arg : Args.filtered(OPT_INPUT)) + if (Optional Path = findFile(Arg->getValue())) + Paths.push_back(*Path); + for (auto *Arg : Args.filtered(OPT_defaultlib)) + if (Optional Path = findLib(Arg->getValue())) + Paths.push_back(*Path); + for (StringRef Path : Paths) + MBs.push_back(openFile(Path)); + + // Windows specific -- Create a resource file containing a manifest file. + if (Config->Manifest == Configuration::Embed) { + std::unique_ptr MB = createManifestRes(); + MBs.push_back(MB->getMemBufferRef()); + OwningMBs.push_back(std::move(MB)); // take ownership + } + + // Windows specific -- Input files can be Windows resource files (.res files). + // We invoke cvtres.exe to convert resource files to a regular COFF file + // then link the result file normally. 
+ std::vector Resources; + auto NotResource = [](MemoryBufferRef MB) { + return identify_magic(MB.getBuffer()) != file_magic::windows_resource; + }; + auto It = std::stable_partition(MBs.begin(), MBs.end(), NotResource); + if (It != MBs.end()) { + Resources.insert(Resources.end(), It, MBs.end()); + MBs.erase(It, MBs.end()); + } + + // Read all input files given via the command line. Note that step() + // doesn't read files that are specified by directive sections. + for (MemoryBufferRef MB : MBs) + Symtab.addFile(createFile(MB)); + Symtab.step(); + + // Determine machine type and check if all object files are + // for the same CPU type. Note that this needs to be done before + // any call to mangle(). + for (std::unique_ptr &File : Symtab.getFiles()) { + MachineTypes MT = File->getMachineType(); + if (MT == IMAGE_FILE_MACHINE_UNKNOWN) + continue; + if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) { + Config->Machine = MT; + continue; + } + if (Config->Machine != MT) + fatal(File->getShortName() + ": machine type " + machineToStr(MT) + + " conflicts with " + machineToStr(Config->Machine)); + } + if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) { + llvm::errs() << "warning: /machine is not specified. x64 is assumed.\n"; + Config->Machine = AMD64; + } + + // Windows specific -- Convert Windows resource files to a COFF file. 
+ if (!Resources.empty()) { + std::unique_ptr MB = convertResToCOFF(Resources); + Symtab.addFile(createFile(MB->getMemBufferRef())); + OwningMBs.push_back(std::move(MB)); // take ownership + } + + // Handle /largeaddressaware + if (Config->is64() || Args.hasArg(OPT_largeaddressaware)) + Config->LargeAddressAware = true; + + // Handle /highentropyva + if (Config->is64() && !Args.hasArg(OPT_highentropyva_no)) + Config->HighEntropyVA = true; + + // Handle /entry and /dll + if (auto *Arg = Args.getLastArg(OPT_entry)) { + Config->Entry = addUndefined(mangle(Arg->getValue())); + } else if (Args.hasArg(OPT_dll) && !Config->NoEntry) { + StringRef S = (Config->Machine == I386) ? "__DllMainCRTStartup@12" + : "_DllMainCRTStartup"; + Config->Entry = addUndefined(S); + } else if (!Config->NoEntry) { + // Windows specific -- If entry point name is not given, we need to + // infer that from user-defined entry name. + StringRef S = findDefaultEntry(); + if (S.empty()) + fatal("entry point must be defined"); + Config->Entry = addUndefined(S); + if (Config->Verbose) + llvm::outs() << "Entry name inferred: " << S << "\n"; + } + + // Handle /export + for (auto *Arg : Args.filtered(OPT_export)) { + Export E = parseExport(Arg->getValue()); + if (Config->Machine == I386) { + if (!isDecorated(E.Name)) + E.Name = Alloc.save("_" + E.Name); + if (!E.ExtName.empty() && !isDecorated(E.ExtName)) + E.ExtName = Alloc.save("_" + E.ExtName); + } + Config->Exports.push_back(E); + } + + // Handle /def + if (auto *Arg = Args.getLastArg(OPT_deffile)) { + MemoryBufferRef MB = openFile(Arg->getValue()); + // parseModuleDefs mutates Config object. 
+ parseModuleDefs(MB, &Alloc); + } + + // Handle /delayload + for (auto *Arg : Args.filtered(OPT_delayload)) { + Config->DelayLoads.insert(StringRef(Arg->getValue()).lower()); + if (Config->Machine == I386) { + Config->DelayLoadHelper = addUndefined("___delayLoadHelper2@8"); + } else { + Config->DelayLoadHelper = addUndefined("__delayLoadHelper2"); + } + } + + // Set default image base if /base is not given. + if (Config->ImageBase == uint64_t(-1)) + Config->ImageBase = getDefaultImageBase(); + + Symtab.addRelative(mangle("__ImageBase"), 0); + if (Config->Machine == I386) { + Config->SEHTable = Symtab.addRelative("___safe_se_handler_table", 0); + Config->SEHCount = Symtab.addAbsolute("___safe_se_handler_count", 0); + } + + // We do not support /guard:cf (control flow protection) yet. + // Define CFG symbols anyway so that we can link MSVC 2015 CRT. + Symtab.addAbsolute(mangle("__guard_fids_table"), 0); + Symtab.addAbsolute(mangle("__guard_fids_count"), 0); + Symtab.addAbsolute(mangle("__guard_flags"), 0x100); + + // Read as much files as we can from directives sections. + Symtab.run(); + + // Resolve auxiliary symbols until we get a convergence. + // (Trying to resolve a symbol may trigger a Lazy symbol to load a new file. + // A new file may contain a directive section to add new command line options. + // That's why we have to repeat until converge.) + for (;;) { + // Windows specific -- if entry point is not found, + // search for its mangled names. + if (Config->Entry) + Symtab.mangleMaybe(Config->Entry); + + // Windows specific -- Make sure we resolve all dllexported symbols. + for (Export &E : Config->Exports) { + if (!E.ForwardTo.empty()) + continue; + E.Sym = addUndefined(E.Name); + if (!E.Directives) + Symtab.mangleMaybe(E.Sym); + } + + // Add weak aliases. Weak aliases is a mechanism to give remaining + // undefined symbols final chance to be resolved successfully. 
+ for (auto Pair : Config->AlternateNames) { + StringRef From = Pair.first; + StringRef To = Pair.second; + Symbol *Sym = Symtab.find(From); + if (!Sym) + continue; + if (auto *U = dyn_cast(Sym->Body)) + if (!U->WeakAlias) + U->WeakAlias = Symtab.addUndefined(To); + } + + // Windows specific -- if __load_config_used can be resolved, resolve it. + if (Symtab.findUnderscore("_load_config_used")) + addUndefined(mangle("_load_config_used")); + + if (Symtab.queueEmpty()) + break; + Symtab.run(); + } + + // Do LTO by compiling bitcode input files to a set of native COFF files then + // link those files. + Symtab.addCombinedLTOObjects(); + + // Make sure we have resolved all symbols. + Symtab.reportRemainingUndefines(/*Resolve=*/true); + + // Windows specific -- if no /subsystem is given, we need to infer + // that from entry point name. + if (Config->Subsystem == IMAGE_SUBSYSTEM_UNKNOWN) { + Config->Subsystem = inferSubsystem(); + if (Config->Subsystem == IMAGE_SUBSYSTEM_UNKNOWN) + fatal("subsystem must be defined"); + } + + // Handle /safeseh. + if (Args.hasArg(OPT_safeseh)) + for (ObjectFile *File : Symtab.ObjectFiles) + if (!File->SEHCompat) + fatal("/safeseh: " + File->getName() + " is not compatible with SEH"); + + // Windows specific -- when we are creating a .dll file, we also + // need to create a .lib file. + if (!Config->Exports.empty() || Config->DLL) { + fixupExports(); + writeImportLibrary(); + assignExportOrdinals(); + } + + // Windows specific -- Create a side-by-side manifest file. + if (Config->Manifest == Configuration::SideBySide) + createSideBySideManifest(); + + // Create a dummy PDB file to satisfy build sytem rules. + if (auto *Arg = Args.getLastArg(OPT_pdb)) + createPDB(Arg->getValue()); + + // Identify unreferenced COMDAT sections. + if (Config->DoGC) + markLive(Symtab.getChunks()); + + // Identify identical COMDAT sections to merge them. + if (Config->DoICF) + doICF(Symtab.getChunks()); + + // Write the result. 
+ writeResult(&Symtab); + + // Create a symbol map file containing symbol VAs and their names + // to help debugging. + if (auto *Arg = Args.getLastArg(OPT_lldmap)) { + std::error_code EC; + llvm::raw_fd_ostream Out(Arg->getValue(), EC, OpenFlags::F_Text); + if (EC) + fatal(EC, "could not create the symbol map"); + Symtab.printMap(Out); + } + // Call exit to avoid calling destructors. + exit(0); +} + +} // namespace coff +} // namespace lld diff --git a/COFF/Driver.h b/COFF/Driver.h new file mode 100644 index 00000000..23969ee8 --- /dev/null +++ b/COFF/Driver.h @@ -0,0 +1,177 @@ +//===- Driver.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_DRIVER_H +#define LLD_COFF_DRIVER_H + +#include "Config.h" +#include "SymbolTable.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/COFF.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Support/StringSaver.h" +#include +#include +#include + +namespace lld { +namespace coff { + +class LinkerDriver; +extern LinkerDriver *Driver; + +using llvm::COFF::MachineTypes; +using llvm::COFF::WindowsSubsystem; +using llvm::Optional; +class InputFile; + +// Implemented in MarkLive.cpp. +void markLive(const std::vector &Chunks); + +// Implemented in ICF.cpp. +void doICF(const std::vector &Chunks); + +class ArgParser { +public: + ArgParser() : Alloc(AllocAux) {} + // Parses command line options. + llvm::opt::InputArgList parse(llvm::ArrayRef Args); + + // Concatenate LINK environment varirable and given arguments and parse them. + llvm::opt::InputArgList parseLINK(llvm::ArrayRef Args); + + // Tokenizes a given string and then parses as command line options. 
+ llvm::opt::InputArgList parse(StringRef S) { return parse(tokenize(S)); } + +private: + std::vector tokenize(StringRef S); + + std::vector replaceResponseFiles(std::vector); + + llvm::BumpPtrAllocator AllocAux; + llvm::StringSaver Alloc; +}; + +class LinkerDriver { +public: + LinkerDriver() : Alloc(AllocAux) {} + void link(llvm::ArrayRef Args); + + // Used by the resolver to parse .drectve section contents. + void parseDirectives(StringRef S); + +private: + llvm::BumpPtrAllocator AllocAux; + llvm::StringSaver Alloc; + ArgParser Parser; + SymbolTable Symtab; + + // Opens a file. Path has to be resolved already. + MemoryBufferRef openFile(StringRef Path); + + // Searches a file from search paths. + Optional findFile(StringRef Filename); + Optional findLib(StringRef Filename); + StringRef doFindFile(StringRef Filename); + StringRef doFindLib(StringRef Filename); + + // Parses LIB environment which contains a list of search paths. + void addLibSearchPaths(); + + // Library search path. The first element is always "" (current directory). + std::vector SearchPaths; + std::set VisitedFiles; + + Undefined *addUndefined(StringRef Sym); + StringRef mangle(StringRef Sym); + + // Windows specific -- "main" is not the only main function in Windows. + // You can choose one from these four -- {w,}{WinMain,main}. + // There are four different entry point functions for them, + // {w,}{WinMain,main}CRTStartup, respectively. The linker needs to + // choose the right one depending on which "main" function is defined. + // This function looks up the symbol table and resolve corresponding + // entry point name. + StringRef findDefaultEntry(); + WindowsSubsystem inferSubsystem(); + + // Driver is the owner of all opened files. + // InputFiles have MemoryBufferRefs to them. + std::vector> OwningMBs; +}; + +void parseModuleDefs(MemoryBufferRef MB, llvm::StringSaver *Alloc); +void writeImportLibrary(); + +// Functions below this line are defined in DriverUtils.cpp. 
+ +void printHelp(const char *Argv0); + +// For /machine option. +MachineTypes getMachineType(StringRef Arg); +StringRef machineToStr(MachineTypes MT); + +// Parses a string in the form of "[,]". +void parseNumbers(StringRef Arg, uint64_t *Addr, uint64_t *Size = nullptr); + +// Parses a string in the form of "[.]". +// Minor's default value is 0. +void parseVersion(StringRef Arg, uint32_t *Major, uint32_t *Minor); + +// Parses a string in the form of "[,[.]]". +void parseSubsystem(StringRef Arg, WindowsSubsystem *Sys, uint32_t *Major, + uint32_t *Minor); + +void parseAlternateName(StringRef); +void parseMerge(StringRef); +void parseSection(StringRef); + +// Parses a string in the form of "EMBED[,=]|NO". +void parseManifest(StringRef Arg); + +// Parses a string in the form of "level=|uiAccess=" +void parseManifestUAC(StringRef Arg); + +// Create a resource file containing a manifest XML. +std::unique_ptr createManifestRes(); +void createSideBySideManifest(); + +// Used for dllexported symbols. +Export parseExport(StringRef Arg); +void fixupExports(); +void assignExportOrdinals(); + +// Parses a string in the form of "key=value" and check +// if value matches previous values for the key. +// This feature used in the directive section to reject +// incompatible objects. +void checkFailIfMismatch(StringRef Arg); + +// Convert Windows resource files (.res files) to a .obj file +// using cvtres.exe. 
+std::unique_ptr +convertResToCOFF(const std::vector &MBs); + +void createPDB(StringRef Path); + +// Create enum with OPT_xxx values for each option in Options.td +enum { + OPT_INVALID = 0, +#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11) OPT_##ID, +#include "Options.inc" +#undef OPTION +}; + +} // namespace coff +} // namespace lld + +#endif diff --git a/COFF/DriverUtils.cpp b/COFF/DriverUtils.cpp new file mode 100644 index 00000000..5d7dc2bc --- /dev/null +++ b/COFF/DriverUtils.cpp @@ -0,0 +1,659 @@ +//===- DriverUtils.cpp ----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains utility functions for the driver. Because there +// are so many small functions, we created this separate file to make +// Driver.cpp less cluttered. 
+// +//===----------------------------------------------------------------------===// + +#include "Config.h" +#include "Driver.h" +#include "Error.h" +#include "Symbols.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Object/COFF.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm::COFF; +using namespace llvm; +using llvm::cl::ExpandResponseFiles; +using llvm::cl::TokenizeWindowsCommandLine; +using llvm::sys::Process; + +namespace lld { +namespace coff { +namespace { + +class Executor { +public: + explicit Executor(StringRef S) : Saver(Alloc), Prog(Saver.save(S)) {} + void add(StringRef S) { Args.push_back(Saver.save(S)); } + void add(std::string &S) { Args.push_back(Saver.save(S)); } + void add(Twine S) { Args.push_back(Saver.save(S)); } + void add(const char *S) { Args.push_back(Saver.save(S)); } + + void run() { + ErrorOr ExeOrErr = llvm::sys::findProgramByName(Prog); + if (auto EC = ExeOrErr.getError()) + fatal(EC, "unable to find " + Prog + " in PATH: "); + const char *Exe = Saver.save(*ExeOrErr); + Args.insert(Args.begin(), Exe); + Args.push_back(nullptr); + if (llvm::sys::ExecuteAndWait(Args[0], Args.data()) != 0) { + for (const char *S : Args) + if (S) + llvm::errs() << S << " "; + fatal("ExecuteAndWait failed"); + } + } + +private: + llvm::BumpPtrAllocator Alloc; + llvm::StringSaver Saver; + StringRef Prog; + std::vector Args; +}; + +} // anonymous namespace + +// Returns /machine's value. 
+MachineTypes getMachineType(StringRef S) {
+  // Matching is case-insensitive: the argument is lowercased before lookup.
+  MachineTypes MT = StringSwitch<MachineTypes>(S.lower())
+                        .Case("x64", AMD64)
+                        .Case("amd64", AMD64)
+                        .Case("x86", I386)
+                        .Case("i386", I386)
+                        .Case("arm", ARMNT)
+                        .Default(IMAGE_FILE_MACHINE_UNKNOWN);
+  if (MT != IMAGE_FILE_MACHINE_UNKNOWN)
+    return MT;
+  // fatal() is declared noreturn, so no return value is needed here.
+  fatal("unknown /machine argument: " + S);
+}
+
+// Returns the /machine spelling ("arm", "x64" or "x86") of a machine type.
+// Any other value is treated as a programming error.
+StringRef machineToStr(MachineTypes MT) {
+  switch (MT) {
+  case ARMNT:
+    return "arm";
+  case AMD64:
+    return "x64";
+  case I386:
+    return "x86";
+  default:
+    llvm_unreachable("unknown machine type");
+  }
+}
+
+// Parses a string in the form of "<integer>[,<integer>]".
+// The first number is stored to *Addr. The second number is stored to *Size
+// only if Size is non-null and the number is present; otherwise *Size is
+// left untouched. Radix 0 lets getAsInteger pick the base from the prefix.
+// Reports a fatal error if either number is malformed.
+void parseNumbers(StringRef Arg, uint64_t *Addr, uint64_t *Size) {
+  StringRef S1, S2;
+  std::tie(S1, S2) = Arg.split(',');
+  if (S1.getAsInteger(0, *Addr))
+    fatal("invalid number: " + S1);
+  if (Size && !S2.empty() && S2.getAsInteger(0, *Size))
+    fatal("invalid number: " + S2);
+}
+
+// Parses a string in the form of "<integer>[.<integer>]".
+// If second number is not present, Minor is set to 0.
+// Reports a fatal error if either number is malformed.
+void parseVersion(StringRef Arg, uint32_t *Major, uint32_t *Minor) {
+  StringRef S1, S2;
+  std::tie(S1, S2) = Arg.split('.');
+  if (S1.getAsInteger(0, *Major))
+    fatal("invalid number: " + S1);
+  *Minor = 0;
+  if (!S2.empty() && S2.getAsInteger(0, *Minor))
+    fatal("invalid number: " + S2);
+}
+
+// Parses a string in the form of "<string>[,<integer>[.<integer>]]".
+void parseSubsystem(StringRef Arg, WindowsSubsystem *Sys, uint32_t *Major,
+                    uint32_t *Minor) {
+  // Split "<subsystem>[,<version>]"; the subsystem name is case-insensitive.
+  StringRef SysStr, Ver;
+  std::tie(SysStr, Ver) = Arg.split(',');
+  *Sys = StringSwitch<WindowsSubsystem>(SysStr.lower())
+    .Case("boot_application", IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION)
+    .Case("console", IMAGE_SUBSYSTEM_WINDOWS_CUI)
+    .Case("efi_application", IMAGE_SUBSYSTEM_EFI_APPLICATION)
+    .Case("efi_boot_service_driver", IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER)
+    .Case("efi_rom", IMAGE_SUBSYSTEM_EFI_ROM)
+    .Case("efi_runtime_driver", IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER)
+    .Case("native", IMAGE_SUBSYSTEM_NATIVE)
+    .Case("posix", IMAGE_SUBSYSTEM_POSIX_CUI)
+    .Case("windows", IMAGE_SUBSYSTEM_WINDOWS_GUI)
+    .Default(IMAGE_SUBSYSTEM_UNKNOWN);
+  if (*Sys == IMAGE_SUBSYSTEM_UNKNOWN)
+    fatal("unknown subsystem: " + SysStr);
+  // *Major and *Minor are left untouched when no version is given.
+  if (!Ver.empty())
+    parseVersion(Ver, Major, Minor);
+}
+
+// Parse a string of the form of "<from>=<to>".
+// Results are directly written to Config.
+// Repeating an existing mapping is accepted; mapping the same <from> to a
+// different <to> is a fatal error.
+void parseAlternateName(StringRef S) {
+  StringRef From, To;
+  std::tie(From, To) = S.split('=');
+  if (From.empty() || To.empty())
+    fatal("/alternatename: invalid argument: " + S);
+  auto It = Config->AlternateNames.find(From);
+  if (It != Config->AlternateNames.end() && It->second != To)
+    fatal("/alternatename: conflicts: " + S);
+  Config->AlternateNames.insert(It, std::make_pair(From, To));
+}
+
+// Parse a string of the form of "<from>=<to>".
+// Results are directly written to Config.
+void parseMerge(StringRef S) {
+  StringRef From, To;
+  std::tie(From, To) = S.split('=');
+  if (From.empty() || To.empty())
+    fatal("/merge: invalid argument: " + S);
+  auto Pair = Config->Merge.insert(std::make_pair(From, To));
+  bool Inserted = Pair.second;
+  if (!Inserted) {
+    // The first mapping for a section wins; a conflicting later one is only
+    // warned about, and an identical repeat is silently accepted.
+    StringRef Existing = Pair.first->second;
+    if (Existing != To)
+      llvm::errs() << "warning: " << S << ": already merged into "
+                   << Existing << "\n";
+  }
+}
+
+// Converts a /section attribute string (case-insensitive letters out of
+// "dekprsw") to the corresponding IMAGE_SCN_MEM_* bit mask.
+// Reports a fatal error on any other character.
+static uint32_t parseSectionAttributes(StringRef S) {
+  uint32_t Ret = 0;
+  for (char C : S.lower()) {
+    switch (C) {
+    case 'd':
+      Ret |= IMAGE_SCN_MEM_DISCARDABLE;
+      break;
+    case 'e':
+      Ret |= IMAGE_SCN_MEM_EXECUTE;
+      break;
+    case 'k':
+      Ret |= IMAGE_SCN_MEM_NOT_CACHED;
+      break;
+    case 'p':
+      Ret |= IMAGE_SCN_MEM_NOT_PAGED;
+      break;
+    case 'r':
+      Ret |= IMAGE_SCN_MEM_READ;
+      break;
+    case 's':
+      Ret |= IMAGE_SCN_MEM_SHARED;
+      break;
+    case 'w':
+      Ret |= IMAGE_SCN_MEM_WRITE;
+      break;
+    default:
+      fatal("/section: invalid argument: " + S);
+    }
+  }
+  return Ret;
+}
+
+// Parses /section option argument, which is in the form of
+// "<name>,<attributes>". A later option for the same section name
+// overwrites the earlier one. Results are directly written to Config.
+void parseSection(StringRef S) {
+  StringRef Name, Attrs;
+  std::tie(Name, Attrs) = S.split(',');
+  if (Name.empty() || Attrs.empty())
+    fatal("/section: invalid argument: " + S);
+  Config->Section[Name] = parseSectionAttributes(Attrs);
+}
+
+// Parses a string in the form of "EMBED[,ID=<integer>]|NO".
+// Results are directly written to Config.
+void parseManifest(StringRef Arg) {
+  if (Arg.equals_lower("no")) {
+    Config->Manifest = Configuration::No;
+    return;
+  }
+  if (!Arg.startswith_lower("embed"))
+    fatal("invalid option " + Arg);
+  Config->Manifest = Configuration::Embed;
+
+  // Keep Arg intact so that diagnostics show the whole argument the user
+  // wrote rather than only the part that has not been consumed yet.
+  StringRef Rest = Arg.substr(strlen("embed"));
+  if (Rest.empty())
+    return;
+  if (!Rest.startswith_lower(",id="))
+    fatal("invalid option " + Arg);
+  if (Rest.substr(strlen(",id=")).getAsInteger(0, Config->ManifestID))
+    fatal("invalid option " + Arg);
+}
+
+// Parses a string in the form of "level=<string>|uiAccess=<string>|NO".
+// Results are directly written to Config.
+void parseManifestUAC(StringRef Arg) { + if (Arg.equals_lower("no")) { + Config->ManifestUAC = false; + return; + } + for (;;) { + Arg = Arg.ltrim(); + if (Arg.empty()) + return; + if (Arg.startswith_lower("level=")) { + Arg = Arg.substr(strlen("level=")); + std::tie(Config->ManifestLevel, Arg) = Arg.split(" "); + continue; + } + if (Arg.startswith_lower("uiaccess=")) { + Arg = Arg.substr(strlen("uiaccess=")); + std::tie(Config->ManifestUIAccess, Arg) = Arg.split(" "); + continue; + } + fatal("invalid option " + Arg); + } +} + +// Quote each line with "". Existing double-quote is converted +// to two double-quotes. +static void quoteAndPrint(raw_ostream &Out, StringRef S) { + while (!S.empty()) { + StringRef Line; + std::tie(Line, S) = S.split("\n"); + if (Line.empty()) + continue; + Out << '\"'; + for (int I = 0, E = Line.size(); I != E; ++I) { + if (Line[I] == '\"') { + Out << "\"\""; + } else { + Out << Line[I]; + } + } + Out << "\"\n"; + } +} + +// Create the default manifest file as a temporary file. +static std::string createDefaultXml() { + // Create a temporary file. + SmallString<128> Path; + if (auto EC = sys::fs::createTemporaryFile("tmp", "manifest", Path)) + fatal(EC, "cannot create a temporary file"); + + // Open the temporary file for writing. + std::error_code EC; + llvm::raw_fd_ostream OS(Path, EC, sys::fs::F_Text); + if (EC) + fatal(EC, "failed to open " + Path); + + // Emit the XML. Note that we do *not* verify that the XML attributes are + // syntactically correct. This is intentional for link.exe compatibility. 
+ OS << "\n" + << "\n"; + if (Config->ManifestUAC) { + OS << " \n" + << " \n" + << " \n" + << " \n" + << " \n" + << " \n" + << " \n"; + if (!Config->ManifestDependency.empty()) { + OS << " \n" + << " \n" + << " ManifestDependency << " />\n" + << " \n" + << " \n"; + } + } + OS << "\n"; + OS.close(); + return StringRef(Path); +} + +static std::string readFile(StringRef Path) { + std::unique_ptr MB = + check(MemoryBuffer::getFile(Path), "could not open " + Path); + std::unique_ptr Buf(std::move(MB)); + return Buf->getBuffer(); +} + +static std::string createManifestXml() { + // Create the default manifest file. + std::string Path1 = createDefaultXml(); + if (Config->ManifestInput.empty()) + return readFile(Path1); + + // If manifest files are supplied by the user using /MANIFESTINPUT + // option, we need to merge them with the default manifest. + SmallString<128> Path2; + if (auto EC = sys::fs::createTemporaryFile("tmp", "manifest", Path2)) + fatal(EC, "cannot create a temporary file"); + FileRemover Remover1(Path1); + FileRemover Remover2(Path2); + + Executor E("mt.exe"); + E.add("/manifest"); + E.add(Path1); + for (StringRef Filename : Config->ManifestInput) { + E.add("/manifest"); + E.add(Filename); + } + E.add("/nologo"); + E.add("/out:" + StringRef(Path2)); + E.run(); + return readFile(Path2); +} + +// Create a resource file containing a manifest XML. +std::unique_ptr createManifestRes() { + // Create a temporary file for the resource script file. + SmallString<128> RCPath; + if (auto EC = sys::fs::createTemporaryFile("tmp", "rc", RCPath)) + fatal(EC, "cannot create a temporary file"); + FileRemover RCRemover(RCPath); + + // Open the temporary file for writing. + std::error_code EC; + llvm::raw_fd_ostream Out(RCPath, EC, sys::fs::F_Text); + if (EC) + fatal(EC, "failed to open " + RCPath); + + // Write resource script to the RC file. 
+ Out << "#define LANG_ENGLISH 9\n" + << "#define SUBLANG_DEFAULT 1\n" + << "#define APP_MANIFEST " << Config->ManifestID << "\n" + << "#define RT_MANIFEST 24\n" + << "LANGUAGE LANG_ENGLISH, SUBLANG_DEFAULT\n" + << "APP_MANIFEST RT_MANIFEST {\n"; + quoteAndPrint(Out, createManifestXml()); + Out << "}\n"; + Out.close(); + + // Create output resource file. + SmallString<128> ResPath; + if (auto EC = sys::fs::createTemporaryFile("tmp", "res", ResPath)) + fatal(EC, "cannot create a temporary file"); + + Executor E("rc.exe"); + E.add("/fo"); + E.add(ResPath.str()); + E.add("/nologo"); + E.add(RCPath.str()); + E.run(); + return check(MemoryBuffer::getFile(ResPath), "could not open " + ResPath); +} + +void createSideBySideManifest() { + std::string Path = Config->ManifestFile; + if (Path == "") + Path = Config->OutputFile + ".manifest"; + std::error_code EC; + llvm::raw_fd_ostream Out(Path, EC, llvm::sys::fs::F_Text); + if (EC) + fatal(EC, "failed to create manifest"); + Out << createManifestXml(); +} + +// Parse a string in the form of +// "[=][,@ordinal[,NONAME]][,DATA][,PRIVATE]" +// or "=.". +// Used for parsing /export arguments. +Export parseExport(StringRef Arg) { + Export E; + StringRef Rest; + std::tie(E.Name, Rest) = Arg.split(","); + if (E.Name.empty()) + goto err; + + if (E.Name.find('=') != StringRef::npos) { + StringRef X, Y; + std::tie(X, Y) = E.Name.split("="); + + // If "=.". 
+ if (Y.find(".") != StringRef::npos) { + E.Name = X; + E.ForwardTo = Y; + return E; + } + + E.ExtName = X; + E.Name = Y; + if (E.Name.empty()) + goto err; + } + + // If "=[,@ordinal[,NONAME]][,DATA][,PRIVATE]" + while (!Rest.empty()) { + StringRef Tok; + std::tie(Tok, Rest) = Rest.split(","); + if (Tok.equals_lower("noname")) { + if (E.Ordinal == 0) + goto err; + E.Noname = true; + continue; + } + if (Tok.equals_lower("data")) { + E.Data = true; + continue; + } + if (Tok.equals_lower("private")) { + E.Private = true; + continue; + } + if (Tok.startswith("@")) { + int32_t Ord; + if (Tok.substr(1).getAsInteger(0, Ord)) + goto err; + if (Ord <= 0 || 65535 < Ord) + goto err; + E.Ordinal = Ord; + continue; + } + goto err; + } + return E; + +err: + fatal("invalid /export: " + Arg); +} + +static StringRef undecorate(StringRef Sym) { + if (Config->Machine != I386) + return Sym; + return Sym.startswith("_") ? Sym.substr(1) : Sym; +} + +// Performs error checking on all /export arguments. +// It also sets ordinals. +void fixupExports() { + // Symbol ordinals must be unique. + std::set Ords; + for (Export &E : Config->Exports) { + if (E.Ordinal == 0) + continue; + if (!Ords.insert(E.Ordinal).second) + fatal("duplicate export ordinal: " + E.Name); + } + + for (Export &E : Config->Exports) { + if (!E.ForwardTo.empty()) { + E.SymbolName = E.Name; + } else if (Undefined *U = cast_or_null(E.Sym->WeakAlias)) { + E.SymbolName = U->getName(); + } else { + E.SymbolName = E.Sym->getName(); + } + } + + for (Export &E : Config->Exports) { + if (!E.ForwardTo.empty()) { + E.ExportName = undecorate(E.Name); + } else { + E.ExportName = undecorate(E.ExtName.empty() ? E.Name : E.ExtName); + } + } + + // Uniquefy by name. 
+ std::map Map; + std::vector V; + for (Export &E : Config->Exports) { + auto Pair = Map.insert(std::make_pair(E.ExportName, &E)); + bool Inserted = Pair.second; + if (Inserted) { + V.push_back(E); + continue; + } + Export *Existing = Pair.first->second; + if (E == *Existing || E.Name != Existing->Name) + continue; + llvm::errs() << "warning: duplicate /export option: " << E.Name << "\n"; + } + Config->Exports = std::move(V); + + // Sort by name. + std::sort(Config->Exports.begin(), Config->Exports.end(), + [](const Export &A, const Export &B) { + return A.ExportName < B.ExportName; + }); +} + +void assignExportOrdinals() { + // Assign unique ordinals if default (= 0). + uint16_t Max = 0; + for (Export &E : Config->Exports) + Max = std::max(Max, E.Ordinal); + for (Export &E : Config->Exports) + if (E.Ordinal == 0) + E.Ordinal = ++Max; +} + +// Parses a string in the form of "key=value" and check +// if value matches previous values for the same key. +void checkFailIfMismatch(StringRef Arg) { + StringRef K, V; + std::tie(K, V) = Arg.split('='); + if (K.empty() || V.empty()) + fatal("/failifmismatch: invalid argument: " + Arg); + StringRef Existing = Config->MustMatch[K]; + if (!Existing.empty() && V != Existing) + fatal("/failifmismatch: mismatch detected: " + Existing + " and " + V + + " for key " + K); + Config->MustMatch[K] = V; +} + +// Convert Windows resource files (.res files) to a .obj file +// using cvtres.exe. +std::unique_ptr +convertResToCOFF(const std::vector &MBs) { + // Create an output file path. + SmallString<128> Path; + if (auto EC = llvm::sys::fs::createTemporaryFile("resource", "obj", Path)) + fatal(EC, "could not create temporary file"); + + // Execute cvtres.exe. 
+ Executor E("cvtres.exe"); + E.add("/machine:" + machineToStr(Config->Machine)); + E.add("/readonly"); + E.add("/nologo"); + E.add("/out:" + Path); + for (MemoryBufferRef MB : MBs) + E.add(MB.getBufferIdentifier()); + E.run(); + return check(MemoryBuffer::getFile(Path), "could not open " + Path); +} + +// Create OptTable + +// Create prefix string literals used in Options.td +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "Options.inc" +#undef PREFIX + +// Create table mapping all options defined in Options.td +static const llvm::opt::OptTable::Info infoTable[] = { +#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10) \ + { \ + X1, X2, X9, X10, OPT_##ID, llvm::opt::Option::KIND##Class, X8, X7, \ + OPT_##GROUP, OPT_##ALIAS, X6 \ + }, +#include "Options.inc" +#undef OPTION +}; + +class COFFOptTable : public llvm::opt::OptTable { +public: + COFFOptTable() : OptTable(infoTable, true) {} +}; + +// Parses a given list of options. +llvm::opt::InputArgList ArgParser::parse(ArrayRef ArgsArr) { + // First, replace respnose files (@-style options). + std::vector Argv = replaceResponseFiles(ArgsArr); + + // Make InputArgList from string vectors. + COFFOptTable Table; + unsigned MissingIndex; + unsigned MissingCount; + llvm::opt::InputArgList Args = + Table.ParseArgs(Argv, MissingIndex, MissingCount); + + // Print the real command line if response files are expanded. + if (Args.hasArg(OPT_verbose) && ArgsArr.size() != Argv.size()) { + llvm::outs() << "Command line:"; + for (const char *S : Argv) + llvm::outs() << " " << S; + llvm::outs() << "\n"; + } + + if (MissingCount) + fatal("missing arg value for \"" + Twine(Args.getArgString(MissingIndex)) + + "\", expected " + Twine(MissingCount) + + (MissingCount == 1 ? " argument." 
: " arguments.")); + for (auto *Arg : Args.filtered(OPT_UNKNOWN)) + llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n"; + return Args; +} + +llvm::opt::InputArgList ArgParser::parseLINK(ArrayRef Args) { + // Concatenate LINK env and given arguments and parse them. + Optional Env = Process::GetEnv("LINK"); + if (!Env) + return parse(Args); + std::vector V = tokenize(*Env); + V.insert(V.end(), Args.begin(), Args.end()); + return parse(V); +} + +std::vector ArgParser::tokenize(StringRef S) { + SmallVector Tokens; + StringSaver Saver(AllocAux); + llvm::cl::TokenizeWindowsCommandLine(S, Saver, Tokens); + return std::vector(Tokens.begin(), Tokens.end()); +} + +// Creates a new command line by replacing options starting with '@' +// character. '@' is replaced by the file's contents. +std::vector +ArgParser::replaceResponseFiles(std::vector Argv) { + SmallVector Tokens(Argv.data(), Argv.data() + Argv.size()); + StringSaver Saver(AllocAux); + ExpandResponseFiles(Saver, TokenizeWindowsCommandLine, Tokens); + return std::vector(Tokens.begin(), Tokens.end()); +} + +void printHelp(const char *Argv0) { + COFFOptTable Table; + Table.PrintHelp(llvm::outs(), Argv0, "LLVM Linker", false); +} + +} // namespace coff +} // namespace lld diff --git a/COFF/Error.cpp b/COFF/Error.cpp new file mode 100644 index 00000000..602a8544 --- /dev/null +++ b/COFF/Error.cpp @@ -0,0 +1,33 @@ +//===- Error.cpp ----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Error.h" + +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" + +namespace lld { +namespace coff { + +void fatal(const Twine &Msg) { + llvm::errs() << Msg << "\n"; + exit(1); +} + +void fatal(std::error_code EC, const Twine &Msg) { + fatal(Msg + ": " + EC.message()); +} + +void fatal(llvm::Error &Err, const Twine &Msg) { + fatal(errorToErrorCode(std::move(Err)), Msg); +} + +} // namespace coff +} // namespace lld diff --git a/COFF/Error.h b/COFF/Error.h new file mode 100644 index 00000000..c9f64c66 --- /dev/null +++ b/COFF/Error.h @@ -0,0 +1,38 @@ +//===- Error.h --------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_ERROR_H +#define LLD_COFF_ERROR_H + +#include "lld/Core/LLVM.h" +#include "llvm/Support/Error.h" + +namespace lld { +namespace coff { + +LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg); +LLVM_ATTRIBUTE_NORETURN void fatal(std::error_code EC, const Twine &Prefix); +LLVM_ATTRIBUTE_NORETURN void fatal(llvm::Error &Err, const Twine &Prefix); + +template T check(ErrorOr &&V, const Twine &Prefix) { + if (auto EC = V.getError()) + fatal(EC, Prefix); + return std::move(*V); +} + +template T check(Expected E, const Twine &Prefix) { + if (llvm::Error Err = E.takeError()) + fatal(Err, Prefix); + return std::move(*E); +} + +} // namespace coff +} // namespace lld + +#endif diff --git a/COFF/ICF.cpp b/COFF/ICF.cpp new file mode 100644 index 00000000..a2c5a903 --- /dev/null +++ b/COFF/ICF.cpp @@ -0,0 +1,244 @@ +//===- ICF.cpp ------------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the 
University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Identical COMDAT Folding is a feature to merge COMDAT sections not by +// name (which is regular COMDAT handling) but by contents. If two COMDAT +// sections have the same data, relocations, attributes, etc., then the two +// are considered identical and merged by the linker. This optimization +// makes outputs smaller. +// +// ICF is theoretically a problem of reducing graphs by merging as many +// identical subgraphs as possible, if we consider sections as vertices and +// relocations as edges. This may be a bit more complicated problem than you +// might think. The order of processing sections matters since merging two +// sections can make other sections, whose relocations now point to the same +// section, mergeable. Graphs may contain cycles, which is common in COFF. +// We need a sophisticated algorithm to do this properly and efficiently. +// +// What we do in this file is this. We split sections into groups. Sections +// in the same group are considered identical. +// +// First, all sections are grouped by their "constant" values. Constant +// values are values that are never changed by ICF, such as section contents, +// section name, number of relocations, type and offset of each relocation, +// etc. Because we do not care about some relocation targets in this step, +// two sections in the same group may not be identical, but at least two +// sections in different groups can never be identical. +// +// Then, we try to split each group by relocation targets. Relocations are +// considered identical if and only if the relocation targets are in the +// same group. Splitting a group may make more groups to be splittable, +// because two relocations that were previously considered identical might +// now point to different groups. We repeat this step until the convergence +// is obtained. 
+// +// This algorithm is so-called "optimistic" algorithm described in +// http://research.google.com/pubs/pub36912.html. +// +//===----------------------------------------------------------------------===// + +#include "Chunks.h" +#include "Symbols.h" +#include "lld/Core/Parallel.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include + +using namespace llvm; + +namespace lld { +namespace coff { + +typedef std::vector::iterator ChunkIterator; +typedef bool (*Comparator)(const SectionChunk *, const SectionChunk *); + +class ICF { +public: + void run(const std::vector &V); + +private: + static uint64_t getHash(SectionChunk *C); + static bool equalsConstant(const SectionChunk *A, const SectionChunk *B); + static bool equalsVariable(const SectionChunk *A, const SectionChunk *B); + bool forEachGroup(std::vector &Chunks, Comparator Eq); + bool segregate(ChunkIterator Begin, ChunkIterator End, Comparator Eq); + + std::atomic NextID = { 1 }; +}; + +// Entry point to ICF. +void doICF(const std::vector &Chunks) { + ICF().run(Chunks); +} + +uint64_t ICF::getHash(SectionChunk *C) { + return hash_combine(C->getPermissions(), + hash_value(C->SectionName), + C->NumRelocs, + C->getAlign(), + uint32_t(C->Header->SizeOfRawData), + C->Checksum); +} + +bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) { + if (A->AssocChildren.size() != B->AssocChildren.size() || + A->NumRelocs != B->NumRelocs) { + return false; + } + + // Compare associative sections. + for (size_t I = 0, E = A->AssocChildren.size(); I != E; ++I) + if (A->AssocChildren[I]->GroupID != B->AssocChildren[I]->GroupID) + return false; + + // Compare relocations. 
+ auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) { + if (R1.Type != R2.Type || + R1.VirtualAddress != R2.VirtualAddress) { + return false; + } + SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl(); + SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl(); + if (B1 == B2) + return true; + if (auto *D1 = dyn_cast(B1)) + if (auto *D2 = dyn_cast(B2)) + return D1->getValue() == D2->getValue() && + D1->getChunk()->GroupID == D2->getChunk()->GroupID; + return false; + }; + if (!std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq)) + return false; + + // Compare section attributes and contents. + return A->getPermissions() == B->getPermissions() && + A->SectionName == B->SectionName && + A->getAlign() == B->getAlign() && + A->Header->SizeOfRawData == B->Header->SizeOfRawData && + A->Checksum == B->Checksum && + A->getContents() == B->getContents(); +} + +bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) { + // Compare associative sections. + for (size_t I = 0, E = A->AssocChildren.size(); I != E; ++I) + if (A->AssocChildren[I]->GroupID != B->AssocChildren[I]->GroupID) + return false; + + // Compare relocations. 
+ auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) { + SymbolBody *B1 = A->File->getSymbolBody(R1.SymbolTableIndex)->repl(); + SymbolBody *B2 = B->File->getSymbolBody(R2.SymbolTableIndex)->repl(); + if (B1 == B2) + return true; + if (auto *D1 = dyn_cast(B1)) + if (auto *D2 = dyn_cast(B2)) + return D1->getChunk()->GroupID == D2->getChunk()->GroupID; + return false; + }; + return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq); +} + +bool ICF::segregate(ChunkIterator Begin, ChunkIterator End, Comparator Eq) { + bool R = false; + for (auto It = Begin;;) { + SectionChunk *Head = *It; + auto Bound = std::partition(It + 1, End, [&](SectionChunk *SC) { + return Eq(Head, SC); + }); + if (Bound == End) + return R; + uint64_t ID = NextID++; + std::for_each(It, Bound, [&](SectionChunk *SC) { SC->GroupID = ID; }); + It = Bound; + R = true; + } +} + +bool ICF::forEachGroup(std::vector &Chunks, Comparator Eq) { + bool R = false; + for (auto It = Chunks.begin(), End = Chunks.end(); It != End;) { + SectionChunk *Head = *It; + auto Bound = std::find_if(It + 1, End, [&](SectionChunk *SC) { + return SC->GroupID != Head->GroupID; + }); + if (segregate(It, Bound, Eq)) + R = true; + It = Bound; + } + return R; +} + +// Merge identical COMDAT sections. +// Two sections are considered the same if their section headers, +// contents and relocations are all the same. +void ICF::run(const std::vector &Vec) { + // Collect only mergeable sections and group by hash value. 
+ parallel_for_each(Vec.begin(), Vec.end(), [&](Chunk *C) { + if (auto *SC = dyn_cast(C)) { + bool Global = SC->Sym && SC->Sym->isExternal(); + bool Writable = SC->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE; + if (SC->isCOMDAT() && SC->isLive() && Global && !Writable) + SC->GroupID = getHash(SC) | (uint64_t(1) << 63); + } + }); + std::vector Chunks; + for (Chunk *C : Vec) { + if (auto *SC = dyn_cast(C)) { + if (SC->GroupID) { + Chunks.push_back(SC); + } else { + SC->GroupID = NextID++; + } + } + } + + // From now on, sections in Chunks are ordered so that sections in + // the same group are consecutive in the vector. + std::sort(Chunks.begin(), Chunks.end(), + [](SectionChunk *A, SectionChunk *B) { + return A->GroupID < B->GroupID; + }); + + // Split groups until we get a convergence. + int Cnt = 1; + forEachGroup(Chunks, equalsConstant); + + for (;;) { + if (!forEachGroup(Chunks, equalsVariable)) + break; + ++Cnt; + } + if (Config->Verbose) + llvm::outs() << "\nICF needed " << Cnt << " iterations.\n"; + + // Merge sections in the same group. + for (auto It = Chunks.begin(), End = Chunks.end(); It != End;) { + SectionChunk *Head = *It++; + auto Bound = std::find_if(It, End, [&](SectionChunk *SC) { + return Head->GroupID != SC->GroupID; + }); + if (It == Bound) + continue; + if (Config->Verbose) + llvm::outs() << "Selected " << Head->getDebugName() << "\n"; + while (It != Bound) { + SectionChunk *SC = *It++; + if (Config->Verbose) + llvm::outs() << " Removed " << SC->getDebugName() << "\n"; + Head->replace(SC); + } + } +} + +} // namespace coff +} // namespace lld diff --git a/COFF/InputFiles.cpp b/COFF/InputFiles.cpp new file mode 100644 index 00000000..ff268263 --- /dev/null +++ b/COFF/InputFiles.cpp @@ -0,0 +1,373 @@ +//===- InputFiles.cpp -----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Chunks.h" +#include "Config.h" +#include "Error.h" +#include "InputFiles.h" +#include "Symbols.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/LTO/legacy/LTOModule.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm-c/lto.h" +#include +#include +#include + +using namespace llvm::COFF; +using namespace llvm::object; +using namespace llvm::support::endian; + +using llvm::Triple; +using llvm::support::ulittle32_t; + +namespace lld { +namespace coff { + +int InputFile::NextIndex = 0; +llvm::LLVMContext BitcodeFile::Context; + +// Returns the last element of a path, which is supposed to be a filename. +static StringRef getBasename(StringRef Path) { + size_t Pos = Path.find_last_of("\\/"); + if (Pos == StringRef::npos) + return Path; + return Path.substr(Pos + 1); +} + +// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". +std::string InputFile::getShortName() { + if (ParentName == "") + return getName().lower(); + std::string Res = (getBasename(ParentName) + "(" + + getBasename(getName()) + ")").str(); + return StringRef(Res).lower(); +} + +void ArchiveFile::parse() { + // Parse a MemoryBufferRef as an archive file. + File = check(Archive::create(MB), "failed to parse static library"); + + // Allocate a buffer for Lazy objects. + size_t NumSyms = File->getNumberOfSymbols(); + LazySymbols.reserve(NumSyms); + + // Read the symbol table to construct Lazy objects. 
+ for (const Archive::Symbol &Sym : File->symbols()) + LazySymbols.emplace_back(this, Sym); + + // Seen is a map from member files to boolean values. Initially + // all members are mapped to false, which indicates all these files + // are not read yet. + Error Err; + for (auto &Child : File->children(Err)) + Seen[Child.getChildOffset()].clear(); + if (Err) + fatal(Err, "failed to parse static library"); +} + +// Returns a buffer pointing to a member file containing a given symbol. +// This function is thread-safe. +MemoryBufferRef ArchiveFile::getMember(const Archive::Symbol *Sym) { + const Archive::Child &C = + check(Sym->getMember(), + "could not get the member for symbol " + Sym->getName()); + + // Return an empty buffer if we have already returned the same buffer. + if (Seen[C.getChildOffset()].test_and_set()) + return MemoryBufferRef(); + return check(C.getMemoryBufferRef(), + "could not get the buffer for the member defining symbol " + + Sym->getName()); +} + +void ObjectFile::parse() { + // Parse a memory buffer as a COFF file. + std::unique_ptr Bin = + check(createBinary(MB), "failed to parse object file"); + + if (auto *Obj = dyn_cast(Bin.get())) { + Bin.release(); + COFFObj.reset(Obj); + } else { + fatal(getName() + " is not a COFF file"); + } + + // Read section and symbol tables. 
+ initializeChunks(); + initializeSymbols(); + initializeSEH(); +} + +void ObjectFile::initializeChunks() { + uint32_t NumSections = COFFObj->getNumberOfSections(); + Chunks.reserve(NumSections); + SparseChunks.resize(NumSections + 1); + for (uint32_t I = 1; I < NumSections + 1; ++I) { + const coff_section *Sec; + StringRef Name; + if (auto EC = COFFObj->getSection(I, Sec)) + fatal(EC, "getSection failed: #" + Twine(I)); + if (auto EC = COFFObj->getSectionName(Sec, Name)) + fatal(EC, "getSectionName failed: #" + Twine(I)); + if (Name == ".sxdata") { + SXData = Sec; + continue; + } + if (Name == ".drectve") { + ArrayRef Data; + COFFObj->getSectionContents(Sec, Data); + Directives = std::string((const char *)Data.data(), Data.size()); + continue; + } + // Skip non-DWARF debug info. MSVC linker converts the sections into + // a PDB file, but we don't support that. + if (Name == ".debug" || Name.startswith(".debug$")) + continue; + // We want to preserve DWARF debug sections only when /debug is on. + if (!Config->Debug && Name.startswith(".debug")) + continue; + if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE) + continue; + auto *C = new (Alloc) SectionChunk(this, Sec); + Chunks.push_back(C); + SparseChunks[I] = C; + } +} + +void ObjectFile::initializeSymbols() { + uint32_t NumSymbols = COFFObj->getNumberOfSymbols(); + SymbolBodies.reserve(NumSymbols); + SparseSymbolBodies.resize(NumSymbols); + llvm::SmallVector, 8> WeakAliases; + int32_t LastSectionNumber = 0; + for (uint32_t I = 0; I < NumSymbols; ++I) { + // Get a COFFSymbolRef object. 
+ COFFSymbolRef Sym = + check(COFFObj->getSymbol(I), "broken object file: " + getName()); + + const void *AuxP = nullptr; + if (Sym.getNumberOfAuxSymbols()) + AuxP = COFFObj->getSymbol(I + 1)->getRawPtr(); + bool IsFirst = (LastSectionNumber != Sym.getSectionNumber()); + + SymbolBody *Body = nullptr; + if (Sym.isUndefined()) { + Body = createUndefined(Sym); + } else if (Sym.isWeakExternal()) { + Body = createUndefined(Sym); + uint32_t TagIndex = + static_cast(AuxP)->TagIndex; + WeakAliases.emplace_back((Undefined *)Body, TagIndex); + } else { + Body = createDefined(Sym, AuxP, IsFirst); + } + if (Body) { + SymbolBodies.push_back(Body); + SparseSymbolBodies[I] = Body; + } + I += Sym.getNumberOfAuxSymbols(); + LastSectionNumber = Sym.getSectionNumber(); + } + for (auto WeakAlias : WeakAliases) + WeakAlias.first->WeakAlias = SparseSymbolBodies[WeakAlias.second]; +} + +Undefined *ObjectFile::createUndefined(COFFSymbolRef Sym) { + StringRef Name; + COFFObj->getSymbolName(Sym, Name); + return new (Alloc) Undefined(Name); +} + +Defined *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP, + bool IsFirst) { + StringRef Name; + if (Sym.isCommon()) { + auto *C = new (Alloc) CommonChunk(Sym); + Chunks.push_back(C); + return new (Alloc) DefinedCommon(this, Sym, C); + } + if (Sym.isAbsolute()) { + COFFObj->getSymbolName(Sym, Name); + // Skip special symbols. + if (Name == "@comp.id") + return nullptr; + // COFF spec 5.10.1. The .sxdata section. + if (Name == "@feat.00") { + if (Sym.getValue() & 1) + SEHCompat = true; + return nullptr; + } + return new (Alloc) DefinedAbsolute(Name, Sym); + } + int32_t SectionNumber = Sym.getSectionNumber(); + if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG) + return nullptr; + + // Reserved sections numbers don't have contents. + if (llvm::COFF::isReservedSectionNumber(SectionNumber)) + fatal("broken object file: " + getName()); + + // This symbol references a section which is not present in the section + // header. 
+ if ((uint32_t)SectionNumber >= SparseChunks.size()) + fatal("broken object file: " + getName()); + + // Nothing else to do without a section chunk. + auto *SC = cast_or_null(SparseChunks[SectionNumber]); + if (!SC) + return nullptr; + + // Handle section definitions + if (IsFirst && AuxP) { + auto *Aux = reinterpret_cast(AuxP); + if (Aux->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) + if (auto *ParentSC = cast_or_null( + SparseChunks[Aux->getNumber(Sym.isBigObj())])) + ParentSC->addAssociative(SC); + SC->Checksum = Aux->CheckSum; + } + + auto *B = new (Alloc) DefinedRegular(this, Sym, SC); + if (SC->isCOMDAT() && Sym.getValue() == 0 && !AuxP) + SC->setSymbol(B); + + return B; +} + +void ObjectFile::initializeSEH() { + if (!SEHCompat || !SXData) + return; + ArrayRef A; + COFFObj->getSectionContents(SXData, A); + if (A.size() % 4 != 0) + fatal(".sxdata must be an array of symbol table indices"); + auto *I = reinterpret_cast(A.data()); + auto *E = reinterpret_cast(A.data() + A.size()); + for (; I != E; ++I) + SEHandlers.insert(SparseSymbolBodies[*I]); +} + +MachineTypes ObjectFile::getMachineType() { + if (COFFObj) + return static_cast(COFFObj->getMachine()); + return IMAGE_FILE_MACHINE_UNKNOWN; +} + +StringRef ltrim1(StringRef S, const char *Chars) { + if (!S.empty() && strchr(Chars, S[0])) + return S.substr(1); + return S; +} + +void ImportFile::parse() { + const char *Buf = MB.getBufferStart(); + const char *End = MB.getBufferEnd(); + const auto *Hdr = reinterpret_cast(Buf); + + // Check if the total size is valid. + if ((size_t)(End - Buf) != (sizeof(*Hdr) + Hdr->SizeOfData)) + fatal("broken import library"); + + // Read names and create an __imp_ symbol. 
+ StringRef Name = StringAlloc.save(StringRef(Buf + sizeof(*Hdr))); + StringRef ImpName = StringAlloc.save("__imp_" + Name); + const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1; + DLLName = StringRef(NameStart); + StringRef ExtName; + switch (Hdr->getNameType()) { + case IMPORT_ORDINAL: + ExtName = ""; + break; + case IMPORT_NAME: + ExtName = Name; + break; + case IMPORT_NAME_NOPREFIX: + ExtName = ltrim1(Name, "?@_"); + break; + case IMPORT_NAME_UNDECORATE: + ExtName = ltrim1(Name, "?@_"); + ExtName = ExtName.substr(0, ExtName.find('@')); + break; + } + ImpSym = new (Alloc) DefinedImportData(DLLName, ImpName, ExtName, Hdr); + SymbolBodies.push_back(ImpSym); + + // If type is function, we need to create a thunk which jump to an + // address pointed by the __imp_ symbol. (This allows you to call + // DLL functions just like regular non-DLL functions.) + if (Hdr->getType() != llvm::COFF::IMPORT_CODE) + return; + ThunkSym = new (Alloc) DefinedImportThunk(Name, ImpSym, Hdr->Machine); + SymbolBodies.push_back(ThunkSym); +} + +void BitcodeFile::parse() { + // Usually parse() is thread-safe, but bitcode file is an exception. 
+ std::lock_guard Lock(Mu); + + Context.enableDebugTypeODRUniquing(); + ErrorOr> ModOrErr = LTOModule::createFromBuffer( + Context, MB.getBufferStart(), MB.getBufferSize(), llvm::TargetOptions()); + M = check(std::move(ModOrErr), "could not create LTO module"); + + llvm::StringSaver Saver(Alloc); + for (unsigned I = 0, E = M->getSymbolCount(); I != E; ++I) { + lto_symbol_attributes Attrs = M->getSymbolAttributes(I); + if ((Attrs & LTO_SYMBOL_SCOPE_MASK) == LTO_SYMBOL_SCOPE_INTERNAL) + continue; + + StringRef SymName = Saver.save(M->getSymbolName(I)); + int SymbolDef = Attrs & LTO_SYMBOL_DEFINITION_MASK; + if (SymbolDef == LTO_SYMBOL_DEFINITION_UNDEFINED) { + SymbolBodies.push_back(new (Alloc) Undefined(SymName)); + } else { + bool Replaceable = + (SymbolDef == LTO_SYMBOL_DEFINITION_TENTATIVE || // common + (Attrs & LTO_SYMBOL_COMDAT) || // comdat + (SymbolDef == LTO_SYMBOL_DEFINITION_WEAK && // weak external + (Attrs & LTO_SYMBOL_ALIAS))); + SymbolBodies.push_back(new (Alloc) DefinedBitcode(this, SymName, + Replaceable)); + } + } + + Directives = M->getLinkerOpts(); +} + +MachineTypes BitcodeFile::getMachineType() { + if (!M) + return IMAGE_FILE_MACHINE_UNKNOWN; + switch (Triple(M->getTargetTriple()).getArch()) { + case Triple::x86_64: + return AMD64; + case Triple::x86: + return I386; + case Triple::arm: + return ARMNT; + default: + return IMAGE_FILE_MACHINE_UNKNOWN; + } +} + +std::mutex BitcodeFile::Mu; + +} // namespace coff +} // namespace lld diff --git a/COFF/InputFiles.h b/COFF/InputFiles.h new file mode 100644 index 00000000..0ec01b50 --- /dev/null +++ b/COFF/InputFiles.h @@ -0,0 +1,223 @@ +//===- InputFiles.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_INPUT_FILES_H +#define LLD_COFF_INPUT_FILES_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/LTO/legacy/LTOModule.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/StringSaver.h" +#include +#include +#include +#include + +namespace lld { +namespace coff { + +using llvm::LTOModule; +using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; +using llvm::COFF::MachineTypes; +using llvm::object::Archive; +using llvm::object::COFFObjectFile; +using llvm::object::COFFSymbolRef; +using llvm::object::coff_section; + +class Chunk; +class Defined; +class DefinedImportData; +class DefinedImportThunk; +class Lazy; +class SymbolBody; +class Undefined; + +// The root class of input files. +class InputFile { +public: + enum Kind { ArchiveKind, ObjectKind, ImportKind, BitcodeKind }; + Kind kind() const { return FileKind; } + virtual ~InputFile() {} + + // Returns the filename. + StringRef getName() { return MB.getBufferIdentifier(); } + + // Returns symbols defined by this file. + virtual std::vector &getSymbols() = 0; + + // Reads a file (the constructor doesn't do that). + virtual void parse() = 0; + + // Returns the CPU type this file was compiled to. + virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; } + + // Returns a short, human-friendly filename. If this is a member of + // an archive file, a returned value includes parent's filename. + // Used for logging or debugging. + std::string getShortName(); + + // Sets a parent filename if this file is created from an archive. + void setParentName(StringRef N) { ParentName = N; } + + // Returns .drectve section contents if exist. + StringRef getDirectives() { return StringRef(Directives).trim(); } + + // Each file has a unique index. The index number is used to + // resolve ties in symbol resolution. 
+ int Index; + static int NextIndex; + +protected: + InputFile(Kind K, MemoryBufferRef M) + : Index(NextIndex++), MB(M), FileKind(K) {} + + MemoryBufferRef MB; + std::string Directives; + +private: + const Kind FileKind; + StringRef ParentName; +}; + +// .lib or .a file. +class ArchiveFile : public InputFile { +public: + explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; } + void parse() override; + + // Returns a memory buffer for a given symbol. An empty memory buffer + // is returned if we have already returned the same memory buffer. + // (So that we don't instantiate same members more than once.) + MemoryBufferRef getMember(const Archive::Symbol *Sym); + + llvm::MutableArrayRef getLazySymbols() { return LazySymbols; } + + // All symbols returned by ArchiveFiles are of Lazy type. + std::vector &getSymbols() override { + llvm_unreachable("internal fatal"); + } + +private: + std::unique_ptr File; + std::string Filename; + std::vector LazySymbols; + std::map Seen; +}; + +// .obj or .o file. This may be a member of an archive file. +class ObjectFile : public InputFile { +public: + explicit ObjectFile(MemoryBufferRef M) : InputFile(ObjectKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == ObjectKind; } + void parse() override; + MachineTypes getMachineType() override; + std::vector &getChunks() { return Chunks; } + std::vector &getSymbols() override { return SymbolBodies; } + + // Returns a SymbolBody object for the SymbolIndex'th symbol in the + // underlying object file. + SymbolBody *getSymbolBody(uint32_t SymbolIndex) { + return SparseSymbolBodies[SymbolIndex]; + } + + // Returns the underying COFF file. + COFFObjectFile *getCOFFObj() { return COFFObj.get(); } + + // True if this object file is compatible with SEH. + // COFF-specific and x86-only. + bool SEHCompat = false; + + // The list of safe exception handlers listed in .sxdata section. 
+ // COFF-specific and x86-only. + std::set SEHandlers; + +private: + void initializeChunks(); + void initializeSymbols(); + void initializeSEH(); + + Defined *createDefined(COFFSymbolRef Sym, const void *Aux, bool IsFirst); + Undefined *createUndefined(COFFSymbolRef Sym); + + std::unique_ptr COFFObj; + llvm::BumpPtrAllocator Alloc; + const coff_section *SXData = nullptr; + + // List of all chunks defined by this file. This includes both section + // chunks and non-section chunks for common symbols. + std::vector Chunks; + + // This vector contains the same chunks as Chunks, but they are + // indexed such that you can get a SectionChunk by section index. + // Nonexistent section indices are filled with null pointers. + // (Because section number is 1-based, the first slot is always a + // null pointer.) + std::vector SparseChunks; + + // List of all symbols referenced or defined by this file. + std::vector SymbolBodies; + + // This vector contains the same symbols as SymbolBodies, but they + // are indexed such that you can get a SymbolBody by symbol + // index. Nonexistent indices (which are occupied by auxiliary + // symbols in the real symbol table) are filled with null pointers. + std::vector SparseSymbolBodies; +}; + +// This type represents import library members that contain DLL names +// and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7 +// for details about the format. 
+class ImportFile : public InputFile { +public: + explicit ImportFile(MemoryBufferRef M) + : InputFile(ImportKind, M), StringAlloc(StringAllocAux) {} + static bool classof(const InputFile *F) { return F->kind() == ImportKind; } + std::vector &getSymbols() override { return SymbolBodies; } + + DefinedImportData *ImpSym = nullptr; + DefinedImportThunk *ThunkSym = nullptr; + std::string DLLName; + +private: + void parse() override; + + std::vector SymbolBodies; + llvm::BumpPtrAllocator Alloc; + llvm::BumpPtrAllocator StringAllocAux; + llvm::StringSaver StringAlloc; +}; + +// Used for LTO. +class BitcodeFile : public InputFile { +public: + explicit BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; } + std::vector &getSymbols() override { return SymbolBodies; } + MachineTypes getMachineType() override; + std::unique_ptr takeModule() { return std::move(M); } + + static llvm::LLVMContext Context; + +private: + void parse() override; + + std::vector SymbolBodies; + llvm::BumpPtrAllocator Alloc; + std::unique_ptr M; + static std::mutex Mu; +}; + +} // namespace coff +} // namespace lld + +#endif diff --git a/COFF/Librarian.cpp b/COFF/Librarian.cpp new file mode 100644 index 00000000..25fb4a87 --- /dev/null +++ b/COFF/Librarian.cpp @@ -0,0 +1,489 @@ +//===- Librarian.cpp ------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains functions for the Librarian. The librarian creates and +// manages libraries of the Common Object File Format (COFF) object files. It +// primarily is used for creating static libraries and import libraries. 
+// +//===----------------------------------------------------------------------===// + +#include "Config.h" +#include "Driver.h" +#include "Error.h" +#include "Symbols.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Path.h" + +#include + +using namespace lld::coff; +using namespace llvm::COFF; +using namespace llvm::object; +using namespace llvm; + +static bool is32bit() { + switch (Config->Machine) { + default: + llvm_unreachable("unsupported machine"); + case IMAGE_FILE_MACHINE_AMD64: + return false; + case IMAGE_FILE_MACHINE_ARMNT: + case IMAGE_FILE_MACHINE_I386: + return true; + } +} + +static uint16_t getImgRelRelocation() { + switch (Config->Machine) { + default: + llvm_unreachable("unsupported machine"); + case IMAGE_FILE_MACHINE_AMD64: + return IMAGE_REL_AMD64_ADDR32NB; + case IMAGE_FILE_MACHINE_ARMNT: + return IMAGE_REL_ARM_ADDR32NB; + case IMAGE_FILE_MACHINE_I386: + return IMAGE_REL_I386_DIR32NB; + } +} + +template void append(std::vector &B, const T &Data) { + size_t S = B.size(); + B.resize(S + sizeof(T)); + memcpy(&B[S], &Data, sizeof(T)); +} + +static void writeStringTable(std::vector &B, + ArrayRef Strings) { + // The COFF string table consists of a 4-byte value which is the size of the + // table, including the length field itself. This value is followed by the + // string content itself, which is an array of null-terminated C-style + // strings. The termination is important as they are referenced to by offset + // by the symbol entity in the file format. + + std::vector::size_type Pos = B.size(); + std::vector::size_type Offset = B.size(); + + // Skip over the length field, we will fill it in later as we will have + // computed the length while emitting the string content itself. 
+ Pos += sizeof(uint32_t); + + for (const auto &S : Strings) { + B.resize(Pos + S.length() + 1); + strcpy(reinterpret_cast(&B[Pos]), S.c_str()); + Pos += S.length() + 1; + } + + // Backfill the length of the table now that it has been computed. + support::ulittle32_t Length(B.size() - Offset); + memcpy(&B[Offset], &Length, sizeof(Length)); +} + +static std::string getImplibPath() { + if (!Config->Implib.empty()) + return Config->Implib; + SmallString<128> Out = StringRef(Config->OutputFile); + sys::path::replace_extension(Out, ".lib"); + return Out.str(); +} + +static ImportNameType getNameType(StringRef Sym, StringRef ExtName) { + if (Sym != ExtName) + return IMPORT_NAME_UNDECORATE; + if (Config->Machine == I386 && Sym.startswith("_")) + return IMPORT_NAME_NOPREFIX; + return IMPORT_NAME; +} + +static std::string replace(StringRef S, StringRef From, StringRef To) { + size_t Pos = S.find(From); + assert(Pos != StringRef::npos); + return (Twine(S.substr(0, Pos)) + To + S.substr(Pos + From.size())).str(); +} + +static const std::string NullImportDescriptorSymbolName = + "__NULL_IMPORT_DESCRIPTOR"; + +namespace { +// This class constructs various small object files necessary to support linking +// symbols imported from a DLL. The contents are pretty strictly defined and +// nearly entirely static. The details of the structures files are defined in +// WINNT.h and the PE/COFF specification. +class ObjectFactory { + using u16 = support::ulittle16_t; + using u32 = support::ulittle32_t; + + BumpPtrAllocator Alloc; + StringRef DLLName; + StringRef Library; + std::string ImportDescriptorSymbolName; + std::string NullThunkSymbolName; + +public: + ObjectFactory(StringRef S) + : DLLName(S), Library(S.drop_back(4)), + ImportDescriptorSymbolName(("__IMPORT_DESCRIPTOR_" + Library).str()), + NullThunkSymbolName(("\x7f" + Library + "_NULL_THUNK_DATA").str()) {} + + // Creates an Import Descriptor. 
This is a small object file which contains a + // reference to the terminators and contains the library name (entry) for the + // import name table. It will force the linker to construct the necessary + // structure to import symbols from the DLL. + NewArchiveMember createImportDescriptor(std::vector &Buffer); + + // Creates a NULL import descriptor. This is a small object file whcih + // contains a NULL import descriptor. It is used to terminate the imports + // from a specific DLL. + NewArchiveMember createNullImportDescriptor(std::vector &Buffer); + + // Create a NULL Thunk Entry. This is a small object file which contains a + // NULL Import Address Table entry and a NULL Import Lookup Table Entry. It + // is used to terminate the IAT and ILT. + NewArchiveMember createNullThunk(std::vector &Buffer); + + // Create a short import file which is described in PE/COFF spec 7. Import + // Library Format. + NewArchiveMember createShortImport(StringRef Sym, uint16_t Ordinal, + ImportNameType NameType, bool isData); +}; +} + +NewArchiveMember +ObjectFactory::createImportDescriptor(std::vector &Buffer) { + static const uint32_t NumberOfSections = 2; + static const uint32_t NumberOfSymbols = 7; + static const uint32_t NumberOfRelocations = 3; + + // COFF Header + coff_file_header Header{ + u16(Config->Machine), u16(NumberOfSections), u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$2 + sizeof(coff_import_directory_table_entry) + + NumberOfRelocations * sizeof(coff_relocation) + + // .idata$4 + (DLLName.size() + 1)), + u32(NumberOfSymbols), u16(0), + u16(is32bit() ? 
IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + static const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}, + u32(0), + u32(0), + u32(sizeof(coff_import_directory_table_entry)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(coff_import_directory_table_entry)), + u32(0), + u16(NumberOfRelocations), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + {{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}, + u32(0), + u32(0), + u32(DLLName.size() + 1), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(coff_import_directory_table_entry) + + NumberOfRelocations * sizeof(coff_relocation)), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$2 + static const coff_import_directory_table_entry ImportDescriptor{ + u32(0), u32(0), u32(0), u32(0), u32(0), + }; + append(Buffer, ImportDescriptor); + + static const coff_relocation RelocationTable[NumberOfRelocations] = { + {u32(offsetof(coff_import_directory_table_entry, NameRVA)), u32(2), + u16(getImgRelRelocation())}, + {u32(offsetof(coff_import_directory_table_entry, ImportLookupTableRVA)), + u32(3), u16(getImgRelRelocation())}, + {u32(offsetof(coff_import_directory_table_entry, ImportAddressTableRVA)), + u32(4), u16(getImgRelRelocation())}, + }; + append(Buffer, RelocationTable); + + // .idata$6 + auto S = Buffer.size(); + Buffer.resize(S + DLLName.size() + 1); + memcpy(&Buffer[S], DLLName.data(), DLLName.size()); + Buffer[S + DLLName.size()] = '\0'; + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + 
IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}}, + u32(0), + u16(2), + u16(0), + IMAGE_SYM_CLASS_STATIC, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + reinterpret_cast(SymbolTable[0].Name).Offset = + sizeof(uint32_t); + reinterpret_cast(SymbolTable[5].Name).Offset = + sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1; + reinterpret_cast(SymbolTable[6].Name).Offset = + sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1 + + NullImportDescriptorSymbolName.length() + 1; + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, + {ImportDescriptorSymbolName, NullImportDescriptorSymbolName, + NullThunkSymbolName}); + + StringRef F{reinterpret_cast(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef(F, DLLName)}; +} + +NewArchiveMember +ObjectFactory::createNullImportDescriptor(std::vector &Buffer) { + static const uint32_t NumberOfSections = 1; + static const uint32_t NumberOfSymbols = 1; + + // COFF Header + coff_file_header Header{ + u16(Config->Machine), u16(NumberOfSections), u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$3 + sizeof(coff_import_directory_table_entry)), + u32(NumberOfSymbols), u16(0), + u16(is32bit() ? 
IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + static const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '3'}, + u32(0), + u32(0), + u32(sizeof(coff_import_directory_table_entry)), + u32(sizeof(coff_file_header) + + (NumberOfSections * sizeof(coff_section))), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$3 + static const coff_import_directory_table_entry ImportDescriptor{ + u32(0), u32(0), u32(0), u32(0), u32(0), + }; + append(Buffer, ImportDescriptor); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + reinterpret_cast(SymbolTable[0].Name).Offset = + sizeof(uint32_t); + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, {NullImportDescriptorSymbolName}); + + StringRef F{reinterpret_cast(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef(F, DLLName)}; +} + +NewArchiveMember ObjectFactory::createNullThunk(std::vector &Buffer) { + static const uint32_t NumberOfSections = 2; + static const uint32_t NumberOfSymbols = 1; + + // COFF Header + coff_file_header Header{ + u16(Config->Machine), u16(NumberOfSections), u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$5 + sizeof(export_address_table_entry) + + // .idata$4 + sizeof(export_address_table_entry)), + u32(NumberOfSymbols), u16(0), + u16(is32bit() ? 
IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + static const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}, + u32(0), + u32(0), + u32(sizeof(export_address_table_entry)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + {{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}, + u32(0), + u32(0), + u32(sizeof(export_address_table_entry)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(export_address_table_entry)), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$5 + static const export_address_table_entry ILT{u32(0)}; + append(Buffer, ILT); + + // .idata$4 + static const export_address_table_entry IAT{u32(0)}; + append(Buffer, IAT); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + reinterpret_cast(SymbolTable[0].Name).Offset = + sizeof(uint32_t); + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, {NullThunkSymbolName}); + + StringRef F{reinterpret_cast(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef{F, DLLName}}; +} + +NewArchiveMember ObjectFactory::createShortImport(StringRef Sym, + uint16_t Ordinal, + ImportNameType NameType, + bool isData) { + size_t ImpSize = DLLName.size() + Sym.size() + 2; // +2 for NULs + size_t Size = sizeof(coff_import_header) + ImpSize; + char *Buf = Alloc.Allocate(Size); + memset(Buf, 0, Size); + char *P = Buf; + + // Write short import library. 
+ auto *Imp = reinterpret_cast(P); + P += sizeof(*Imp); + Imp->Sig2 = 0xFFFF; + Imp->Machine = Config->Machine; + Imp->SizeOfData = ImpSize; + if (Ordinal > 0) + Imp->OrdinalHint = Ordinal; + Imp->TypeInfo = (isData ? IMPORT_DATA : IMPORT_CODE); + Imp->TypeInfo |= NameType << 2; + + // Write symbol name and DLL name. + memcpy(P, Sym.data(), Sym.size()); + P += Sym.size() + 1; + memcpy(P, DLLName.data(), DLLName.size()); + + return {MemoryBufferRef(StringRef(Buf, Size), DLLName)}; +} + +// Creates an import library for a DLL. In this function, we first +// create an empty import library using lib.exe and then adds short +// import files to that file. +void lld::coff::writeImportLibrary() { + std::vector Members; + + std::string Path = getImplibPath(); + std::string DLLName = llvm::sys::path::filename(Config->OutputFile); + ObjectFactory OF(DLLName); + + std::vector ImportDescriptor; + Members.push_back(OF.createImportDescriptor(ImportDescriptor)); + + std::vector NullImportDescriptor; + Members.push_back(OF.createNullImportDescriptor(NullImportDescriptor)); + + std::vector NullThunk; + Members.push_back(OF.createNullThunk(NullThunk)); + + for (Export &E : Config->Exports) { + if (E.Private) + continue; + + ImportNameType Type = getNameType(E.SymbolName, E.Name); + std::string Name = E.ExtName.empty() + ? 
std::string(E.SymbolName) + : replace(E.SymbolName, E.Name, E.ExtName); + Members.push_back(OF.createShortImport(Name, E.Ordinal, Type, E.Data)); + } + + std::pair Result = + writeArchive(Path, Members, /*WriteSymtab*/ true, object::Archive::K_GNU, + /*Deterministic*/ true, /*Thin*/ false); + if (auto EC = Result.second) + fatal(EC, "failed to write " + Path); +} diff --git a/COFF/MarkLive.cpp b/COFF/MarkLive.cpp new file mode 100644 index 00000000..0870986a --- /dev/null +++ b/COFF/MarkLive.cpp @@ -0,0 +1,61 @@ +//===- MarkLive.cpp -------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Chunks.h" +#include "Symbols.h" +#include "llvm/ADT/STLExtras.h" +#include + +namespace lld { +namespace coff { + +// Set live bit on for each reachable chunk. Unmarked (unreachable) +// COMDAT chunks will be ignored by Writer, so they will be excluded +// from the final output. +void markLive(const std::vector &Chunks) { + // We build up a worklist of sections which have been marked as live. We only + // push into the worklist when we discover an unmarked section, and we mark + // as we push, so sections never appear twice in the list. + SmallVector Worklist; + + // COMDAT section chunks are dead by default. Add non-COMDAT chunks. + for (Chunk *C : Chunks) + if (auto *SC = dyn_cast(C)) + if (SC->isLive()) + Worklist.push_back(SC); + + auto Enqueue = [&](SectionChunk *C) { + if (C->isLive()) + return; + C->markLive(); + Worklist.push_back(C); + }; + + // Add GC root chunks. 
+ for (Undefined *U : Config->GCRoot) + if (auto *D = dyn_cast(U->repl())) + Enqueue(D->getChunk()); + + while (!Worklist.empty()) { + SectionChunk *SC = Worklist.pop_back_val(); + assert(SC->isLive() && "We mark as live when pushing onto the worklist!"); + + // Mark all symbols listed in the relocation table for this section. + for (SymbolBody *S : SC->symbols()) + if (auto *D = dyn_cast(S->repl())) + Enqueue(D->getChunk()); + + // Mark associative sections if any. + for (SectionChunk *C : SC->children()) + Enqueue(C); + } +} + +} +} diff --git a/COFF/ModuleDef.cpp b/COFF/ModuleDef.cpp new file mode 100644 index 00000000..5e393f45 --- /dev/null +++ b/COFF/ModuleDef.cpp @@ -0,0 +1,291 @@ +//===- COFF/ModuleDef.cpp -------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Windows-specific. +// A parser for the module-definition file (.def file). +// Parsed results are directly written to Config global variable. 
+// +// The format of module-definition files are described in this document: +// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx +// +//===----------------------------------------------------------------------===// + +#include "Config.h" +#include "Error.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/StringSaver.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +namespace lld { +namespace coff { +namespace { + +enum Kind { + Unknown, + Eof, + Identifier, + Comma, + Equal, + KwBase, + KwData, + KwExports, + KwHeapsize, + KwLibrary, + KwName, + KwNoname, + KwPrivate, + KwStacksize, + KwVersion, +}; + +struct Token { + explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {} + Kind K; + StringRef Value; +}; + +static bool isDecorated(StringRef Sym) { + return Sym.startswith("_") || Sym.startswith("@") || Sym.startswith("?"); +} + +class Lexer { +public: + explicit Lexer(StringRef S) : Buf(S) {} + + Token lex() { + Buf = Buf.trim(); + if (Buf.empty()) + return Token(Eof); + + switch (Buf[0]) { + case '\0': + return Token(Eof); + case ';': { + size_t End = Buf.find('\n'); + Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); + return lex(); + } + case '=': + Buf = Buf.drop_front(); + return Token(Equal, "="); + case ',': + Buf = Buf.drop_front(); + return Token(Comma, ","); + case '"': { + StringRef S; + std::tie(S, Buf) = Buf.substr(1).split('"'); + return Token(Identifier, S); + } + default: { + size_t End = Buf.find_first_of("=,\r\n \t\v"); + StringRef Word = Buf.substr(0, End); + Kind K = llvm::StringSwitch(Word) + .Case("BASE", KwBase) + .Case("DATA", KwData) + .Case("EXPORTS", KwExports) + .Case("HEAPSIZE", KwHeapsize) + .Case("LIBRARY", KwLibrary) + .Case("NAME", KwName) + .Case("NONAME", KwNoname) + .Case("PRIVATE", KwPrivate) + .Case("STACKSIZE", KwStacksize) + .Case("VERSION", KwVersion) + .Default(Identifier); + Buf = (End == Buf.npos) ? 
"" : Buf.drop_front(End); + return Token(K, Word); + } + } + } + +private: + StringRef Buf; +}; + +class Parser { +public: + explicit Parser(StringRef S, StringSaver *A) : Lex(S), Alloc(A) {} + + void parse() { + do { + parseOne(); + } while (Tok.K != Eof); + } + +private: + void read() { + if (Stack.empty()) { + Tok = Lex.lex(); + return; + } + Tok = Stack.back(); + Stack.pop_back(); + } + + void readAsInt(uint64_t *I) { + read(); + if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I)) + fatal("integer expected"); + } + + void expect(Kind Expected, StringRef Msg) { + read(); + if (Tok.K != Expected) + fatal(Msg); + } + + void unget() { Stack.push_back(Tok); } + + void parseOne() { + read(); + switch (Tok.K) { + case Eof: + return; + case KwExports: + for (;;) { + read(); + if (Tok.K != Identifier) { + unget(); + return; + } + parseExport(); + } + case KwHeapsize: + parseNumbers(&Config->HeapReserve, &Config->HeapCommit); + return; + case KwLibrary: + parseName(&Config->OutputFile, &Config->ImageBase); + if (!StringRef(Config->OutputFile).endswith_lower(".dll")) + Config->OutputFile += ".dll"; + return; + case KwStacksize: + parseNumbers(&Config->StackReserve, &Config->StackCommit); + return; + case KwName: + parseName(&Config->OutputFile, &Config->ImageBase); + return; + case KwVersion: + parseVersion(&Config->MajorImageVersion, &Config->MinorImageVersion); + return; + default: + fatal("unknown directive: " + Tok.Value); + } + } + + void parseExport() { + Export E; + E.Name = Tok.Value; + read(); + if (Tok.K == Equal) { + read(); + if (Tok.K != Identifier) + fatal("identifier expected, but got " + Tok.Value); + E.ExtName = E.Name; + E.Name = Tok.Value; + } else { + unget(); + } + + if (Config->Machine == I386) { + if (!isDecorated(E.Name)) + E.Name = Alloc->save("_" + E.Name); + if (!E.ExtName.empty() && !isDecorated(E.ExtName)) + E.ExtName = Alloc->save("_" + E.ExtName); + } + + for (;;) { + read(); + if (Tok.K == Identifier && Tok.Value[0] == '@') { + 
Tok.Value.drop_front().getAsInteger(10, E.Ordinal); + read(); + if (Tok.K == KwNoname) { + E.Noname = true; + } else { + unget(); + } + continue; + } + if (Tok.K == KwData) { + E.Data = true; + continue; + } + if (Tok.K == KwPrivate) { + E.Private = true; + continue; + } + unget(); + Config->Exports.push_back(E); + return; + } + } + + // HEAPSIZE/STACKSIZE reserve[,commit] + void parseNumbers(uint64_t *Reserve, uint64_t *Commit) { + readAsInt(Reserve); + read(); + if (Tok.K != Comma) { + unget(); + Commit = nullptr; + return; + } + readAsInt(Commit); + } + + // NAME outputPath [BASE=address] + void parseName(std::string *Out, uint64_t *Baseaddr) { + read(); + if (Tok.K == Identifier) { + *Out = Tok.Value; + } else { + *Out = ""; + unget(); + return; + } + read(); + if (Tok.K == KwBase) { + expect(Equal, "'=' expected"); + readAsInt(Baseaddr); + } else { + unget(); + *Baseaddr = 0; + } + } + + // VERSION major[.minor] + void parseVersion(uint32_t *Major, uint32_t *Minor) { + read(); + if (Tok.K != Identifier) + fatal("identifier expected, but got " + Tok.Value); + StringRef V1, V2; + std::tie(V1, V2) = Tok.Value.split('.'); + if (V1.getAsInteger(10, *Major)) + fatal("integer expected, but got " + Tok.Value); + if (V2.empty()) + *Minor = 0; + else if (V2.getAsInteger(10, *Minor)) + fatal("integer expected, but got " + Tok.Value); + } + + Lexer Lex; + Token Tok; + std::vector Stack; + StringSaver *Alloc; +}; + +} // anonymous namespace + +void parseModuleDefs(MemoryBufferRef MB, StringSaver *Alloc) { + Parser(MB.getBuffer(), Alloc).parse(); +} + +} // namespace coff +} // namespace lld diff --git a/COFF/Options.td b/COFF/Options.td new file mode 100644 index 00000000..e5c9c5b4 --- /dev/null +++ b/COFF/Options.td @@ -0,0 +1,127 @@ +include "llvm/Option/OptParser.td" + +// link.exe accepts options starting with either a dash or a slash. + +// Flag that takes no arguments. +class F : Flag<["/", "-", "-?"], name>; + +// Flag that takes one argument after ":". 
+class P : + Joined<["/", "-", "-?"], name#":">, HelpText; + +// Boolean flag suffixed by ":no". +multiclass B { + def "" : F; + def _no : F, HelpText; +} + +def align : P<"align", "Section alignment">; +def alternatename : P<"alternatename", "Define weak alias">; +def base : P<"base", "Base address of the program">; +def defaultlib : P<"defaultlib", "Add the library to the list of input files">; +def delayload : P<"delayload", "Delay loaded DLL name">; +def entry : P<"entry", "Name of entry point symbol">; +def export : P<"export", "Export a function">; +// No help text because /failifmismatch is not intended to be used by the user. +def failifmismatch : P<"failifmismatch", "">; +def heap : P<"heap", "Size of the heap">; +def implib : P<"implib", "Import library name">; +def libpath : P<"libpath", "Additional library search path">; +def machine : P<"machine", "Specify target platform">; +def merge : P<"merge", "Combine sections">; +def mllvm : P<"mllvm", "Options to pass to LLVM">; +def nodefaultlib : P<"nodefaultlib", "Remove a default library">; +def opt : P<"opt", "Control optimizations">; +def out : P<"out", "Path to file to write output">; +def pdb : P<"pdb", "PDB file path">; +def section : P<"section", "Specify section attributes">; +def stack : P<"stack", "Size of the stack">; +def stub : P<"stub", "Specify DOS stub file">; +def subsystem : P<"subsystem", "Specify subsystem">; +def version : P<"version", "Specify a version number in the PE header">; + +def disallowlib : Joined<["/", "-", "-?"], "disallowlib:">, Alias; + +def manifest : F<"manifest">; +def manifest_colon : P<"manifest", "Create manifest file">; +def manifestuac : P<"manifestuac", "User access control">; +def manifestfile : P<"manifestfile", "Manifest file path">; +def manifestdependency : P<"manifestdependency", + "Attributes for in manifest file">; +def manifestinput : P<"manifestinput", "Specify manifest file">; + +// We cannot use multiclass P because class name "incl" is different +// 
from its command line option name. We do this because "include" is +// a reserved keyword in tablegen. +def incl : Joined<["/", "-"], "include:">, + HelpText<"Force symbol to be added to symbol table as undefined one">; + +// "def" is also a keyword. +def deffile : Joined<["/", "-"], "def:">, + HelpText<"Use module-definition file">; + +def debug : F<"debug">, HelpText<"Embed a symbol table in the image">; +def dll : F<"dll">, HelpText<"Create a DLL">; +def nodefaultlib_all : F<"nodefaultlib">; +def noentry : F<"noentry">; +def profile : F<"profile">; +def swaprun_cd : F<"swaprun:cd">; +def swaprun_net : F<"swaprun:net">; +def verbose : F<"verbose">; + +def force : F<"force">, + HelpText<"Allow undefined symbols when creating executables">; +def force_unresolved : F<"force:unresolved">; + +defm allowbind: B<"allowbind", "Disable DLL binding">; +defm allowisolation : B<"allowisolation", "Set NO_ISOLATION bit">; +defm dynamicbase : B<"dynamicbase", + "Disable address space layout randomization">; +defm fixed : B<"fixed", "Enable base relocations">; +defm highentropyva : B<"highentropyva", "Set HIGH_ENTROPY_VA bit">; +defm largeaddressaware : B<"largeaddressaware", "Disable large addresses">; +defm nxcompat : B<"nxcompat", "Disable data execution provention">; +defm safeseh : B<"safeseh", "Produce an image with Safe Exception Handler">; +defm tsaware : B<"tsaware", "Create non-Terminal Server aware executable">; + +def help : F<"help">; +def help_q : Flag<["/?", "-?"], "">, Alias; + +// LLD extensions +def nosymtab : F<"nosymtab">; + +// Flags for debugging +def lldmap : Joined<["/", "-"], "lldmap:">; + +//============================================================================== +// The flags below do nothing. They are defined only for link.exe compatibility. 
+//============================================================================== + +class QF : Joined<["/", "-", "-?"], name#":">; + +multiclass QB { + def "" : F; + def _no : F; +} + +def functionpadmin : F<"functionpadmin">; +def ignoreidl : F<"ignoreidl">; +def incremental : F<"incremental">; +def no_incremental : F<"incremental:no">; +def nologo : F<"nologo">; +def throwingnew : F<"throwingnew">; +def editandcontinue : F<"editandcontinue">; +def fastfail : F<"fastfail">; + +def delay : QF<"delay">; +def errorreport : QF<"errorreport">; +def idlout : QF<"idlout">; +def ignore : QF<"ignore">; +def maxilksize : QF<"maxilksize">; +def pdbaltpath : QF<"pdbaltpath">; +def tlbid : QF<"tlbid">; +def tlbout : QF<"tlbout">; +def verbose_all : QF<"verbose">; +def guardsym : QF<"guardsym">; + +defm wx : QB<"wx">; diff --git a/COFF/PDB.cpp b/COFF/PDB.cpp new file mode 100644 index 00000000..7606ccc6 --- /dev/null +++ b/COFF/PDB.cpp @@ -0,0 +1,61 @@ +//===- PDB.cpp ------------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Driver.h" +#include "Error.h" +#include "Symbols.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/FileOutputBuffer.h" +#include + +using namespace llvm; +using namespace llvm::support; +using namespace llvm::support::endian; + +const int PageSize = 4096; +const uint8_t Magic[32] = "Microsoft C/C++ MSF 7.00\r\n\032DS\0\0"; + +namespace { +struct PDBHeader { + uint8_t Magic[32]; + ulittle32_t PageSize; + ulittle32_t FpmPage; + ulittle32_t PageCount; + ulittle32_t RootSize; + ulittle32_t Reserved; + ulittle32_t RootPointer; +}; +} + +void lld::coff::createPDB(StringRef Path) { + // Create a file. 
+ size_t FileSize = PageSize * 3; + ErrorOr> BufferOrErr = + FileOutputBuffer::create(Path, FileSize); + if (auto EC = BufferOrErr.getError()) + fatal(EC, "failed to open " + Path); + std::unique_ptr Buffer = std::move(*BufferOrErr); + + // Write the file header. + uint8_t *Buf = Buffer->getBufferStart(); + auto *Hdr = reinterpret_cast(Buf); + memcpy(Hdr->Magic, Magic, sizeof(Magic)); + Hdr->PageSize = PageSize; + // I don't know what FpmPage field means, but it must not be 0. + Hdr->FpmPage = 1; + Hdr->PageCount = FileSize / PageSize; + // Root directory is empty, containing only the length field. + Hdr->RootSize = 4; + // Root directory is on page 1. + Hdr->RootPointer = 1; + + // Write the root directory. Root stream is on page 2. + write32le(Buf + PageSize, 2); + Buffer->commit(); +} diff --git a/COFF/README.md b/COFF/README.md new file mode 100644 index 00000000..f1bfc9c1 --- /dev/null +++ b/COFF/README.md @@ -0,0 +1 @@ +See docs/NewLLD.rst diff --git a/COFF/SymbolTable.cpp b/COFF/SymbolTable.cpp new file mode 100644 index 00000000..df9da4c3 --- /dev/null +++ b/COFF/SymbolTable.cpp @@ -0,0 +1,448 @@ +//===- SymbolTable.cpp ----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Config.h" +#include "Driver.h" +#include "Error.h" +#include "SymbolTable.h" +#include "Symbols.h" +#include "lld/Core/Parallel.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/LTO/legacy/LTOCodeGenerator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +namespace lld { +namespace coff { + +void SymbolTable::addFile(std::unique_ptr FileP) { +#if LLVM_ENABLE_THREADS + std::launch Policy = std::launch::async; +#else + std::launch Policy = std::launch::deferred; +#endif + + InputFile *File = FileP.get(); + Files.push_back(std::move(FileP)); + if (auto *F = dyn_cast(File)) { + ArchiveQueue.push_back( + std::async(Policy, [=]() { F->parse(); return F; })); + return; + } + ObjectQueue.push_back( + std::async(Policy, [=]() { File->parse(); return File; })); + if (auto *F = dyn_cast(File)) { + ObjectFiles.push_back(F); + } else if (auto *F = dyn_cast(File)) { + BitcodeFiles.push_back(F); + } else { + ImportFiles.push_back(cast(File)); + } +} + +void SymbolTable::step() { + if (queueEmpty()) + return; + readObjects(); + readArchives(); +} + +void SymbolTable::run() { + while (!queueEmpty()) + step(); +} + +void SymbolTable::readArchives() { + if (ArchiveQueue.empty()) + return; + + // Add lazy symbols to the symbol table. Lazy symbols that conflict + // with existing undefined symbols are accumulated in LazySyms. + std::vector LazySyms; + for (std::future &Future : ArchiveQueue) { + ArchiveFile *File = Future.get(); + if (Config->Verbose) + llvm::outs() << "Reading " << File->getShortName() << "\n"; + for (Lazy &Sym : File->getLazySymbols()) + addLazy(&Sym, &LazySyms); + } + ArchiveQueue.clear(); + + // Add archive member files to ObjectQueue that should resolve + // existing undefined symbols. 
+ for (Symbol *Sym : LazySyms) + addMemberFile(cast(Sym->Body)); +} + +void SymbolTable::readObjects() { + if (ObjectQueue.empty()) + return; + + // Add defined and undefined symbols to the symbol table. + std::vector Directives; + for (size_t I = 0; I < ObjectQueue.size(); ++I) { + InputFile *File = ObjectQueue[I].get(); + if (Config->Verbose) + llvm::outs() << "Reading " << File->getShortName() << "\n"; + // Adding symbols may add more files to ObjectQueue + // (but not to ArchiveQueue). + for (SymbolBody *Sym : File->getSymbols()) + if (Sym->isExternal()) + addSymbol(Sym); + StringRef S = File->getDirectives(); + if (!S.empty()) { + Directives.push_back(S); + if (Config->Verbose) + llvm::outs() << "Directives: " << File->getShortName() + << ": " << S << "\n"; + } + } + ObjectQueue.clear(); + + // Parse directive sections. This may add files to + // ArchiveQueue and ObjectQueue. + for (StringRef S : Directives) + Driver->parseDirectives(S); +} + +bool SymbolTable::queueEmpty() { + return ArchiveQueue.empty() && ObjectQueue.empty(); +} + +void SymbolTable::reportRemainingUndefines(bool Resolve) { + llvm::SmallPtrSet Undefs; + for (auto &I : Symtab) { + Symbol *Sym = I.second; + auto *Undef = dyn_cast(Sym->Body); + if (!Undef) + continue; + StringRef Name = Undef->getName(); + // A weak alias may have been resolved, so check for that. + if (Defined *D = Undef->getWeakAlias()) { + if (Resolve) + Sym->Body = D; + continue; + } + // If we can resolve a symbol by removing __imp_ prefix, do that. + // This odd rule is for compatibility with MSVC linker. + if (Name.startswith("__imp_")) { + Symbol *Imp = find(Name.substr(strlen("__imp_"))); + if (Imp && isa(Imp->Body)) { + if (!Resolve) + continue; + auto *D = cast(Imp->Body); + auto *S = new (Alloc) DefinedLocalImport(Name, D); + LocalImportChunks.push_back(S->getChunk()); + Sym->Body = S; + continue; + } + } + // Remaining undefined symbols are not fatal if /force is specified. 
+ // They are replaced with dummy defined symbols. + if (Config->Force && Resolve) + Sym->Body = new (Alloc) DefinedAbsolute(Name, 0); + Undefs.insert(Sym->Body); + } + if (Undefs.empty()) + return; + for (Undefined *U : Config->GCRoot) + if (Undefs.count(U->repl())) + llvm::errs() << ": undefined symbol: " << U->getName() << "\n"; + for (std::unique_ptr &File : Files) + if (!isa(File.get())) + for (SymbolBody *Sym : File->getSymbols()) + if (Undefs.count(Sym->repl())) + llvm::errs() << File->getShortName() << ": undefined symbol: " + << Sym->getName() << "\n"; + if (!Config->Force) + fatal("link failed"); +} + +void SymbolTable::addLazy(Lazy *New, std::vector *Accum) { + Symbol *Sym = insert(New); + if (Sym->Body == New) + return; + SymbolBody *Existing = Sym->Body; + if (isa(Existing)) + return; + if (Lazy *L = dyn_cast(Existing)) + if (L->getFileIndex() < New->getFileIndex()) + return; + Sym->Body = New; + New->setBackref(Sym); + if (isa(Existing)) + Accum->push_back(Sym); +} + +void SymbolTable::addSymbol(SymbolBody *New) { + // Find an existing symbol or create and insert a new one. + assert(isa(New) || isa(New)); + Symbol *Sym = insert(New); + if (Sym->Body == New) + return; + SymbolBody *Existing = Sym->Body; + + // If we have an undefined symbol and a lazy symbol, + // let the lazy symbol to read a member file. + if (auto *L = dyn_cast(Existing)) { + // Undefined symbols with weak aliases need not to be resolved, + // since they would be replaced with weak aliases if they remain + // undefined. + if (auto *U = dyn_cast(New)) { + if (!U->WeakAlias) { + addMemberFile(L); + return; + } + } + Sym->Body = New; + return; + } + + // compare() returns -1, 0, or 1 if the lhs symbol is less preferable, + // equivalent (conflicting), or more preferable, respectively. 
+ int Comp = Existing->compare(New); + if (Comp == 0) + fatal("duplicate symbol: " + Existing->getDebugName() + " and " + + New->getDebugName()); + if (Comp < 0) + Sym->Body = New; +} + +Symbol *SymbolTable::insert(SymbolBody *New) { + Symbol *&Sym = Symtab[New->getName()]; + if (Sym) { + New->setBackref(Sym); + return Sym; + } + Sym = new (Alloc) Symbol(New); + New->setBackref(Sym); + return Sym; +} + +// Reads an archive member file pointed by a given symbol. +void SymbolTable::addMemberFile(Lazy *Body) { + std::unique_ptr File = Body->getMember(); + + // getMember returns an empty buffer if the member was already + // read from the library. + if (!File) + return; + if (Config->Verbose) + llvm::outs() << "Loaded " << File->getShortName() << " for " + << Body->getName() << "\n"; + addFile(std::move(File)); +} + +std::vector SymbolTable::getChunks() { + std::vector Res; + for (ObjectFile *File : ObjectFiles) { + std::vector &V = File->getChunks(); + Res.insert(Res.end(), V.begin(), V.end()); + } + return Res; +} + +Symbol *SymbolTable::find(StringRef Name) { + auto It = Symtab.find(Name); + if (It == Symtab.end()) + return nullptr; + return It->second; +} + +Symbol *SymbolTable::findUnderscore(StringRef Name) { + if (Config->Machine == I386) + return find(("_" + Name).str()); + return find(Name); +} + +StringRef SymbolTable::findByPrefix(StringRef Prefix) { + for (auto Pair : Symtab) { + StringRef Name = Pair.first; + if (Name.startswith(Prefix)) + return Name; + } + return ""; +} + +StringRef SymbolTable::findMangle(StringRef Name) { + if (Symbol *Sym = find(Name)) + if (!isa(Sym->Body)) + return Name; + if (Config->Machine != I386) + return findByPrefix(("?" + Name + "@@Y").str()); + if (!Name.startswith("_")) + return ""; + // Search for x86 C function. + StringRef S = findByPrefix((Name + "@").str()); + if (!S.empty()) + return S; + // Search for x86 C++ non-member function. + return findByPrefix(("?" 
+ Name.substr(1) + "@@Y").str()); +} + +void SymbolTable::mangleMaybe(Undefined *U) { + if (U->WeakAlias) + return; + if (!isa(U->repl())) + return; + StringRef Alias = findMangle(U->getName()); + if (!Alias.empty()) + U->WeakAlias = addUndefined(Alias); +} + +Undefined *SymbolTable::addUndefined(StringRef Name) { + auto *New = new (Alloc) Undefined(Name); + addSymbol(New); + if (auto *U = dyn_cast(New->repl())) + return U; + return New; +} + +DefinedRelative *SymbolTable::addRelative(StringRef Name, uint64_t VA) { + auto *New = new (Alloc) DefinedRelative(Name, VA); + addSymbol(New); + return New; +} + +DefinedAbsolute *SymbolTable::addAbsolute(StringRef Name, uint64_t VA) { + auto *New = new (Alloc) DefinedAbsolute(Name, VA); + addSymbol(New); + return New; +} + +void SymbolTable::printMap(llvm::raw_ostream &OS) { + for (ObjectFile *File : ObjectFiles) { + OS << File->getShortName() << ":\n"; + for (SymbolBody *Body : File->getSymbols()) + if (auto *R = dyn_cast(Body)) + if (R->getChunk()->isLive()) + OS << Twine::utohexstr(Config->ImageBase + R->getRVA()) + << " " << R->getName() << "\n"; + } +} + +void SymbolTable::addCombinedLTOObject(ObjectFile *Obj) { + for (SymbolBody *Body : Obj->getSymbols()) { + if (!Body->isExternal()) + continue; + // We should not see any new undefined symbols at this point, but we'll + // diagnose them later in reportRemainingUndefines(). + StringRef Name = Body->getName(); + Symbol *Sym = insert(Body); + SymbolBody *Existing = Sym->Body; + + if (Existing == Body) + continue; + + if (isa(Existing)) { + Sym->Body = Body; + continue; + } + if (auto *L = dyn_cast(Existing)) { + // We may see new references to runtime library symbols such as __chkstk + // here. These symbols must be wholly defined in non-bitcode files. 
+ addMemberFile(L); + continue; + } + + int Comp = Existing->compare(Body); + if (Comp == 0) + fatal("LTO: unexpected duplicate symbol: " + Name); + if (Comp < 0) + Sym->Body = Body; + } +} + +void SymbolTable::addCombinedLTOObjects() { + if (BitcodeFiles.empty()) + return; + + // Diagnose any undefined symbols early, but do not resolve weak externals, + // as resolution breaks the invariant that each Symbol points to a unique + // SymbolBody, which we rely on to replace DefinedBitcode symbols correctly. + reportRemainingUndefines(/*Resolve=*/false); + + // Create an object file and add it to the symbol table by replacing any + // DefinedBitcode symbols with the definitions in the object file. + LTOCodeGenerator CG(BitcodeFile::Context); + CG.setOptLevel(Config->LTOOptLevel); + std::vector Objs = createLTOObjects(&CG); + + for (ObjectFile *Obj : Objs) + addCombinedLTOObject(Obj); + + size_t NumBitcodeFiles = BitcodeFiles.size(); + run(); + if (BitcodeFiles.size() != NumBitcodeFiles) + fatal("LTO: late loaded symbol created new bitcode reference"); +} + +// Combine and compile bitcode files and then return the result +// as a vector of regular COFF object files. +std::vector SymbolTable::createLTOObjects(LTOCodeGenerator *CG) { + // All symbols referenced by non-bitcode objects must be preserved. + for (ObjectFile *File : ObjectFiles) + for (SymbolBody *Body : File->getSymbols()) + if (auto *S = dyn_cast(Body->repl())) + CG->addMustPreserveSymbol(S->getName()); + + // Likewise for bitcode symbols which we initially resolved to non-bitcode. + for (BitcodeFile *File : BitcodeFiles) + for (SymbolBody *Body : File->getSymbols()) + if (isa(Body) && !isa(Body->repl())) + CG->addMustPreserveSymbol(Body->getName()); + + // Likewise for other symbols that must be preserved. 
+ for (Undefined *U : Config->GCRoot) { + if (auto *S = dyn_cast(U->repl())) + CG->addMustPreserveSymbol(S->getName()); + else if (auto *S = dyn_cast_or_null(U->getWeakAlias())) + CG->addMustPreserveSymbol(S->getName()); + } + + CG->setModule(BitcodeFiles[0]->takeModule()); + for (unsigned I = 1, E = BitcodeFiles.size(); I != E; ++I) + CG->addModule(BitcodeFiles[I]->takeModule().get()); + + bool DisableVerify = true; +#ifdef NDEBUG + DisableVerify = false; +#endif + if (!CG->optimize(DisableVerify, false, false, false)) + fatal(""); // optimize() should have emitted any error message. + + Objs.resize(Config->LTOJobs); + // Use std::list to avoid invalidation of pointers in OSPtrs. + std::list OSs; + std::vector OSPtrs; + for (SmallString<0> &Obj : Objs) { + OSs.emplace_back(Obj); + OSPtrs.push_back(&OSs.back()); + } + + if (!CG->compileOptimized(OSPtrs)) + fatal(""); // compileOptimized() should have emitted any error message. + + std::vector ObjFiles; + for (SmallString<0> &Obj : Objs) { + auto *ObjFile = new ObjectFile(MemoryBufferRef(Obj, "")); + Files.emplace_back(ObjFile); + ObjectFiles.push_back(ObjFile); + ObjFile->parse(); + ObjFiles.push_back(ObjFile); + } + + return ObjFiles; +} + +} // namespace coff +} // namespace lld diff --git a/COFF/SymbolTable.h b/COFF/SymbolTable.h new file mode 100644 index 00000000..8bf4387c --- /dev/null +++ b/COFF/SymbolTable.h @@ -0,0 +1,125 @@ +//===- SymbolTable.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_SYMBOL_TABLE_H +#define LLD_COFF_SYMBOL_TABLE_H + +#include "InputFiles.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/raw_ostream.h" + +#ifdef _MSC_VER +// depends on for __uncaught_exception. +#include +#endif + +#include + +namespace llvm { +struct LTOCodeGenerator; +} + +namespace lld { +namespace coff { + +class Chunk; +class Defined; +class Lazy; +class SymbolBody; +struct Symbol; + +// SymbolTable is a bucket of all known symbols, including defined, +// undefined, or lazy symbols (the last one is symbols in archive +// files whose archive members are not yet loaded). +// +// We put all symbols of all files to a SymbolTable, and the +// SymbolTable selects the "best" symbols if there are name +// conflicts. For example, obviously, a defined symbol is better than +// an undefined symbol. Or, if there's a conflict between a lazy and a +// undefined, it'll read an archive member to read a real definition +// to replace the lazy symbol. The logic is implemented in resolve(). +class SymbolTable { +public: + void addFile(std::unique_ptr File); + std::vector> &getFiles() { return Files; } + void step(); + void run(); + bool queueEmpty(); + + // Print an error message on undefined symbols. If Resolve is true, try to + // resolve any undefined symbols and update the symbol table accordingly. + void reportRemainingUndefines(bool Resolve); + + // Returns a list of chunks of selected symbols. + std::vector getChunks(); + + // Returns a symbol for a given name. Returns a nullptr if not found. + Symbol *find(StringRef Name); + Symbol *findUnderscore(StringRef Name); + + // Occasionally we have to resolve an undefined symbol to its + // mangled symbol. This function tries to find a mangled name + // for U from the symbol table, and if found, set the symbol as + // a weak alias for U. 
+ void mangleMaybe(Undefined *U); + StringRef findMangle(StringRef Name); + + // Print a layout map to OS. + void printMap(llvm::raw_ostream &OS); + + // Build a set of COFF objects representing the combined contents of + // BitcodeFiles and add them to the symbol table. Called after all files are + // added and before the writer writes results to a file. + void addCombinedLTOObjects(); + + // The writer needs to handle DLL import libraries specially in + // order to create the import descriptor table. + std::vector ImportFiles; + + // The writer needs to infer the machine type from the object files. + std::vector ObjectFiles; + + // Creates an Undefined symbol for a given name. + Undefined *addUndefined(StringRef Name); + DefinedRelative *addRelative(StringRef Name, uint64_t VA); + DefinedAbsolute *addAbsolute(StringRef Name, uint64_t VA); + + // A list of chunks which to be added to .rdata. + std::vector LocalImportChunks; + +private: + void readArchives(); + void readObjects(); + + void addSymbol(SymbolBody *New); + void addLazy(Lazy *New, std::vector *Accum); + Symbol *insert(SymbolBody *New); + StringRef findByPrefix(StringRef Prefix); + + void addMemberFile(Lazy *Body); + void addCombinedLTOObject(ObjectFile *Obj); + std::vector createLTOObjects(llvm::LTOCodeGenerator *CG); + + llvm::DenseMap Symtab; + + std::vector> Files; + std::vector> ArchiveQueue; + std::vector> ObjectQueue; + + std::vector BitcodeFiles; + std::vector> Objs; + llvm::BumpPtrAllocator Alloc; +}; + +} // namespace coff +} // namespace lld + +#endif diff --git a/COFF/Symbols.cpp b/COFF/Symbols.cpp new file mode 100644 index 00000000..6e2db663 --- /dev/null +++ b/COFF/Symbols.cpp @@ -0,0 +1,217 @@ +//===- Symbols.cpp --------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Error.h" +#include "InputFiles.h" +#include "Symbols.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm::object; +using llvm::sys::fs::identify_magic; +using llvm::sys::fs::file_magic; + +namespace lld { +namespace coff { + +StringRef SymbolBody::getName() { + // DefinedCOFF names are read lazily for a performance reason. + // Non-external symbol names are never used by the linker except for logging + // or debugging. Their internal references are resolved not by name but by + // symbol index. And because they are not external, no one can refer them by + // name. Object files contain lots of non-external symbols, and creating + // StringRefs for them (which involves lots of strlen() on the string table) + // is a waste of time. + if (Name.empty()) { + auto *D = cast(this); + D->File->getCOFFObj()->getSymbolName(D->Sym, Name); + } + return Name; +} + +// Returns 1, 0 or -1 if this symbol should take precedence +// over the Other, tie or lose, respectively. +int SymbolBody::compare(SymbolBody *Other) { + Kind LK = kind(), RK = Other->kind(); + + // Normalize so that the smaller kind is on the left. + if (LK > RK) + return -Other->compare(this); + + // First handle comparisons between two different kinds. + if (LK != RK) { + if (RK > LastDefinedKind) { + if (LK == LazyKind && cast(Other)->WeakAlias) + return -1; + + // The LHS is either defined or lazy and so it wins. + assert((LK <= LastDefinedKind || LK == LazyKind) && "Bad kind!"); + return 1; + } + + // Bitcode has special complexities. 
+ if (RK == DefinedBitcodeKind) { + auto *RHS = cast(Other); + + switch (LK) { + case DefinedCommonKind: + return 1; + + case DefinedRegularKind: + // As an approximation, regular symbols win over bitcode symbols, + // but we definitely have a conflict if the regular symbol is not + // replaceable and neither is the bitcode symbol. We do not + // replicate the rest of the symbol resolution logic here; symbol + // resolution will be done accurately after lowering bitcode symbols + // to regular symbols in addCombinedLTOObject(). + if (cast(this)->isCOMDAT() || RHS->IsReplaceable) + return 1; + + // Fallthrough to the default of a tie otherwise. + default: + return 0; + } + } + + // Either of the object file kind will trump a higher kind. + if (LK <= LastDefinedCOFFKind) + return 1; + + // The remaining kind pairs are ties amongst defined symbols. + return 0; + } + + // Now handle the case where the kinds are the same. + switch (LK) { + case DefinedRegularKind: { + auto *LHS = cast(this); + auto *RHS = cast(Other); + if (LHS->isCOMDAT() && RHS->isCOMDAT()) + return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1; + return 0; + } + + case DefinedCommonKind: { + auto *LHS = cast(this); + auto *RHS = cast(Other); + if (LHS->getSize() == RHS->getSize()) + return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1; + return LHS->getSize() > RHS->getSize() ? 1 : -1; + } + + case DefinedBitcodeKind: { + auto *LHS = cast(this); + auto *RHS = cast(Other); + // If both are non-replaceable, we have a tie. + if (!LHS->IsReplaceable && !RHS->IsReplaceable) + return 0; + + // Non-replaceable symbols win, but even two replaceable symboles don't + // tie. If both symbols are replaceable, choice is arbitrary. + if (RHS->IsReplaceable && LHS->IsReplaceable) + return uintptr_t(LHS) < uintptr_t(RHS) ? 1 : -1; + return LHS->IsReplaceable ? -1 : 1; + } + + case LazyKind: { + // Don't tie, pick the earliest. 
+ auto *LHS = cast(this); + auto *RHS = cast(Other); + return LHS->getFileIndex() < RHS->getFileIndex() ? 1 : -1; + } + + case UndefinedKind: { + auto *LHS = cast(this); + auto *RHS = cast(Other); + // Tie if both undefined symbols have different weak aliases. + if (LHS->WeakAlias && RHS->WeakAlias) { + if (LHS->WeakAlias->getName() != RHS->WeakAlias->getName()) + return 0; + return uintptr_t(LHS) < uintptr_t(RHS) ? 1 : -1; + } + return LHS->WeakAlias ? 1 : -1; + } + + case DefinedLocalImportKind: + case DefinedImportThunkKind: + case DefinedImportDataKind: + case DefinedAbsoluteKind: + case DefinedRelativeKind: + // These all simply tie. + return 0; + } + llvm_unreachable("unknown symbol kind"); +} + +std::string SymbolBody::getDebugName() { + std::string N = getName().str(); + if (auto *D = dyn_cast(this)) { + N += " "; + N += D->File->getShortName(); + } else if (auto *D = dyn_cast(this)) { + N += " "; + N += D->File->getShortName(); + } + return N; +} + +COFFSymbolRef DefinedCOFF::getCOFFSymbol() { + size_t SymSize = File->getCOFFObj()->getSymbolTableEntrySize(); + if (SymSize == sizeof(coff_symbol16)) + return COFFSymbolRef(reinterpret_cast(Sym)); + assert(SymSize == sizeof(coff_symbol32)); + return COFFSymbolRef(reinterpret_cast(Sym)); +} + +DefinedImportThunk::DefinedImportThunk(StringRef Name, DefinedImportData *S, + uint16_t Machine) + : Defined(DefinedImportThunkKind, Name) { + switch (Machine) { + case AMD64: Data.reset(new ImportThunkChunkX64(S)); return; + case I386: Data.reset(new ImportThunkChunkX86(S)); return; + case ARMNT: Data.reset(new ImportThunkChunkARM(S)); return; + default: llvm_unreachable("unknown machine type"); + } +} + +std::unique_ptr Lazy::getMember() { + MemoryBufferRef MBRef = File->getMember(&Sym); + + // getMember returns an empty buffer if the member was already + // read from the library. 
+ if (MBRef.getBuffer().empty()) + return std::unique_ptr(nullptr); + + file_magic Magic = identify_magic(MBRef.getBuffer()); + if (Magic == file_magic::coff_import_library) + return std::unique_ptr(new ImportFile(MBRef)); + + std::unique_ptr Obj; + if (Magic == file_magic::coff_object) + Obj.reset(new ObjectFile(MBRef)); + else if (Magic == file_magic::bitcode) + Obj.reset(new BitcodeFile(MBRef)); + else + fatal("unknown file type: " + File->getName()); + + Obj->setParentName(File->getName()); + return Obj; +} + +Defined *Undefined::getWeakAlias() { + // A weak alias may be a weak alias to another symbol, so check recursively. + for (SymbolBody *A = WeakAlias; A; A = cast(A)->WeakAlias) + if (auto *D = dyn_cast(A->repl())) + return D; + return nullptr; +} + +} // namespace coff +} // namespace lld diff --git a/COFF/Symbols.h b/COFF/Symbols.h new file mode 100644 index 00000000..f96c1fb3 --- /dev/null +++ b/COFF/Symbols.h @@ -0,0 +1,403 @@ +//===- Symbols.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_SYMBOLS_H +#define LLD_COFF_SYMBOLS_H + +#include "Chunks.h" +#include "Config.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/COFF.h" +#include +#include +#include + +namespace lld { +namespace coff { + +using llvm::object::Archive; +using llvm::object::COFFSymbolRef; +using llvm::object::coff_import_header; +using llvm::object::coff_symbol_generic; + +class ArchiveFile; +class BitcodeFile; +class InputFile; +class ObjectFile; +class SymbolBody; + +// A real symbol object, SymbolBody, is usually accessed indirectly +// through a Symbol. There's always one Symbol for each symbol name. 
+// The resolver updates SymbolBody pointers as it resolves symbols. +struct Symbol { + explicit Symbol(SymbolBody *P) : Body(P) {} + SymbolBody *Body; +}; + +// The base class for real symbol classes. +class SymbolBody { +public: + enum Kind { + // The order of these is significant. We start with the regular defined + // symbols as those are the most prevelant and the zero tag is the cheapest + // to set. Among the defined kinds, the lower the kind is preferred over + // the higher kind when testing wether one symbol should take precedence + // over another. + DefinedRegularKind = 0, + DefinedCommonKind, + DefinedLocalImportKind, + DefinedImportThunkKind, + DefinedImportDataKind, + DefinedAbsoluteKind, + DefinedRelativeKind, + DefinedBitcodeKind, + + UndefinedKind, + LazyKind, + + LastDefinedCOFFKind = DefinedCommonKind, + LastDefinedKind = DefinedBitcodeKind, + }; + + Kind kind() const { return static_cast(SymbolKind); } + + // Returns true if this is an external symbol. + bool isExternal() { return IsExternal; } + + // Returns the symbol name. + StringRef getName(); + + // A SymbolBody has a backreference to a Symbol. Originally they are + // doubly-linked. A backreference will never change. But the pointer + // in the Symbol may be mutated by the resolver. If you have a + // pointer P to a SymbolBody and are not sure whether the resolver + // has chosen the object among other objects having the same name, + // you can access P->Backref->Body to get the resolver's result. + void setBackref(Symbol *P) { Backref = P; } + SymbolBody *repl() { return Backref ? Backref->Body : this; } + + // Decides which symbol should "win" in the symbol table, this or + // the Other. Returns 1 if this wins, -1 if the Other wins, or 0 if + // they are duplicate (conflicting) symbols. + int compare(SymbolBody *Other); + + // Returns a name of this symbol including source file name. + // Used only for debugging and logging. 
+ std::string getDebugName(); + +protected: + explicit SymbolBody(Kind K, StringRef N = "") + : SymbolKind(K), IsExternal(true), IsCOMDAT(false), + IsReplaceable(false), Name(N) {} + + const unsigned SymbolKind : 8; + unsigned IsExternal : 1; + + // This bit is used by the \c DefinedRegular subclass. + unsigned IsCOMDAT : 1; + + // This bit is used by the \c DefinedBitcode subclass. + unsigned IsReplaceable : 1; + + StringRef Name; + Symbol *Backref = nullptr; +}; + +// The base class for any defined symbols, including absolute symbols, +// etc. +class Defined : public SymbolBody { +public: + Defined(Kind K, StringRef N = "") : SymbolBody(K, N) {} + + static bool classof(const SymbolBody *S) { + return S->kind() <= LastDefinedKind; + } + + // Returns the RVA (relative virtual address) of this symbol. The + // writer sets and uses RVAs. + uint64_t getRVA(); + + // Returns the RVA relative to the beginning of the output section. + // Used to implement SECREL relocation type. + uint64_t getSecrel(); + + // Returns the output section index. + // Used to implement SECTION relocation type. + uint64_t getSectionIndex(); + + // Returns true if this symbol points to an executable (e.g. .text) section. + // Used to implement ARM relocations. + bool isExecutable(); +}; + +// Symbols defined via a COFF object file. +class DefinedCOFF : public Defined { + friend SymbolBody; +public: + DefinedCOFF(Kind K, ObjectFile *F, COFFSymbolRef S) + : Defined(K), File(F), Sym(S.getGeneric()) {} + + static bool classof(const SymbolBody *S) { + return S->kind() <= LastDefinedCOFFKind; + } + + int getFileIndex() { return File->Index; } + + COFFSymbolRef getCOFFSymbol(); + +protected: + ObjectFile *File; + const coff_symbol_generic *Sym; +}; + +// Regular defined symbols read from object file symbol tables. 
+class DefinedRegular : public DefinedCOFF { +public: + DefinedRegular(ObjectFile *F, COFFSymbolRef S, SectionChunk *C) + : DefinedCOFF(DefinedRegularKind, F, S), Data(&C->Repl) { + IsExternal = S.isExternal(); + IsCOMDAT = C->isCOMDAT(); + } + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedRegularKind; + } + + uint64_t getRVA() { return (*Data)->getRVA() + Sym->Value; } + bool isCOMDAT() { return IsCOMDAT; } + SectionChunk *getChunk() { return *Data; } + uint32_t getValue() { return Sym->Value; } + +private: + SectionChunk **Data; +}; + +class DefinedCommon : public DefinedCOFF { +public: + DefinedCommon(ObjectFile *F, COFFSymbolRef S, CommonChunk *C) + : DefinedCOFF(DefinedCommonKind, F, S), Data(C) { + IsExternal = S.isExternal(); + } + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedCommonKind; + } + + uint64_t getRVA() { return Data->getRVA(); } + +private: + friend SymbolBody; + uint64_t getSize() { return Sym->Value; } + CommonChunk *Data; +}; + +// Absolute symbols. +class DefinedAbsolute : public Defined { +public: + DefinedAbsolute(StringRef N, COFFSymbolRef S) + : Defined(DefinedAbsoluteKind, N), VA(S.getValue()) { + IsExternal = S.isExternal(); + } + + DefinedAbsolute(StringRef N, uint64_t V) + : Defined(DefinedAbsoluteKind, N), VA(V) {} + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedAbsoluteKind; + } + + uint64_t getRVA() { return VA - Config->ImageBase; } + void setVA(uint64_t V) { VA = V; } + +private: + uint64_t VA; +}; + +// This is a kind of absolute symbol but relative to the image base. +// Unlike absolute symbols, relocations referring this kind of symbols +// are subject of the base relocation. This type is used rarely -- +// mainly for __ImageBase. 
+class DefinedRelative : public Defined { +public: + explicit DefinedRelative(StringRef Name, uint64_t V = 0) + : Defined(DefinedRelativeKind, Name), RVA(V) {} + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedRelativeKind; + } + + uint64_t getRVA() { return RVA; } + void setRVA(uint64_t V) { RVA = V; } + +private: + uint64_t RVA; +}; + +// This class represents a symbol defined in an archive file. It is +// created from an archive file header, and it knows how to load an +// object file from an archive to replace itself with a defined +// symbol. If the resolver finds both Undefined and Lazy for +// the same name, it will ask the Lazy to load a file. +class Lazy : public SymbolBody { +public: + Lazy(ArchiveFile *F, const Archive::Symbol S) + : SymbolBody(LazyKind, S.getName()), File(F), Sym(S) {} + + static bool classof(const SymbolBody *S) { return S->kind() == LazyKind; } + + // Returns an object file for this symbol, or a nullptr if the file + // was already returned. + std::unique_ptr getMember(); + + int getFileIndex() { return File->Index; } + +private: + ArchiveFile *File; + const Archive::Symbol Sym; +}; + +// Undefined symbols. +class Undefined : public SymbolBody { +public: + explicit Undefined(StringRef N) : SymbolBody(UndefinedKind, N) {} + + static bool classof(const SymbolBody *S) { + return S->kind() == UndefinedKind; + } + + // An undefined symbol can have a fallback symbol which gives an + // undefined symbol a second chance if it would remain undefined. + // If it remains undefined, it'll be replaced with whatever the + // Alias pointer points to. + SymbolBody *WeakAlias = nullptr; + + // If this symbol is external weak, try to resolve it to a defined + // symbol by searching the chain of fallback symbols. Returns the symbol if + // successful, otherwise returns null. + Defined *getWeakAlias(); +}; + +// Windows-specific classes. + +// This class represents a symbol imported from a DLL. 
This has two +// names for internal use and external use. The former is used for +// name resolution, and the latter is used for the import descriptor +// table in an output. The former has "__imp_" prefix. +class DefinedImportData : public Defined { +public: + DefinedImportData(StringRef D, StringRef N, StringRef E, + const coff_import_header *H) + : Defined(DefinedImportDataKind, N), DLLName(D), ExternalName(E), Hdr(H) { + } + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedImportDataKind; + } + + uint64_t getRVA() { return Location->getRVA(); } + StringRef getDLLName() { return DLLName; } + StringRef getExternalName() { return ExternalName; } + void setLocation(Chunk *AddressTable) { Location = AddressTable; } + uint16_t getOrdinal() { return Hdr->OrdinalHint; } + +private: + StringRef DLLName; + StringRef ExternalName; + const coff_import_header *Hdr; + Chunk *Location = nullptr; +}; + +// This class represents a symbol for a jump table entry which jumps +// to a function in a DLL. Linker are supposed to create such symbols +// without "__imp_" prefix for all function symbols exported from +// DLLs, so that you can call DLL functions as regular functions with +// a regular name. A function pointer is given as a DefinedImportData. +class DefinedImportThunk : public Defined { +public: + DefinedImportThunk(StringRef Name, DefinedImportData *S, uint16_t Machine); + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedImportThunkKind; + } + + uint64_t getRVA() { return Data->getRVA(); } + Chunk *getChunk() { return Data.get(); } + +private: + std::unique_ptr Data; +}; + +// If you have a symbol "__imp_foo" in your object file, a symbol name +// "foo" becomes automatically available as a pointer to "__imp_foo". +// This class is for such automatically-created symbols. +// Yes, this is an odd feature. We didn't intend to implement that. +// This is here just for compatibility with MSVC. 
+class DefinedLocalImport : public Defined { +public: + DefinedLocalImport(StringRef N, Defined *S) + : Defined(DefinedLocalImportKind, N), Data(S) {} + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedLocalImportKind; + } + + uint64_t getRVA() { return Data.getRVA(); } + Chunk *getChunk() { return &Data; } + +private: + LocalImportChunk Data; +}; + +class DefinedBitcode : public Defined { + friend SymbolBody; +public: + DefinedBitcode(BitcodeFile *F, StringRef N, bool IsReplaceable) + : Defined(DefinedBitcodeKind, N), File(F) { + this->IsReplaceable = IsReplaceable; + } + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedBitcodeKind; + } + +private: + BitcodeFile *File; +}; + +inline uint64_t Defined::getRVA() { + switch (kind()) { + case DefinedAbsoluteKind: + return cast(this)->getRVA(); + case DefinedRelativeKind: + return cast(this)->getRVA(); + case DefinedImportDataKind: + return cast(this)->getRVA(); + case DefinedImportThunkKind: + return cast(this)->getRVA(); + case DefinedLocalImportKind: + return cast(this)->getRVA(); + case DefinedCommonKind: + return cast(this)->getRVA(); + case DefinedRegularKind: + return cast(this)->getRVA(); + case DefinedBitcodeKind: + llvm_unreachable("There is no address for a bitcode symbol."); + case LazyKind: + case UndefinedKind: + llvm_unreachable("Cannot get the address for an undefined symbol."); + } + llvm_unreachable("unknown symbol kind"); +} + +} // namespace coff +} // namespace lld + +#endif diff --git a/COFF/Writer.cpp b/COFF/Writer.cpp new file mode 100644 index 00000000..d8077df9 --- /dev/null +++ b/COFF/Writer.cpp @@ -0,0 +1,796 @@ +//===- Writer.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Config.h" +#include "DLL.h" +#include "Error.h" +#include "InputFiles.h" +#include "SymbolTable.h" +#include "Symbols.h" +#include "Writer.h" +#include "lld/Core/Parallel.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include + +using namespace llvm; +using namespace llvm::COFF; +using namespace llvm::object; +using namespace llvm::support; +using namespace llvm::support::endian; +using namespace lld; +using namespace lld::coff; + +static const int PageSize = 4096; +static const int SectorSize = 512; +static const int DOSStubSize = 64; +static const int NumberfOfDataDirectory = 16; + +namespace { +// The writer writes a SymbolTable result to a file. 
+class Writer { +public: + Writer(SymbolTable *T) : Symtab(T) {} + void run(); + +private: + void createSections(); + void createMiscChunks(); + void createImportTables(); + void createExportTable(); + void assignAddresses(); + void removeEmptySections(); + void createSymbolAndStringTable(); + void openFile(StringRef OutputPath); + template void writeHeader(); + void fixSafeSEHSymbols(); + void setSectionPermissions(); + void writeSections(); + void sortExceptionTable(); + void applyRelocations(); + + llvm::Optional createSymbol(Defined *D); + size_t addEntryToStringTable(StringRef Str); + + OutputSection *findSection(StringRef Name); + OutputSection *createSection(StringRef Name); + void addBaserels(OutputSection *Dest); + void addBaserelBlocks(OutputSection *Dest, std::vector &V); + + uint32_t getSizeOfInitializedData(); + std::map> binImports(); + + SymbolTable *Symtab; + std::unique_ptr Buffer; + llvm::SpecificBumpPtrAllocator CAlloc; + llvm::SpecificBumpPtrAllocator BAlloc; + std::vector OutputSections; + std::vector Strtab; + std::vector OutputSymtab; + IdataContents Idata; + DelayLoadContents DelayIdata; + EdataContents Edata; + std::unique_ptr SEHTable; + + uint64_t FileSize; + uint32_t PointerToSymbolTable = 0; + uint64_t SizeOfImage; + uint64_t SizeOfHeaders; + + std::vector> Chunks; +}; +} // anonymous namespace + +namespace lld { +namespace coff { + +void writeResult(SymbolTable *T) { Writer(T).run(); } + +// OutputSection represents a section in an output file. It's a +// container of chunks. OutputSection and Chunk are 1:N relationship. +// Chunks cannot belong to more than one OutputSections. The writer +// creates multiple OutputSections and assign them unique, +// non-overlapping file offsets and RVAs. 
+class OutputSection { +public: + OutputSection(StringRef N) : Name(N), Header({}) {} + void setRVA(uint64_t); + void setFileOffset(uint64_t); + void addChunk(Chunk *C); + StringRef getName() { return Name; } + std::vector &getChunks() { return Chunks; } + void addPermissions(uint32_t C); + void setPermissions(uint32_t C); + uint32_t getPermissions() { return Header.Characteristics & PermMask; } + uint32_t getCharacteristics() { return Header.Characteristics; } + uint64_t getRVA() { return Header.VirtualAddress; } + uint64_t getFileOff() { return Header.PointerToRawData; } + void writeHeaderTo(uint8_t *Buf); + + // Returns the size of this section in an executable memory image. + // This may be smaller than the raw size (the raw size is multiple + // of disk sector size, so there may be padding at end), or may be + // larger (if that's the case, the loader reserves spaces after end + // of raw data). + uint64_t getVirtualSize() { return Header.VirtualSize; } + + // Returns the size of the section in the output file. + uint64_t getRawSize() { return Header.SizeOfRawData; } + + // Set offset into the string table storing this section name. + // Used only when the name is longer than 8 bytes. + void setStringTableOff(uint32_t V) { StringTableOff = V; } + + // N.B. The section index is one based. + uint32_t SectionIndex = 0; + +private: + StringRef Name; + coff_section Header; + uint32_t StringTableOff = 0; + std::vector Chunks; +}; + +void OutputSection::setRVA(uint64_t RVA) { + Header.VirtualAddress = RVA; + for (Chunk *C : Chunks) + C->setRVA(C->getRVA() + RVA); +} + +void OutputSection::setFileOffset(uint64_t Off) { + // If a section has no actual data (i.e. BSS section), we want to + // set 0 to its PointerToRawData. Otherwise the output is rejected + // by the loader. 
+ if (Header.SizeOfRawData == 0) + return; + Header.PointerToRawData = Off; +} + +void OutputSection::addChunk(Chunk *C) { + Chunks.push_back(C); + C->setOutputSection(this); + uint64_t Off = Header.VirtualSize; + Off = alignTo(Off, C->getAlign()); + C->setRVA(Off); + C->setOutputSectionOff(Off); + Off += C->getSize(); + Header.VirtualSize = Off; + if (C->hasData()) + Header.SizeOfRawData = alignTo(Off, SectorSize); +} + +void OutputSection::addPermissions(uint32_t C) { + Header.Characteristics |= C & PermMask; +} + +void OutputSection::setPermissions(uint32_t C) { + Header.Characteristics = C & PermMask; +} + +// Write the section header to a given buffer. +void OutputSection::writeHeaderTo(uint8_t *Buf) { + auto *Hdr = reinterpret_cast(Buf); + *Hdr = Header; + if (StringTableOff) { + // If name is too long, write offset into the string table as a name. + sprintf(Hdr->Name, "/%d", StringTableOff); + } else { + assert(!Config->Debug || Name.size() <= COFF::NameSize); + strncpy(Hdr->Name, Name.data(), + std::min(Name.size(), (size_t)COFF::NameSize)); + } +} + +uint64_t Defined::getSecrel() { + if (auto *D = dyn_cast(this)) + return getRVA() - D->getChunk()->getOutputSection()->getRVA(); + fatal("SECREL relocation points to a non-regular symbol"); +} + +uint64_t Defined::getSectionIndex() { + if (auto *D = dyn_cast(this)) + return D->getChunk()->getOutputSection()->SectionIndex; + fatal("SECTION relocation points to a non-regular symbol"); +} + +bool Defined::isExecutable() { + const auto X = IMAGE_SCN_MEM_EXECUTE; + if (auto *D = dyn_cast(this)) + return D->getChunk()->getOutputSection()->getPermissions() & X; + return isa(this); +} + +} // namespace coff +} // namespace lld + +// The main function of the writer. 
+void Writer::run() { + createSections(); + createMiscChunks(); + createImportTables(); + createExportTable(); + if (Config->Relocatable) + createSection(".reloc"); + assignAddresses(); + removeEmptySections(); + setSectionPermissions(); + createSymbolAndStringTable(); + openFile(Config->OutputFile); + if (Config->is64()) { + writeHeader(); + } else { + writeHeader(); + } + fixSafeSEHSymbols(); + writeSections(); + sortExceptionTable(); + if (auto EC = Buffer->commit()) + fatal(EC, "failed to write the output file"); +} + +static StringRef getOutputSection(StringRef Name) { + StringRef S = Name.split('$').first; + auto It = Config->Merge.find(S); + if (It == Config->Merge.end()) + return S; + return It->second; +} + +// Create output section objects and add them to OutputSections. +void Writer::createSections() { + // First, bin chunks by name. + std::map> Map; + for (Chunk *C : Symtab->getChunks()) { + auto *SC = dyn_cast(C); + if (SC && !SC->isLive()) { + if (Config->Verbose) + SC->printDiscardedMessage(); + continue; + } + Map[C->getSectionName()].push_back(C); + } + + // Then create an OutputSection for each section. + // '$' and all following characters in input section names are + // discarded when determining output section. So, .text$foo + // contributes to .text, for example. See PE/COFF spec 3.2. + SmallDenseMap Sections; + for (auto Pair : Map) { + StringRef Name = getOutputSection(Pair.first); + OutputSection *&Sec = Sections[Name]; + if (!Sec) { + Sec = new (CAlloc.Allocate()) OutputSection(Name); + OutputSections.push_back(Sec); + } + std::vector &Chunks = Pair.second; + for (Chunk *C : Chunks) { + Sec->addChunk(C); + Sec->addPermissions(C->getPermissions()); + } + } +} + +void Writer::createMiscChunks() { + // Create thunks for locally-dllimported symbols. + if (!Symtab->LocalImportChunks.empty()) { + OutputSection *Sec = createSection(".rdata"); + for (Chunk *C : Symtab->LocalImportChunks) + Sec->addChunk(C); + } + + // Create SEH table. x86-only. 
+ if (Config->Machine != I386) + return; + std::set Handlers; + for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) { + if (!File->SEHCompat) + return; + for (SymbolBody *B : File->SEHandlers) + Handlers.insert(cast(B->repl())); + } + SEHTable.reset(new SEHTableChunk(Handlers)); + createSection(".rdata")->addChunk(SEHTable.get()); +} + +// Create .idata section for the DLL-imported symbol table. +// The format of this section is inherently Windows-specific. +// IdataContents class abstracted away the details for us, +// so we just let it create chunks and add them to the section. +void Writer::createImportTables() { + if (Symtab->ImportFiles.empty()) + return; + + // Initialize DLLOrder so that import entries are ordered in + // the same order as in the command line. (That affects DLL + // initialization order, and this ordering is MSVC-compatible.) + for (ImportFile *File : Symtab->ImportFiles) { + std::string DLL = StringRef(File->DLLName).lower(); + if (Config->DLLOrder.count(DLL) == 0) + Config->DLLOrder[DLL] = Config->DLLOrder.size(); + } + + OutputSection *Text = createSection(".text"); + for (ImportFile *File : Symtab->ImportFiles) { + if (DefinedImportThunk *Thunk = File->ThunkSym) + Text->addChunk(Thunk->getChunk()); + if (Config->DelayLoads.count(StringRef(File->DLLName).lower())) { + DelayIdata.add(File->ImpSym); + } else { + Idata.add(File->ImpSym); + } + } + if (!Idata.empty()) { + OutputSection *Sec = createSection(".idata"); + for (Chunk *C : Idata.getChunks()) + Sec->addChunk(C); + } + if (!DelayIdata.empty()) { + Defined *Helper = cast(Config->DelayLoadHelper->repl()); + DelayIdata.create(Helper); + OutputSection *Sec = createSection(".didat"); + for (Chunk *C : DelayIdata.getChunks()) + Sec->addChunk(C); + Sec = createSection(".data"); + for (Chunk *C : DelayIdata.getDataChunks()) + Sec->addChunk(C); + Sec = createSection(".text"); + for (std::unique_ptr &C : DelayIdata.getCodeChunks()) + Sec->addChunk(C.get()); + } +} + +void 
Writer::createExportTable() { + if (Config->Exports.empty()) + return; + OutputSection *Sec = createSection(".edata"); + for (std::unique_ptr &C : Edata.Chunks) + Sec->addChunk(C.get()); +} + +// The Windows loader doesn't seem to like empty sections, +// so we remove them if any. +void Writer::removeEmptySections() { + auto IsEmpty = [](OutputSection *S) { return S->getVirtualSize() == 0; }; + OutputSections.erase( + std::remove_if(OutputSections.begin(), OutputSections.end(), IsEmpty), + OutputSections.end()); + uint32_t Idx = 1; + for (OutputSection *Sec : OutputSections) + Sec->SectionIndex = Idx++; +} + +size_t Writer::addEntryToStringTable(StringRef Str) { + assert(Str.size() > COFF::NameSize); + size_t OffsetOfEntry = Strtab.size() + 4; // +4 for the size field + Strtab.insert(Strtab.end(), Str.begin(), Str.end()); + Strtab.push_back('\0'); + return OffsetOfEntry; +} + +Optional Writer::createSymbol(Defined *Def) { + if (auto *D = dyn_cast(Def)) + if (!D->getChunk()->isLive()) + return None; + + coff_symbol16 Sym; + StringRef Name = Def->getName(); + if (Name.size() > COFF::NameSize) { + Sym.Name.Offset.Zeroes = 0; + Sym.Name.Offset.Offset = addEntryToStringTable(Name); + } else { + memset(Sym.Name.ShortName, 0, COFF::NameSize); + memcpy(Sym.Name.ShortName, Name.data(), Name.size()); + } + + if (auto *D = dyn_cast(Def)) { + COFFSymbolRef Ref = D->getCOFFSymbol(); + Sym.Type = Ref.getType(); + Sym.StorageClass = Ref.getStorageClass(); + } else { + Sym.Type = IMAGE_SYM_TYPE_NULL; + Sym.StorageClass = IMAGE_SYM_CLASS_EXTERNAL; + } + Sym.NumberOfAuxSymbols = 0; + + switch (Def->kind()) { + case SymbolBody::DefinedAbsoluteKind: + case SymbolBody::DefinedRelativeKind: + Sym.Value = Def->getRVA(); + Sym.SectionNumber = IMAGE_SYM_ABSOLUTE; + break; + default: { + uint64_t RVA = Def->getRVA(); + OutputSection *Sec = nullptr; + for (OutputSection *S : OutputSections) { + if (S->getRVA() > RVA) + break; + Sec = S; + } + Sym.Value = RVA - Sec->getRVA(); + 
Sym.SectionNumber = Sec->SectionIndex; + break; + } + } + return Sym; +} + +void Writer::createSymbolAndStringTable() { + if (!Config->Debug || !Config->WriteSymtab) + return; + + // Name field in the section table is 8 byte long. Longer names need + // to be written to the string table. First, construct string table. + for (OutputSection *Sec : OutputSections) { + StringRef Name = Sec->getName(); + if (Name.size() <= COFF::NameSize) + continue; + Sec->setStringTableOff(addEntryToStringTable(Name)); + } + + for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) + for (SymbolBody *B : File->getSymbols()) + if (auto *D = dyn_cast(B)) + if (Optional Sym = createSymbol(D)) + OutputSymtab.push_back(*Sym); + + for (ImportFile *File : Symtab->ImportFiles) + for (SymbolBody *B : File->getSymbols()) + if (Optional Sym = createSymbol(cast(B))) + OutputSymtab.push_back(*Sym); + + OutputSection *LastSection = OutputSections.back(); + // We position the symbol table to be adjacent to the end of the last section. + uint64_t FileOff = LastSection->getFileOff() + + alignTo(LastSection->getRawSize(), SectorSize); + if (!OutputSymtab.empty()) { + PointerToSymbolTable = FileOff; + FileOff += OutputSymtab.size() * sizeof(coff_symbol16); + } + if (!Strtab.empty()) + FileOff += Strtab.size() + 4; + FileSize = alignTo(FileOff, SectorSize); +} + +// Visits all sections to assign incremental, non-overlapping RVAs and +// file offsets. +void Writer::assignAddresses() { + SizeOfHeaders = DOSStubSize + sizeof(PEMagic) + sizeof(coff_file_header) + + sizeof(data_directory) * NumberfOfDataDirectory + + sizeof(coff_section) * OutputSections.size(); + SizeOfHeaders += + Config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header); + SizeOfHeaders = alignTo(SizeOfHeaders, SectorSize); + uint64_t RVA = 0x1000; // The first page is kept unmapped. + FileSize = SizeOfHeaders; + // Move DISCARDABLE (or non-memory-mapped) sections to the end of file because + // the loader cannot handle holes. 
+ std::stable_partition( + OutputSections.begin(), OutputSections.end(), [](OutputSection *S) { + return (S->getPermissions() & IMAGE_SCN_MEM_DISCARDABLE) == 0; + }); + for (OutputSection *Sec : OutputSections) { + if (Sec->getName() == ".reloc") + addBaserels(Sec); + Sec->setRVA(RVA); + Sec->setFileOffset(FileSize); + RVA += alignTo(Sec->getVirtualSize(), PageSize); + FileSize += alignTo(Sec->getRawSize(), SectorSize); + } + SizeOfImage = SizeOfHeaders + alignTo(RVA - 0x1000, PageSize); +} + +template void Writer::writeHeader() { + // Write DOS stub + uint8_t *Buf = Buffer->getBufferStart(); + auto *DOS = reinterpret_cast(Buf); + Buf += DOSStubSize; + DOS->Magic[0] = 'M'; + DOS->Magic[1] = 'Z'; + DOS->AddressOfRelocationTable = sizeof(dos_header); + DOS->AddressOfNewExeHeader = DOSStubSize; + + // Write PE magic + memcpy(Buf, PEMagic, sizeof(PEMagic)); + Buf += sizeof(PEMagic); + + // Write COFF header + auto *COFF = reinterpret_cast(Buf); + Buf += sizeof(*COFF); + COFF->Machine = Config->Machine; + COFF->NumberOfSections = OutputSections.size(); + COFF->Characteristics = IMAGE_FILE_EXECUTABLE_IMAGE; + if (Config->LargeAddressAware) + COFF->Characteristics |= IMAGE_FILE_LARGE_ADDRESS_AWARE; + if (!Config->is64()) + COFF->Characteristics |= IMAGE_FILE_32BIT_MACHINE; + if (Config->DLL) + COFF->Characteristics |= IMAGE_FILE_DLL; + if (!Config->Relocatable) + COFF->Characteristics |= IMAGE_FILE_RELOCS_STRIPPED; + COFF->SizeOfOptionalHeader = + sizeof(PEHeaderTy) + sizeof(data_directory) * NumberfOfDataDirectory; + + // Write PE header + auto *PE = reinterpret_cast(Buf); + Buf += sizeof(*PE); + PE->Magic = Config->is64() ? 
PE32Header::PE32_PLUS : PE32Header::PE32; + PE->ImageBase = Config->ImageBase; + PE->SectionAlignment = PageSize; + PE->FileAlignment = SectorSize; + PE->MajorImageVersion = Config->MajorImageVersion; + PE->MinorImageVersion = Config->MinorImageVersion; + PE->MajorOperatingSystemVersion = Config->MajorOSVersion; + PE->MinorOperatingSystemVersion = Config->MinorOSVersion; + PE->MajorSubsystemVersion = Config->MajorOSVersion; + PE->MinorSubsystemVersion = Config->MinorOSVersion; + PE->Subsystem = Config->Subsystem; + PE->SizeOfImage = SizeOfImage; + PE->SizeOfHeaders = SizeOfHeaders; + if (!Config->NoEntry) { + Defined *Entry = cast(Config->Entry->repl()); + PE->AddressOfEntryPoint = Entry->getRVA(); + // Pointer to thumb code must have the LSB set, so adjust it. + if (Config->Machine == ARMNT) + PE->AddressOfEntryPoint |= 1; + } + PE->SizeOfStackReserve = Config->StackReserve; + PE->SizeOfStackCommit = Config->StackCommit; + PE->SizeOfHeapReserve = Config->HeapReserve; + PE->SizeOfHeapCommit = Config->HeapCommit; + if (Config->DynamicBase) + PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE; + if (Config->HighEntropyVA) + PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA; + if (!Config->AllowBind) + PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_BIND; + if (Config->NxCompat) + PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NX_COMPAT; + if (!Config->AllowIsolation) + PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION; + if (Config->TerminalServerAware) + PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE; + PE->NumberOfRvaAndSize = NumberfOfDataDirectory; + if (OutputSection *Text = findSection(".text")) { + PE->BaseOfCode = Text->getRVA(); + PE->SizeOfCode = Text->getRawSize(); + } + PE->SizeOfInitializedData = getSizeOfInitializedData(); + + // Write data directory + auto *Dir = reinterpret_cast(Buf); + Buf += sizeof(*Dir) * NumberfOfDataDirectory; + if (OutputSection *Sec = 
findSection(".edata")) { + Dir[EXPORT_TABLE].RelativeVirtualAddress = Sec->getRVA(); + Dir[EXPORT_TABLE].Size = Sec->getVirtualSize(); + } + if (!Idata.empty()) { + Dir[IMPORT_TABLE].RelativeVirtualAddress = Idata.getDirRVA(); + Dir[IMPORT_TABLE].Size = Idata.getDirSize(); + Dir[IAT].RelativeVirtualAddress = Idata.getIATRVA(); + Dir[IAT].Size = Idata.getIATSize(); + } + if (!DelayIdata.empty()) { + Dir[DELAY_IMPORT_DESCRIPTOR].RelativeVirtualAddress = + DelayIdata.getDirRVA(); + Dir[DELAY_IMPORT_DESCRIPTOR].Size = DelayIdata.getDirSize(); + } + if (OutputSection *Sec = findSection(".rsrc")) { + Dir[RESOURCE_TABLE].RelativeVirtualAddress = Sec->getRVA(); + Dir[RESOURCE_TABLE].Size = Sec->getVirtualSize(); + } + if (OutputSection *Sec = findSection(".reloc")) { + Dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = Sec->getRVA(); + Dir[BASE_RELOCATION_TABLE].Size = Sec->getVirtualSize(); + } + if (OutputSection *Sec = findSection(".pdata")) { + Dir[EXCEPTION_TABLE].RelativeVirtualAddress = Sec->getRVA(); + Dir[EXCEPTION_TABLE].Size = Sec->getVirtualSize(); + } + if (Symbol *Sym = Symtab->findUnderscore("_tls_used")) { + if (Defined *B = dyn_cast(Sym->Body)) { + Dir[TLS_TABLE].RelativeVirtualAddress = B->getRVA(); + Dir[TLS_TABLE].Size = Config->is64() + ? 
sizeof(object::coff_tls_directory64) + : sizeof(object::coff_tls_directory32); + } + } + if (Symbol *Sym = Symtab->findUnderscore("_load_config_used")) { + if (auto *B = dyn_cast(Sym->Body)) { + SectionChunk *SC = B->getChunk(); + assert(B->getRVA() >= SC->getRVA()); + uint64_t OffsetInChunk = B->getRVA() - SC->getRVA(); + if (!SC->hasData() || OffsetInChunk + 4 > SC->getSize()) + fatal("_load_config_used is malformed"); + + ArrayRef SecContents = SC->getContents(); + uint32_t LoadConfigSize = + *reinterpret_cast(&SecContents[OffsetInChunk]); + if (OffsetInChunk + LoadConfigSize > SC->getSize()) + fatal("_load_config_used is too large"); + Dir[LOAD_CONFIG_TABLE].RelativeVirtualAddress = B->getRVA(); + Dir[LOAD_CONFIG_TABLE].Size = LoadConfigSize; + } + } + + // Write section table + for (OutputSection *Sec : OutputSections) { + Sec->writeHeaderTo(Buf); + Buf += sizeof(coff_section); + } + + if (OutputSymtab.empty()) + return; + + COFF->PointerToSymbolTable = PointerToSymbolTable; + uint32_t NumberOfSymbols = OutputSymtab.size(); + COFF->NumberOfSymbols = NumberOfSymbols; + auto *SymbolTable = reinterpret_cast( + Buffer->getBufferStart() + COFF->PointerToSymbolTable); + for (size_t I = 0; I != NumberOfSymbols; ++I) + SymbolTable[I] = OutputSymtab[I]; + // Create the string table, it follows immediately after the symbol table. + // The first 4 bytes is length including itself. + Buf = reinterpret_cast(&SymbolTable[NumberOfSymbols]); + write32le(Buf, Strtab.size() + 4); + if (!Strtab.empty()) + memcpy(Buf + 4, Strtab.data(), Strtab.size()); +} + +void Writer::openFile(StringRef Path) { + Buffer = check( + FileOutputBuffer::create(Path, FileSize, FileOutputBuffer::F_executable), + "failed to open " + Path); +} + +void Writer::fixSafeSEHSymbols() { + if (!SEHTable) + return; + Config->SEHTable->setRVA(SEHTable->getRVA()); + Config->SEHCount->setVA(SEHTable->getSize() / 4); +} + +// Handles /section options to allow users to overwrite +// section attributes. 
+void Writer::setSectionPermissions() { + for (auto &P : Config->Section) { + StringRef Name = P.first; + uint32_t Perm = P.second; + if (auto *Sec = findSection(Name)) + Sec->setPermissions(Perm); + } +} + +// Write section contents to a mmap'ed file. +void Writer::writeSections() { + uint8_t *Buf = Buffer->getBufferStart(); + for (OutputSection *Sec : OutputSections) { + uint8_t *SecBuf = Buf + Sec->getFileOff(); + // Fill gaps between functions in .text with INT3 instructions + // instead of leaving as NUL bytes (which can be interpreted as + // ADD instructions). + if (Sec->getPermissions() & IMAGE_SCN_CNT_CODE) + memset(SecBuf, 0xCC, Sec->getRawSize()); + parallel_for_each(Sec->getChunks().begin(), Sec->getChunks().end(), + [&](Chunk *C) { C->writeTo(SecBuf); }); + } +} + +// Sort .pdata section contents according to PE/COFF spec 5.5. +void Writer::sortExceptionTable() { + OutputSection *Sec = findSection(".pdata"); + if (!Sec) + return; + // We assume .pdata contains function table entries only. 
+ uint8_t *Begin = Buffer->getBufferStart() + Sec->getFileOff(); + uint8_t *End = Begin + Sec->getVirtualSize(); + if (Config->Machine == AMD64) { + struct Entry { ulittle32_t Begin, End, Unwind; }; + parallel_sort( + (Entry *)Begin, (Entry *)End, + [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; }); + return; + } + if (Config->Machine == ARMNT) { + struct Entry { ulittle32_t Begin, Unwind; }; + parallel_sort( + (Entry *)Begin, (Entry *)End, + [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; }); + return; + } + errs() << "warning: don't know how to handle .pdata.\n"; +} + +OutputSection *Writer::findSection(StringRef Name) { + for (OutputSection *Sec : OutputSections) + if (Sec->getName() == Name) + return Sec; + return nullptr; +} + +uint32_t Writer::getSizeOfInitializedData() { + uint32_t Res = 0; + for (OutputSection *S : OutputSections) + if (S->getPermissions() & IMAGE_SCN_CNT_INITIALIZED_DATA) + Res += S->getRawSize(); + return Res; +} + +// Returns an existing section or create a new one if not found. +OutputSection *Writer::createSection(StringRef Name) { + if (auto *Sec = findSection(Name)) + return Sec; + const auto DATA = IMAGE_SCN_CNT_INITIALIZED_DATA; + const auto BSS = IMAGE_SCN_CNT_UNINITIALIZED_DATA; + const auto CODE = IMAGE_SCN_CNT_CODE; + const auto DISCARDABLE = IMAGE_SCN_MEM_DISCARDABLE; + const auto R = IMAGE_SCN_MEM_READ; + const auto W = IMAGE_SCN_MEM_WRITE; + const auto X = IMAGE_SCN_MEM_EXECUTE; + uint32_t Perms = StringSwitch(Name) + .Case(".bss", BSS | R | W) + .Case(".data", DATA | R | W) + .Case(".didat", DATA | R) + .Case(".edata", DATA | R) + .Case(".idata", DATA | R) + .Case(".rdata", DATA | R) + .Case(".reloc", DATA | DISCARDABLE | R) + .Case(".text", CODE | R | X) + .Default(0); + if (!Perms) + llvm_unreachable("unknown section name"); + auto Sec = new (CAlloc.Allocate()) OutputSection(Name); + Sec->addPermissions(Perms); + OutputSections.push_back(Sec); + return Sec; +} + +// Dest is .reloc section. 
Add contents to that section. +void Writer::addBaserels(OutputSection *Dest) { + std::vector V; + for (OutputSection *Sec : OutputSections) { + if (Sec == Dest) + continue; + // Collect all locations for base relocations. + for (Chunk *C : Sec->getChunks()) + C->getBaserels(&V); + // Add the addresses to .reloc section. + if (!V.empty()) + addBaserelBlocks(Dest, V); + V.clear(); + } +} + +// Add addresses to .reloc section. Note that addresses are grouped by page. +void Writer::addBaserelBlocks(OutputSection *Dest, std::vector &V) { + const uint32_t Mask = ~uint32_t(PageSize - 1); + uint32_t Page = V[0].RVA & Mask; + size_t I = 0, J = 1; + for (size_t E = V.size(); J < E; ++J) { + uint32_t P = V[J].RVA & Mask; + if (P == Page) + continue; + BaserelChunk *Buf = BAlloc.Allocate(); + Dest->addChunk(new (Buf) BaserelChunk(Page, &V[I], &V[0] + J)); + I = J; + Page = P; + } + if (I == J) + return; + BaserelChunk *Buf = BAlloc.Allocate(); + Dest->addChunk(new (Buf) BaserelChunk(Page, &V[I], &V[0] + J)); +} diff --git a/COFF/Writer.h b/COFF/Writer.h new file mode 100644 index 00000000..0473315a --- /dev/null +++ b/COFF/Writer.h @@ -0,0 +1,26 @@ +//===- Writer.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_WRITER_H +#define LLD_COFF_WRITER_H + +#include + +namespace lld { +namespace coff { + +class Chunk; +class OutputSection; + +void writeResult(SymbolTable *T); + +} +} + +#endif diff --git a/ELF/CMakeLists.txt b/ELF/CMakeLists.txt new file mode 100644 index 00000000..a1b65adc --- /dev/null +++ b/ELF/CMakeLists.txt @@ -0,0 +1,50 @@ +set(LLVM_TARGET_DEFINITIONS Options.td) +tablegen(LLVM Options.inc -gen-opt-parser-defs) +add_public_tablegen_target(ELFOptionsTableGen) + +add_lld_library(lldELF + Driver.cpp + DriverUtils.cpp + EhFrame.cpp + Error.cpp + ICF.cpp + InputFiles.cpp + InputSection.cpp + LTO.cpp + LinkerScript.cpp + MarkLive.cpp + OutputSections.cpp + Relocations.cpp + ScriptParser.cpp + Strings.cpp + SymbolListFile.cpp + SymbolTable.cpp + Symbols.cpp + Target.cpp + Thunks.cpp + Writer.cpp + + LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + Analysis + BitReader + BitWriter + Codegen + Core + IPO + Linker + LTO + Object + Option + Passes + MC + Support + Target + TransformUtils + + LINK_LIBS + lldConfig + ${PTHREAD_LIB} + ) + +add_dependencies(lldELF intrinsics_gen ELFOptionsTableGen) diff --git a/ELF/Config.h b/ELF/Config.h new file mode 100644 index 00000000..2ccd95e8 --- /dev/null +++ b/ELF/Config.h @@ -0,0 +1,134 @@ +//===- Config.h -------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_CONFIG_H +#define LLD_ELF_CONFIG_H + +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ELF.h" + +#include + +namespace lld { +namespace elf { + +class InputFile; +struct Symbol; + +enum ELFKind { + ELFNoneKind, + ELF32LEKind, + ELF32BEKind, + ELF64LEKind, + ELF64BEKind +}; + +enum class BuildIdKind { None, Fnv1, Md5, Sha1, Hexstring }; + +enum class UnresolvedPolicy { NoUndef, Error, Warn, Ignore }; + +struct SymbolVersion { + llvm::StringRef Name; + bool IsExternCpp; +}; + +// This struct contains symbols version definition that +// can be found in version script if it is used for link. +struct VersionDefinition { + VersionDefinition(llvm::StringRef Name, size_t Id) : Name(Name), Id(Id) {} + llvm::StringRef Name; + size_t Id; + std::vector Globals; + size_t NameOff; // Offset in string table. +}; + +// This struct contains the global configuration for the linker. +// Most fields are direct mapping from the command line options +// and such fields have the same name as the corresponding options. +// Most fields are initialized by the driver. 
+struct Configuration { + Symbol *EntrySym = nullptr; + InputFile *FirstElf = nullptr; + llvm::StringRef DynamicLinker; + llvm::StringRef Entry; + llvm::StringRef Emulation; + llvm::StringRef Fini; + llvm::StringRef Init; + llvm::StringRef LtoAAPipeline; + llvm::StringRef LtoNewPmPasses; + llvm::StringRef OutputFile; + llvm::StringRef SoName; + llvm::StringRef Sysroot; + std::string RPath; + std::vector VersionDefinitions; + std::vector DynamicList; + std::vector SearchPaths; + std::vector Undefined; + std::vector VersionScriptGlobals; + std::vector BuildIdVector; + bool AllowMultipleDefinition; + bool AsNeeded = false; + bool Bsymbolic; + bool BsymbolicFunctions; + bool Demangle = true; + bool DisableVerify; + bool DiscardAll; + bool DiscardLocals; + bool DiscardNone; + bool EhFrameHdr; + bool EnableNewDtags; + bool ExportDynamic; + bool FatalWarnings; + bool GcSections; + bool GnuHash = false; + bool ICF; + bool Mips64EL = false; + bool NoGnuUnique; + bool NoUndefinedVersion; + bool Pic; + bool Pie; + bool PrintGcSections; + bool Rela; + bool Relocatable; + bool SaveTemps; + bool Shared; + bool Static = false; + bool StripAll; + bool StripDebug; + bool SysvHash = true; + bool Threads; + bool Trace; + bool Verbose; + bool WarnCommon; + bool ZCombreloc; + bool ZExecStack; + bool ZNodelete; + bool ZNow; + bool ZOrigin; + bool ZRelro; + UnresolvedPolicy UnresolvedSymbols; + BuildIdKind BuildId = BuildIdKind::None; + ELFKind EKind = ELFNoneKind; + uint16_t DefaultSymbolVersion = llvm::ELF::VER_NDX_GLOBAL; + uint16_t EMachine = llvm::ELF::EM_NONE; + uint64_t EntryAddr = -1; + uint64_t ImageBase; + unsigned LtoJobs; + unsigned LtoO; + unsigned Optimize; +}; + +// The only instance of Configuration struct. 
+extern Configuration *Config; + +} // namespace elf +} // namespace lld + +#endif diff --git a/ELF/Driver.cpp b/ELF/Driver.cpp new file mode 100644 index 00000000..c6ca2639 --- /dev/null +++ b/ELF/Driver.cpp @@ -0,0 +1,588 @@ +//===- Driver.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Driver.h" +#include "Config.h" +#include "Error.h" +#include "ICF.h" +#include "InputFiles.h" +#include "InputSection.h" +#include "LinkerScript.h" +#include "Strings.h" +#include "SymbolListFile.h" +#include "SymbolTable.h" +#include "Target.h" +#include "Writer.h" +#include "lld/Driver/Driver.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::sys; + +using namespace lld; +using namespace lld::elf; + +Configuration *elf::Config; +LinkerDriver *elf::Driver; + +bool elf::link(ArrayRef Args, raw_ostream &Error) { + HasError = false; + ErrorOS = &Error; + + Configuration C; + LinkerDriver D; + ScriptConfiguration SC; + Config = &C; + Driver = &D; + ScriptConfig = &SC; + + Driver->main(Args); + return !HasError; +} + +// Parses a linker -m option. 
+static std::pair parseEmulation(StringRef S) { + if (S.endswith("_fbsd")) + S = S.drop_back(5); + + std::pair Ret = + StringSwitch>(S) + .Case("aarch64linux", {ELF64LEKind, EM_AARCH64}) + .Case("armelf_linux_eabi", {ELF32LEKind, EM_ARM}) + .Case("elf32_x86_64", {ELF32LEKind, EM_X86_64}) + .Case("elf32btsmip", {ELF32BEKind, EM_MIPS}) + .Case("elf32ltsmip", {ELF32LEKind, EM_MIPS}) + .Case("elf32ppc", {ELF32BEKind, EM_PPC}) + .Case("elf64btsmip", {ELF64BEKind, EM_MIPS}) + .Case("elf64ltsmip", {ELF64LEKind, EM_MIPS}) + .Case("elf64ppc", {ELF64BEKind, EM_PPC64}) + .Case("elf_i386", {ELF32LEKind, EM_386}) + .Case("elf_x86_64", {ELF64LEKind, EM_X86_64}) + .Default({ELFNoneKind, EM_NONE}); + + if (Ret.first == ELFNoneKind) { + if (S == "i386pe" || S == "i386pep" || S == "thumb2pe") + error("Windows targets are not supported on the ELF frontend: " + S); + else + error("unknown emulation: " + S); + } + return Ret; +} + +// Returns slices of MB by parsing MB as an archive file. +// Each slice consists of a member file in the archive. +std::vector +LinkerDriver::getArchiveMembers(MemoryBufferRef MB) { + std::unique_ptr File = + check(Archive::create(MB), "failed to parse archive"); + + std::vector V; + Error Err; + for (const ErrorOr &COrErr : File->children(Err)) { + Archive::Child C = check(COrErr, "could not get the child of the archive " + + File->getFileName()); + MemoryBufferRef MBRef = + check(C.getMemoryBufferRef(), + "could not get the buffer for a child of the archive " + + File->getFileName()); + V.push_back(MBRef); + } + if (Err) + Error(Err); + + // Take ownership of memory buffers created for members of thin archives. + for (std::unique_ptr &MB : File->takeThinBuffers()) + OwningMBs.push_back(std::move(MB)); + + return V; +} + +// Opens and parses a file. Path has to be resolved already. +// Newly created memory buffers are owned by this driver. 
+void LinkerDriver::addFile(StringRef Path) { + using namespace sys::fs; + if (Config->Verbose) + outs() << Path << "\n"; + + Optional Buffer = readFile(Path); + if (!Buffer.hasValue()) + return; + MemoryBufferRef MBRef = *Buffer; + + switch (identify_magic(MBRef.getBuffer())) { + case file_magic::unknown: + readLinkerScript(MBRef); + return; + case file_magic::archive: + if (WholeArchive) { + for (MemoryBufferRef MB : getArchiveMembers(MBRef)) + Files.push_back(createObjectFile(MB, Path)); + return; + } + Files.push_back(make_unique(MBRef)); + return; + case file_magic::elf_shared_object: + if (Config->Relocatable) { + error("attempted static link of dynamic object " + Path); + return; + } + Files.push_back(createSharedFile(MBRef)); + return; + default: + if (InLib) + Files.push_back(make_unique(MBRef)); + else + Files.push_back(createObjectFile(MBRef)); + } +} + +Optional LinkerDriver::readFile(StringRef Path) { + auto MBOrErr = MemoryBuffer::getFile(Path); + if (auto EC = MBOrErr.getError()) { + error(EC, "cannot open " + Path); + return None; + } + std::unique_ptr &MB = *MBOrErr; + MemoryBufferRef MBRef = MB->getMemBufferRef(); + OwningMBs.push_back(std::move(MB)); // take MB ownership + + if (Cpio) + Cpio->append(relativeToRoot(Path), MBRef.getBuffer()); + + return MBRef; +} + +// Add a given library by searching it from input search paths. +void LinkerDriver::addLibrary(StringRef Name) { + std::string Path = searchLibrary(Name); + if (Path.empty()) + error("unable to find library -l" + Name); + else + addFile(Path); +} + +// This function is called on startup. We need this for LTO since +// LTO calls LLVM functions to compile bitcode files to native code. +// Technically this can be delayed until we read bitcode files, but +// we don't bother to do lazily because the initialization is fast. 
+static void initLLVM(opt::InputArgList &Args) { + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmPrinters(); + InitializeAllAsmParsers(); + + // This is a flag to discard all but GlobalValue names. + // We want to enable it by default because it saves memory. + // Disable it only when a developer option (-save-temps) is given. + Driver->Context.setDiscardValueNames(!Config->SaveTemps); + Driver->Context.enableDebugTypeODRUniquing(); + + // Parse and evaluate -mllvm options. + std::vector V; + V.push_back("lld (LLVM option parsing)"); + for (auto *Arg : Args.filtered(OPT_mllvm)) + V.push_back(Arg->getValue()); + cl::ParseCommandLineOptions(V.size(), V.data()); +} + +// Some command line options or some combinations of them are not allowed. +// This function checks for such errors. +static void checkOptions(opt::InputArgList &Args) { + // The MIPS ABI as of 2016 does not support the GNU-style symbol lookup + // table which is a relatively new feature. + if (Config->EMachine == EM_MIPS && Config->GnuHash) + error("the .gnu.hash section is not compatible with the MIPS target."); + + if (Config->EMachine == EM_AMDGPU && !Config->Entry.empty()) + error("-e option is not valid for AMDGPU."); + + if (Config->Pie && Config->Shared) + error("-shared and -pie may not be used together"); + + if (Config->Relocatable) { + if (Config->Shared) + error("-r and -shared may not be used together"); + if (Config->GcSections) + error("-r and --gc-sections may not be used together"); + if (Config->ICF) + error("-r and --icf may not be used together"); + if (Config->Pie) + error("-r and -pie may not be used together"); + } +} + +static StringRef +getString(opt::InputArgList &Args, unsigned Key, StringRef Default = "") { + if (auto *Arg = Args.getLastArg(Key)) + return Arg->getValue(); + return Default; +} + +static int getInteger(opt::InputArgList &Args, unsigned Key, int Default) { + int V = Default; + if (auto *Arg = Args.getLastArg(Key)) { + StringRef S = 
Arg->getValue(); + if (S.getAsInteger(10, V)) + error(Arg->getSpelling() + ": number expected, but got " + S); + } + return V; +} + +static const char *getReproduceOption(opt::InputArgList &Args) { + if (auto *Arg = Args.getLastArg(OPT_reproduce)) + return Arg->getValue(); + return getenv("LLD_REPRODUCE"); +} + +static bool hasZOption(opt::InputArgList &Args, StringRef Key) { + for (auto *Arg : Args.filtered(OPT_z)) + if (Key == Arg->getValue()) + return true; + return false; +} + +void LinkerDriver::main(ArrayRef ArgsArr) { + ELFOptTable Parser; + opt::InputArgList Args = Parser.parse(ArgsArr.slice(1)); + if (Args.hasArg(OPT_help)) { + printHelp(ArgsArr[0]); + return; + } + if (Args.hasArg(OPT_version)) { + outs() << getVersionString(); + return; + } + + if (const char *Path = getReproduceOption(Args)) { + // Note that --reproduce is a debug option so you can ignore it + // if you are trying to understand the whole picture of the code. + Cpio.reset(CpioFile::create(Path)); + if (Cpio) { + Cpio->append("response.txt", createResponseFile(Args)); + Cpio->append("version.txt", getVersionString()); + } + } + + readConfigs(Args); + initLLVM(Args); + createFiles(Args); + checkOptions(Args); + if (HasError) + return; + + switch (Config->EKind) { + case ELF32LEKind: + link(Args); + return; + case ELF32BEKind: + link(Args); + return; + case ELF64LEKind: + link(Args); + return; + case ELF64BEKind: + link(Args); + return; + default: + error("-m or at least a .o file required"); + } +} + +static UnresolvedPolicy getUnresolvedSymbolOption(opt::InputArgList &Args) { + if (Args.hasArg(OPT_noinhibit_exec)) + return UnresolvedPolicy::Warn; + if (Args.hasArg(OPT_no_undefined) || hasZOption(Args, "defs")) + return UnresolvedPolicy::NoUndef; + if (Config->Relocatable) + return UnresolvedPolicy::Ignore; + + if (auto *Arg = Args.getLastArg(OPT_unresolved_symbols)) { + StringRef S = Arg->getValue(); + if (S == "ignore-all" || S == "ignore-in-object-files") + return 
UnresolvedPolicy::Ignore; + if (S == "ignore-in-shared-libs" || S == "report-all") + return UnresolvedPolicy::Error; + error("unknown --unresolved-symbols value: " + S); + } + return UnresolvedPolicy::Error; +} + +// Initializes Config members by the command line options. +void LinkerDriver::readConfigs(opt::InputArgList &Args) { + for (auto *Arg : Args.filtered(OPT_L)) + Config->SearchPaths.push_back(Arg->getValue()); + + std::vector RPaths; + for (auto *Arg : Args.filtered(OPT_rpath)) + RPaths.push_back(Arg->getValue()); + if (!RPaths.empty()) + Config->RPath = llvm::join(RPaths.begin(), RPaths.end(), ":"); + + if (auto *Arg = Args.getLastArg(OPT_m)) { + // Parse ELF{32,64}{LE,BE} and CPU type. + StringRef S = Arg->getValue(); + std::tie(Config->EKind, Config->EMachine) = parseEmulation(S); + Config->Emulation = S; + } + + Config->AllowMultipleDefinition = Args.hasArg(OPT_allow_multiple_definition); + Config->Bsymbolic = Args.hasArg(OPT_Bsymbolic); + Config->BsymbolicFunctions = Args.hasArg(OPT_Bsymbolic_functions); + Config->Demangle = !Args.hasArg(OPT_no_demangle); + Config->DisableVerify = Args.hasArg(OPT_disable_verify); + Config->DiscardAll = Args.hasArg(OPT_discard_all); + Config->DiscardLocals = Args.hasArg(OPT_discard_locals); + Config->DiscardNone = Args.hasArg(OPT_discard_none); + Config->EhFrameHdr = Args.hasArg(OPT_eh_frame_hdr); + Config->EnableNewDtags = !Args.hasArg(OPT_disable_new_dtags); + Config->ExportDynamic = Args.hasArg(OPT_export_dynamic); + Config->FatalWarnings = Args.hasArg(OPT_fatal_warnings); + Config->GcSections = Args.hasArg(OPT_gc_sections); + Config->ICF = Args.hasArg(OPT_icf); + Config->NoGnuUnique = Args.hasArg(OPT_no_gnu_unique); + Config->NoUndefinedVersion = Args.hasArg(OPT_no_undefined_version); + Config->Pie = Args.hasArg(OPT_pie); + Config->PrintGcSections = Args.hasArg(OPT_print_gc_sections); + Config->Relocatable = Args.hasArg(OPT_relocatable); + Config->SaveTemps = Args.hasArg(OPT_save_temps); + Config->Shared = 
Args.hasArg(OPT_shared); + Config->StripAll = Args.hasArg(OPT_strip_all); + Config->StripDebug = Args.hasArg(OPT_strip_debug); + Config->Threads = Args.hasArg(OPT_threads); + Config->Trace = Args.hasArg(OPT_trace); + Config->Verbose = Args.hasArg(OPT_verbose); + Config->WarnCommon = Args.hasArg(OPT_warn_common); + + Config->DynamicLinker = getString(Args, OPT_dynamic_linker); + Config->Entry = getString(Args, OPT_entry); + Config->Fini = getString(Args, OPT_fini, "_fini"); + Config->Init = getString(Args, OPT_init, "_init"); + Config->LtoAAPipeline = getString(Args, OPT_lto_aa_pipeline); + Config->LtoNewPmPasses = getString(Args, OPT_lto_newpm_passes); + Config->OutputFile = getString(Args, OPT_o); + Config->SoName = getString(Args, OPT_soname); + Config->Sysroot = getString(Args, OPT_sysroot); + + Config->Optimize = getInteger(Args, OPT_O, 1); + Config->LtoO = getInteger(Args, OPT_lto_O, 2); + if (Config->LtoO > 3) + error("invalid optimization level for LTO: " + getString(Args, OPT_lto_O)); + Config->LtoJobs = getInteger(Args, OPT_lto_jobs, 1); + if (Config->LtoJobs == 0) + error("number of threads must be > 0"); + + Config->ZCombreloc = !hasZOption(Args, "nocombreloc"); + Config->ZExecStack = hasZOption(Args, "execstack"); + Config->ZNodelete = hasZOption(Args, "nodelete"); + Config->ZNow = hasZOption(Args, "now"); + Config->ZOrigin = hasZOption(Args, "origin"); + Config->ZRelro = !hasZOption(Args, "norelro"); + + if (Config->Relocatable) + Config->StripAll = false; + + // --strip-all implies --strip-debug. + if (Config->StripAll) + Config->StripDebug = true; + + // Config->Pic is true if we are generating position-independent code. 
+ Config->Pic = Config->Pie || Config->Shared; + + if (auto *Arg = Args.getLastArg(OPT_hash_style)) { + StringRef S = Arg->getValue(); + if (S == "gnu") { + Config->GnuHash = true; + Config->SysvHash = false; + } else if (S == "both") { + Config->GnuHash = true; + } else if (S != "sysv") + error("unknown hash style: " + S); + } + + // Parse --build-id or --build-id= +{% endblock %} + +{% block rootrellink %} +
  • lld Home | 
  • +{% endblock %} diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 00000000..88fcdd8a --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,255 @@ +# -*- coding: utf-8 -*- +# +# lld documentation build configuration file. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os +from datetime import date + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.intersphinx', 'sphinx.ext.todo'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'lld' +copyright = u'2011-%d, LLVM Project' % date.today().year + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '3.2' +# The full version, including alpha/beta/rc tags. 
+release = '3.2' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +today_fmt = '%Y-%m-%d' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +show_authors = True + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'friendly' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'llvm-theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +html_theme_path = ["."] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. 
+#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# If given, this must be the name of an image file (path relative to the +# configuration directory) that is the favicon of the docs. Modern browsers use +# this as icon for tabs, windows and bookmarks. It should be a Windows-style +# icon file (.ico), which is 16x16 or 32x32 pixels large. Default: None. The +# image file will be copied to the _static directory of the output HTML, but +# only if the file does not already exist there. +html_favicon = '_static/favicon.ico' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +html_last_updated_fmt = '%Y-%m-%d' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +html_sidebars = {'index': 'indexsidebar.html'} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +# html_additional_pages = {'index': 'index.html'} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
+#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'llddoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('contents', 'lld.tex', u'lld Documentation', + u'LLVM project', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('contents', 'lld', u'lld Documentation', + [u'LLVM project'], 1) +] + +# If true, show URL addresses after external links. 
+#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------------ + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('contents', 'lld', u'lld Documentation', + u'LLVM project', 'lld', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + + +# FIXME: Define intersphinx configuration. +intersphinx_mapping = {} + + +# -- Options for extensions ---------------------------------------------------- + +# Enable this if you want TODOs to show up in the generated documentation. +todo_include_todos = True diff --git a/docs/design.rst b/docs/design.rst new file mode 100644 index 00000000..14d18093 --- /dev/null +++ b/docs/design.rst @@ -0,0 +1,418 @@ +.. _design: + +Linker Design +============= + +Introduction +------------ + +lld is a new generation of linker. It is not "section" based like traditional +linkers which mostly just interlace sections from multiple object files into the +output file. Instead, lld is based on "Atoms". Traditional section based +linkers work well for simple linking, but their model makes advanced linking +features difficult to implement. Features like dead code stripping, reordering +functions for locality, and C++ coalescing require the linker to work at a finer +grain. + +An atom is an indivisible chunk of code or data. An atom has a set of +attributes, such as: name, scope, content-type, alignment, etc. An atom also +has a list of References. A Reference contains: a kind, an optional offset, an +optional addend, and an optional target atom. 
+ +The Atom model allows the linker to use standard graph theory models for linking +data structures. Each atom is a node, and each Reference is an edge. The +feature of dead code stripping is implemented by following edges to mark all +live atoms, and then deleting the non-live atoms. + + +Atom Model +---------- + +An atom is an indivisible chunk of code or data. Typically each user written +function or global variable is an atom. In addition, the compiler may emit +other atoms, such as for literal c-strings or floating point constants, or for +runtime data structures like dwarf unwind info or pointers to initializers. + +A simple "hello world" object file would be modeled like this: + +.. image:: hello.png + +There are three atoms: main, a proxy for printf, and an anonymous atom +containing the c-string literal "hello world". The Atom "main" has two +references. One is the call site for the call to printf, and the other is a +reference for the instruction that loads the address of the c-string literal. + +There are only four different types of atoms: + + * DefinedAtom + 95% of all atoms. This is a chunk of code or data + + * UndefinedAtom + This is a place holder in object files for a reference to some atom + outside the translation unit. During core linking it is usually replaced + by (coalesced into) another Atom. + + * SharedLibraryAtom + If a required symbol name turns out to be defined in a dynamic shared + library (and not some object file). A SharedLibraryAtom is the + placeholder Atom used to represent that fact. + + It is similar to an UndefinedAtom, but it also tracks information + about the associated shared library. + + * AbsoluteAtom + This is for embedded support where some stuff is implemented in ROM at + some fixed address. This atom has no content. It is just an address + that the Writer needs to fix up any references to point to. 
+ + +File Model +---------- + +The linker views the input files as basically containers of Atoms and +References, and just a few attributes of their own. The linker works with three +kinds of files: object files, static libraries, and dynamic shared libraries. +Each kind of file has a reader object which presents the file in the model +expected by the linker. + +Object File +~~~~~~~~~~~ + +An object file is just a container of atoms. When linking an object file, a +reader is instantiated which parses the object file and instantiates a set of +atoms representing all content in the .o file. The linker adds all those atoms +to a master graph. + +Static Library (Archive) +~~~~~~~~~~~~~~~~~~~~~~~~ + +This is the traditional unix static archive which is just a collection of object +files with a "table of contents". When linking with a static library, by default +nothing is added to the master graph of atoms. Instead, after merging all +atoms from object files into a master graph, if any "undefined" atoms are left +remaining in the master graph, the linker reads the table of contents for each +static library to see if any have the needed definitions. If so, the set of +atoms from the specified object file in the static library is added to the +master graph of atoms. + +Dynamic Library (Shared Object) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Dynamic libraries are different than object files and static libraries in that +they don't directly add any content. Their purpose is to check at build time +that the remaining undefined references can be resolved at runtime, and provide +a list of dynamic libraries (SO_NEEDED) that will be needed at runtime. The way +this is modeled in the linker is that a dynamic library contributes no atoms to +the initial graph of atoms. Instead, (like static libraries) if there are +"undefined" atoms in the master graph of all atoms, then each dynamic library is +checked to see if it exports the required symbol. 
If so, a "shared library" atom is +instantiated by the reader which the linker uses to replace the +"undefined" atom. + +Linking Steps +------------- + +Through the use of abstract Atoms, the core of linking is architecture +independent and file format independent. All command line parsing is factored +out into a separate "options" abstraction which enables the linker to be driven +with different command line sets. + +The overall steps in linking are: + + #. Command line processing + + #. Parsing input files + + #. Resolving + + #. Passes/Optimizations + + #. Generate output file + +The Resolving and Passes steps are done purely on the master graph of atoms, so +they have no notion of file formats such as mach-o or ELF. + + +Input Files +~~~~~~~~~~~ + +Existing developer tools use different file formats for object files. +A goal of lld is to be file format independent. This is done +through a plug-in model for reading object files. The lld::Reader is the base +class for all object file readers. A Reader follows the factory method pattern. +A Reader instantiates an lld::File object (which is a graph of Atoms) from a +given object file (on disk or in-memory). + +Every Reader subclass defines its own "options" class (for instance the mach-o +Reader defines the class ReaderOptionsMachO). This options class is the +one-and-only way to control how the Reader operates when parsing an input file +into an Atom graph. For instance, you may want the Reader to only accept +certain architectures. The options class can be instantiated from command +line options, or it can be subclassed and the ivars programmatically set. + +Resolving +~~~~~~~~~ + +The resolving step takes all the atoms' graphs from each object file and +combines them into one master object graph. Unfortunately, it is not as simple +as appending the atom list from each file into one big list. There are many +cases where atoms need to be coalesced. 
That is, two or more atoms need to be +coalesced into one atom. This is necessary to support: C language "tentative +definitions", C++ weak symbols for templates and inlines defined in headers, +replacing undefined atoms with actual definition atoms, and for merging copies +of constants like c-strings and floating point constants. + +The linker supports coalescing by-name and by-content. By-name is used for +tentative definitions and weak symbols. By-content is used for constant data +that can be merged. + +The resolving process maintains some global linking "state", including a "symbol +table" which is a map from llvm::StringRef to lld::Atom*. With these data +structures, the linker iterates all atoms in all input files. For each atom, it +checks if the atom is named and has a global or hidden scope. If so, the atom +is added to the symbol table map. If there already is a matching atom in that +table, that means the current atom needs to be coalesced with the found atom, or +it is a multiple definition error. + +When all initial input file atoms have been processed by the resolver, a scan is +made to see if there are any undefined atoms in the graph. If there are, the +linker scans all libraries (both static and dynamic) looking for definitions to +replace the undefined atoms. It is an error if any undefined atoms are left +remaining. + +Dead code stripping (if requested) is done at the end of resolving. The linker +does a simple mark-and-sweep. It starts with "root" atoms (like "main" in a main +executable) and follows each reference and marks each Atom that it visits as +"live". When done, all atoms not marked "live" are removed. + +The result of the Resolving phase is the creation of an lld::File object. The +goal is that the lld::File model is **the** internal representation +throughout the linker. The file readers parse (mach-o, ELF, COFF) into an +lld::File. 
The file writers (mach-o, ELF, COFF) take an lld::File and produce +their file kind, and every Pass only operates on an lld::File. This is not only +a simpler, consistent model, but it enables the state of the linker to be dumped +at any point in the link for testing purposes. + + +Passes +~~~~~~ + +The Passes step is an open ended set of routines that each get a chance to +modify or enhance the current lld::File object. Some example Passes are: + + * stub (PLT) generation + + * GOT instantiation + + * order_file optimization + + * branch island generation + + * branch shim generation + + * Objective-C optimizations (Darwin specific) + + * TLV instantiation (Darwin specific) + + * DTrace probe processing (Darwin specific) + + * compact unwind encoding (Darwin specific) + + +Some of these passes are specific to Darwin's runtime environments. But many of +the passes are applicable to any OS (such as generating branch island for out of +range branch instructions). + +The general structure of a pass is to iterate through the atoms in the current +lld::File object, inspecting each atom and doing something. For instance, the +stub pass looks for call sites to shared library atoms (e.g. call to printf). +It then instantiates a "stub" atom (PLT entry) and a "lazy pointer" atom for +each proxy atom needed, and these new atoms are added to the current lld::File +object. Next, all the noted call sites to shared library atoms have their +References altered to point to the stub atom instead of the shared library atom. + + +Generate Output File +~~~~~~~~~~~~~~~~~~~~ + +Once the passes are done, the output file writer is given the current lld::File +object. The writer's job is to create the executable content file wrapper and +place the content of the atoms into it. + +lld uses a plug-in model for writing output files. All concrete writers (e.g. +ELF, mach-o, etc) are subclasses of the lld::Writer class. 
+ +Unlike the Reader class which has just one method to instantiate an lld::File, +the Writer class has multiple methods. The crucial method is to generate the +output file, but there are also methods which allow the Writer to contribute +Atoms to the resolver and specify passes to run. + +An example of contributing +atoms is that if the Writer knows a main executable is being linked and such +an executable requires a specially named entry point (e.g. "_main"), the Writer +can add an UndefinedAtom with that special name to the resolver. This will +cause the resolver to issue an error if that symbol is not defined. + +Sometimes a Writer supports lazily created symbols, such as names for the start +of sections. To support this, the Writer can create a File object which vends +no initial atoms, but does lazily supply atoms by name as needed. + +Every Writer subclass defines its own "options" class (for instance the mach-o +Writer defines the class WriterOptionsMachO). This options class is the +one-and-only way to control how the Writer operates when producing an output +file from an Atom graph. For instance, you may want the Writer to optimize +the output for certain OS versions, or strip local symbols, etc. The options +class can be instantiated from command line options, or it can be subclassed +and the ivars programmatically set. + + +lld::File representations +------------------------- + +Just as LLVM has three representations of its IR model, lld has two +representations of its File/Atom/Reference model: + + * In memory, abstract C++ classes (lld::Atom, lld::Reference, and lld::File). + + * textual (in YAML) + + +Textual representations in YAML +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In designing a textual format we want something easy for humans to read and easy +for the linker to parse. 
Since an atom has lots of attributes most of which are +usually just the default, we should define default values for every attribute so +that those can be omitted from the text representation. Here are the atoms for a +simple hello world program expressed in YAML:: + + target-triple: x86_64-apple-darwin11 + + atoms: + - name: _main + scope: global + type: code + content: [ 55, 48, 89, e5, 48, 8d, 3d, 00, 00, 00, 00, 30, c0, e8, 00, 00, + 00, 00, 31, c0, 5d, c3 ] + fixups: + - offset: 07 + kind: pcrel32 + target: 2 + - offset: 0E + kind: call32 + target: _fprintf + + - type: c-string + content: [ 73, 5A, 00 ] + + ... + +The biggest use for the textual format will be writing test cases. Writing test +cases in C is problematic because the compiler may vary its output over time for +its own optimization reasons which may inadvertently disable or break the linker +feature trying to be tested. By writing test cases in the linker's own textual +format, we can exactly specify every attribute of every atom and thus target +specific linker logic. + +The textual/YAML format follows the ReaderWriter patterns used in lld. The lld +library comes with the classes: ReaderYAML and WriterYAML. + + +Testing +------- + +The lld project contains a test suite which is being built up as new code is +added to lld. All new lld functionality should have tests added to the test +suite. The test suite is `lit `_ driven. Each +test is a text file with comments telling lit how to run the test and check the +result. To facilitate testing, the lld project builds a tool called lld-core. +This tool reads a YAML file (default from stdin), parses it into one or more +lld::File objects in memory and then feeds those lld::File objects to the +resolver phase. + + +Resolver testing +~~~~~~~~~~~~~~~~ + +Basic testing is the "core linking" or resolving phase. That is where the +linker merges object files. All test cases are written in YAML. 
One feature of +YAML is that it allows multiple "documents" to be encoded in one YAML stream. +That means one text file can appear to the linker as multiple .o files - the +normal case for the linker. + +Here is a simple example of a core linking test case. It checks that an +undefined atom from one file will be replaced by a definition from another +file:: + + # RUN: lld-core %s | FileCheck %s + + # + # Test that undefined atoms are replaced with defined atoms. + # + + --- + atoms: + - name: foo + definition: undefined + --- + atoms: + - name: foo + scope: global + type: code + ... + + # CHECK: name: foo + # CHECK: scope: global + # CHECK: type: code + # CHECK-NOT: name: foo + # CHECK: ... + + +Passes testing +~~~~~~~~~~~~~~ + +Since Passes just operate on an lld::File object, the lld-core tool has the +option to run a particular pass (after resolving). Thus, you can write a YAML +test case with carefully crafted input to exercise areas of a Pass and then check +the resulting lld::File object as represented in YAML. + + +Design Issues +------------- + +There are a number of open issues in the design of lld. The plan is to wait and +make these design decisions when we need to. + + +Debug Info +~~~~~~~~~~ + +Currently, the lld model says nothing about debug info. But the most popular +debug format is DWARF and there is some impedance mismatch with the lld model +and DWARF. In lld there are just Atoms and only Atoms that need to be in a +special section at runtime have an associated section. Also, Atoms do not have +addresses. The way DWARF is spec'ed, different parts of DWARF are supposed to go +into specially named sections and the DWARF references function code by address. + +CPU and OS specific functionality +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Currently, lld has an abstract "Platform" that deals with any CPU or OS specific +differences in linking. 
We just keep adding virtual methods to the base +Platform class as we find linking areas that might need customization. At some +point we'll need to structure this better. + + +File Attributes +~~~~~~~~~~~~~~~ + +Currently, lld::File just has a path and a way to iterate its atoms. We will +need to add more attributes on a File. For example, some equivalent to the +target triple. There is also a number of cached or computed attributes that +could make various Passes more efficient. For instance, on Darwin there are a +number of Objective-C optimizations that can be done by a Pass. But it would +improve the plain C case if the Objective-C optimization Pass did not have to +scan all atoms looking for any Objective-C data structures. This could be done +if the lld::File object had an attribute that said if the file had any +Objective-C data in it. The Resolving phase would then be required to "merge" +that attribute as object files are added. diff --git a/docs/development.rst b/docs/development.rst new file mode 100644 index 00000000..918e1778 --- /dev/null +++ b/docs/development.rst @@ -0,0 +1,48 @@ +.. _development: + +Development +=========== + +lld is developed as part of the `LLVM `_ project. + +Using C++11 in lld +------------------ + +:doc:`C++11`. + +Creating a Reader +----------------- + +See the :ref:`Creating a Reader ` guide. + + +Modifying the Driver +-------------------- + +See :doc:`Driver`. + + +Debugging +--------- + +You can run lld with ``-mllvm -debug`` command line options to enable debugging +printouts. If you want to enable debug information for some specific pass, you +can run it with ``-mllvm '-debug-only='``, where pass is a name used in +the ``DEBUG_WITH_TYPE()`` macro. + + + +Documentation +------------- + +The project documentation is written in reStructuredText and generated using the +`Sphinx `_ documentation generator. For more +information on writing documentation for the project, see the +:ref:`sphinx_intro`. + +.. 
toctree:: + :hidden: + + C++11 + Readers + Driver diff --git a/docs/getting_started.rst b/docs/getting_started.rst new file mode 100644 index 00000000..97c3d1bc --- /dev/null +++ b/docs/getting_started.rst @@ -0,0 +1,106 @@ +.. _getting_started: + +Getting Started: Building and Running lld +========================================= + +This page gives you the shortest path to checking out and building lld. If you +run into problems, please file bugs in the `LLVM Bugzilla`__ + +__ http://llvm.org/bugs/ + +Building lld +------------ + +On Unix-like Systems +~~~~~~~~~~~~~~~~~~~~ + +1. Get the required tools. + + * `CMake 2.8`_\+. + * make (or any build system CMake supports). + * `Clang 3.1`_\+ or GCC 4.7+ (C++11 support is required). + + * If using Clang, you will also need `libc++`_. + * `Python 2.4`_\+ (not 3.x) for running tests. + +.. _CMake 2.8: http://www.cmake.org/cmake/resources/software.html +.. _Clang 3.1: http://clang.llvm.org/ +.. _libc++: http://libcxx.llvm.org/ +.. _Python 2.4: http://python.org/download/ + +2. Check out LLVM:: + + $ cd path/to/llvm-project + $ svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm + +3. Check out lld:: + + $ cd llvm/tools + $ svn co http://llvm.org/svn/llvm-project/lld/trunk lld + + * lld can also be checked out to ``path/to/llvm-project`` and built as an external + project. + +4. Build LLVM and lld:: + + $ cd path/to/llvm-build/llvm (out of source build required) + $ cmake -G "Unix Makefiles" path/to/llvm-project/llvm + $ make + + * If you want to build with clang and it is not the default compiler or + it is installed in an alternate location, you'll need to tell the cmake tool + the location of the C and C++ compiler via CMAKE_C_COMPILER and + CMAKE_CXX_COMPILER. For example:: + + $ cmake -DCMAKE_CXX_COMPILER=/path/to/clang++ -DCMAKE_C_COMPILER=/path/to/clang ... + +5. Test:: + + $ make check-lld + +Using Visual Studio +~~~~~~~~~~~~~~~~~~~ + +#. Get the required tools. + + * `CMake 2.8`_\+. 
+ * `Visual Studio 12 (2013) or later`_ (required for C++11 support) + * `Python 2.4`_\+ (not 3.x) for running tests. + +.. _CMake 2.8: http://www.cmake.org/cmake/resources/software.html +.. _Visual Studio 12 (2013) or later: http://www.microsoft.com/visualstudio/11/en-us +.. _Python 2.4: http://python.org/download/ + +#. Check out LLVM:: + + $ cd path/to/llvm-project + $ svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm + +#. Check out lld:: + + $ cd llvm/tools + $ svn co http://llvm.org/svn/llvm-project/lld/trunk lld + + * lld can also be checked out to ``path/to/llvm-project`` and built as an external + project. + +#. Generate Visual Studio project files:: + + $ cd path/to/llvm-build/llvm (out of source build required) + $ cmake -G "Visual Studio 11" path/to/llvm-project/llvm + +#. Build + + * Open LLVM.sln in Visual Studio. + * Build the ``ALL_BUILD`` target. + +#. Test + + * Build the ``lld-test`` target. + +More Information +~~~~~~~~~~~~~~~~ + +For more information on using CMake see the `LLVM CMake guide`_. + +.. _LLVM CMake guide: http://llvm.org/docs/CMake.html diff --git a/docs/hello.png b/docs/hello.png new file mode 100644 index 00000000..70df111f Binary files /dev/null and b/docs/hello.png differ diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000..d019c4f9 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,25 @@ +.. _index: + +lld - The LLVM Linker +===================== + +lld contains two linkers whose architectures are different from each other. + +.. toctree:: + :maxdepth: 1 + + NewLLD + AtomLLD + +Source +------ + +lld is available in the LLVM SVN repository:: + + svn co http://llvm.org/svn/llvm-project/lld/trunk lld + +lld is also available via the read-only git mirror:: + + git clone http://llvm.org/git/lld.git + +Put it in llvm's tools/ directory, rerun cmake, then build target lld. 
diff --git a/docs/llvm-theme/layout.html b/docs/llvm-theme/layout.html new file mode 100644 index 00000000..0cd0918e --- /dev/null +++ b/docs/llvm-theme/layout.html @@ -0,0 +1,22 @@ +{# + sphinxdoc/layout.html + ~~~~~~~~~~~~~~~~~~~~~ + + Sphinx layout template for the sphinxdoc theme. + + :copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. +#} +{% extends "basic/layout.html" %} + +{% block relbar1 %} + +{{ super() }} +{% endblock %} + +{# put the sidebar before the body #} +{% block sidebar1 %}{{ sidebar() }}{% endblock %} +{% block sidebar2 %}{% endblock %} diff --git a/docs/llvm-theme/static/contents.png b/docs/llvm-theme/static/contents.png new file mode 100644 index 00000000..7fb82154 Binary files /dev/null and b/docs/llvm-theme/static/contents.png differ diff --git a/docs/llvm-theme/static/llvm.css b/docs/llvm-theme/static/llvm.css new file mode 100644 index 00000000..32802bb6 --- /dev/null +++ b/docs/llvm-theme/static/llvm.css @@ -0,0 +1,345 @@ +/* + * sphinxdoc.css_t + * ~~~~~~~~~~~~~~~ + * + * Sphinx stylesheet -- sphinxdoc theme. Originally created by + * Armin Ronacher for Werkzeug. + * + * :copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. 
+ * + */ + +@import url("basic.css"); + +/* -- page layout ----------------------------------------------------------- */ + +body { + font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', + 'Verdana', sans-serif; + font-size: 14px; + letter-spacing: -0.01em; + line-height: 150%; + text-align: center; + background-color: #BFD1D4; + color: black; + padding: 0; + border: 1px solid #aaa; + + margin: 0px 80px 0px 80px; + min-width: 740px; +} + +div.logo { + background-color: white; + text-align: left; + padding: 10px 10px 15px 15px; +} + +div.document { + background-color: white; + text-align: left; + background-image: url(contents.png); + background-repeat: repeat-x; +} + +div.bodywrapper { + margin: 0 240px 0 0; + border-right: 1px solid #ccc; +} + +div.body { + margin: 0; + padding: 0.5em 20px 20px 20px; +} + +div.related { + font-size: 1em; +} + +div.related ul { + background-image: url(navigation.png); + height: 2em; + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; +} + +div.related ul li { + margin: 0; + padding: 0; + height: 2em; + float: left; +} + +div.related ul li.right { + float: right; + margin-right: 5px; +} + +div.related ul li a { + margin: 0; + padding: 0 5px 0 5px; + line-height: 1.75em; + color: #EE9816; +} + +div.related ul li a:hover { + color: #3CA8E7; +} + +div.sphinxsidebarwrapper { + padding: 0; +} + +div.sphinxsidebar { + margin: 0; + padding: 0.5em 15px 15px 0; + width: 210px; + float: right; + font-size: 1em; + text-align: left; +} + +div.sphinxsidebar h3, div.sphinxsidebar h4 { + margin: 1em 0 0.5em 0; + font-size: 1em; + padding: 0.1em 0 0.1em 0.5em; + color: white; + border: 1px solid #86989B; + background-color: #AFC1C4; +} + +div.sphinxsidebar h3 a { + color: white; +} + +div.sphinxsidebar ul { + padding-left: 1.5em; + margin-top: 7px; + padding: 0; + line-height: 130%; +} + +div.sphinxsidebar ul ul { + margin-left: 20px; +} + +div.footer { + background-color: #E3EFF1; + color: #86989B; + padding: 3px 8px 3px 0; + 
clear: both; + font-size: 0.8em; + text-align: right; +} + +div.footer a { + color: #86989B; + text-decoration: underline; +} + +/* -- body styles ----------------------------------------------------------- */ + +p { + margin: 0.8em 0 0.5em 0; +} + +a { + color: #CA7900; + text-decoration: none; +} + +a:hover { + color: #2491CF; +} + +div.body a { + text-decoration: underline; +} + +h1 { + margin: 0; + padding: 0.7em 0 0.3em 0; + font-size: 1.5em; + color: #11557C; +} + +h2 { + margin: 1.3em 0 0.2em 0; + font-size: 1.35em; + padding: 0; +} + +h3 { + margin: 1em 0 -0.3em 0; + font-size: 1.2em; +} + +div.body h1 a, div.body h2 a, div.body h3 a, div.body h4 a, div.body h5 a, div.body h6 a { + color: black!important; +} + +h1 a.anchor, h2 a.anchor, h3 a.anchor, h4 a.anchor, h5 a.anchor, h6 a.anchor { + display: none; + margin: 0 0 0 0.3em; + padding: 0 0.2em 0 0.2em; + color: #aaa!important; +} + +h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor, +h5:hover a.anchor, h6:hover a.anchor { + display: inline; +} + +h1 a.anchor:hover, h2 a.anchor:hover, h3 a.anchor:hover, h4 a.anchor:hover, +h5 a.anchor:hover, h6 a.anchor:hover { + color: #777; + background-color: #eee; +} + +a.headerlink { + color: #c60f0f!important; + font-size: 1em; + margin-left: 6px; + padding: 0 4px 0 4px; + text-decoration: none!important; +} + +a.headerlink:hover { + background-color: #ccc; + color: white!important; +} + +cite, code, tt { + font-family: 'Consolas', 'Deja Vu Sans Mono', + 'Bitstream Vera Sans Mono', monospace; + font-size: 0.95em; + letter-spacing: 0.01em; +} + +tt { + background-color: #f2f2f2; + border-bottom: 1px solid #ddd; + color: #333; +} + +tt.descname, tt.descclassname, tt.xref { + border: 0; +} + +hr { + border: 1px solid #abc; + margin: 2em; +} + +a tt { + border: 0; + color: #CA7900; +} + +a tt:hover { + color: #2491CF; +} + +pre { + font-family: 'Consolas', 'Deja Vu Sans Mono', + 'Bitstream Vera Sans Mono', monospace; + font-size: 0.95em; + 
letter-spacing: 0.015em; + line-height: 120%; + padding: 0.5em; + border: 1px solid #ccc; + background-color: #f8f8f8; +} + +pre a { + color: inherit; + text-decoration: underline; +} + +td.linenos pre { + padding: 0.5em 0; +} + +div.quotebar { + background-color: #f8f8f8; + max-width: 250px; + float: right; + padding: 2px 7px; + border: 1px solid #ccc; +} + +div.topic { + background-color: #f8f8f8; +} + +table { + border-collapse: collapse; + margin: 0 -0.5em 0 -0.5em; +} + +table td, table th { + padding: 0.2em 0.5em 0.2em 0.5em; +} + +div.admonition, div.warning { + font-size: 0.9em; + margin: 1em 0 1em 0; + border: 1px solid #86989B; + background-color: #f7f7f7; + padding: 0; +} + +div.admonition p, div.warning p { + margin: 0.5em 1em 0.5em 1em; + padding: 0; +} + +div.admonition pre, div.warning pre { + margin: 0.4em 1em 0.4em 1em; +} + +div.admonition p.admonition-title, +div.warning p.admonition-title { + margin: 0; + padding: 0.1em 0 0.1em 0.5em; + color: white; + border-bottom: 1px solid #86989B; + font-weight: bold; + background-color: #AFC1C4; +} + +div.warning { + border: 1px solid #940000; +} + +div.warning p.admonition-title { + background-color: #CF0000; + border-bottom-color: #940000; +} + +div.admonition ul, div.admonition ol, +div.warning ul, div.warning ol { + margin: 0.1em 0.5em 0.5em 3em; + padding: 0; +} + +div.versioninfo { + margin: 1em 0 0 0; + border: 1px solid #ccc; + background-color: #DDEAF0; + padding: 8px; + line-height: 1.3em; + font-size: 0.9em; +} + +.viewcode-back { + font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', + 'Verdana', sans-serif; +} + +div.viewcode-block:target { + background-color: #f4debf; + border-top: 1px solid #ac9; + border-bottom: 1px solid #ac9; +} diff --git a/docs/llvm-theme/static/logo.png b/docs/llvm-theme/static/logo.png new file mode 100644 index 00000000..4fc89902 Binary files /dev/null and b/docs/llvm-theme/static/logo.png differ diff --git a/docs/llvm-theme/static/navigation.png 
b/docs/llvm-theme/static/navigation.png new file mode 100644 index 00000000..1081dc14 Binary files /dev/null and b/docs/llvm-theme/static/navigation.png differ diff --git a/docs/llvm-theme/theme.conf b/docs/llvm-theme/theme.conf new file mode 100644 index 00000000..330fc92f --- /dev/null +++ b/docs/llvm-theme/theme.conf @@ -0,0 +1,4 @@ +[theme] +inherit = basic +stylesheet = llvm.css +pygments_style = friendly diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..8471252d --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,190 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +set I18NSPHINXOPTS=%SPHINXOPTS% . +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^` where ^ is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. linkcheck to check all external links for integrity + echo. 
doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\lld.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\lld.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. 
+ goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. 
+ goto end +) + +:end diff --git a/docs/open_projects.rst b/docs/open_projects.rst new file mode 100644 index 00000000..eeb9f9f4 --- /dev/null +++ b/docs/open_projects.rst @@ -0,0 +1,11 @@ +.. _open_projects: + +Open Projects +============= + +.. include:: ../include/lld/Core/TODO.txt + +Documentation TODOs +~~~~~~~~~~~~~~~~~~~ + +.. todolist:: diff --git a/docs/sphinx_intro.rst b/docs/sphinx_intro.rst new file mode 100644 index 00000000..6845bc81 --- /dev/null +++ b/docs/sphinx_intro.rst @@ -0,0 +1,147 @@ +.. _sphinx_intro: + +Sphinx Introduction for LLVM Developers +======================================= + +This document is intended as a short and simple introduction to the Sphinx +documentation generation system for LLVM developers. + +Quickstart +---------- + +To get started writing documentation, you will need to: + + 1. Have the Sphinx tools :ref:`installed `. + + 2. Understand how to :ref:`build the documentation + `. + + 3. Start :ref:`writing documentation `! + +.. _installing_sphinx: + +Installing Sphinx +~~~~~~~~~~~~~~~~~ + +You should be able to install Sphinx using the standard Python package +installation tool ``easy_install``, as follows:: + + $ sudo easy_install sphinx + Searching for sphinx + Reading http://pypi.python.org/simple/sphinx/ + Reading http://sphinx.pocoo.org/ + Best match: Sphinx 1.1.3 + ... more lines here .. + +If you do not have root access (or otherwise want to avoid installing Sphinx in +system directories) see the section on :ref:`installing_sphinx_in_a_venv` . + +If you do not have the ``easy_install`` tool on your system, you should be able +to install it using: + + Linux + Use your distribution's standard package management tool to install it, + i.e., ``apt-get install easy_install`` or ``yum install easy_install``. + + Mac OS X + All modern Mac OS X systems come with ``easy_install`` as part of the base + system. + + Windows + See the `setuptools `_ package web + page for instructions. + + +.. 
_building_the_documentation: + +Building the documentation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In order to build the documentation, all you should need to do is change to the +``docs`` directory and invoke make as follows:: + + $ cd path/to/project/docs + $ make html + +Note that on Windows there is a ``make.bat`` command in the docs directory which +supplies the same interface as the ``Makefile``. + +That command will invoke ``sphinx-build`` with the appropriate options for the +project, and generate the HTML documentation in a ``_build`` subdirectory. You +can browse it starting from the index page by visiting +``_build/html/index.html``. + +Sphinx supports a wide variety of generation formats (including LaTeX, man +pages, and plain text). The ``Makefile`` includes a number of convenience +targets for invoking ``sphinx-build`` appropriately, the common ones are: + + make html + Generate the HTML output. + + make latexpdf + Generate LaTeX documentation and convert to a PDF. + + make man + Generate man pages. + + +.. _writing_documentation: + +Writing documentation +~~~~~~~~~~~~~~~~~~~~~ + +The documentation itself is written in the reStructuredText (ReST) format, and Sphinx +defines additional tags to support features like cross-referencing. + +The ReST format itself is organized around documents mostly being readable +plaintext documents. You should generally be able to write new documentation +easily just by following the style of the existing documentation. + +If you want to understand the formatting of the documents more, the best place +to start is Sphinx's own `ReST Primer `_. + + +Learning More +------------- + +If you want to learn more about the Sphinx system, the best place to start is +the Sphinx documentation itself, available `here +`_. + + +.. 
_installing_sphinx_in_a_venv: + +Installing Sphinx in a Virtual Environment +------------------------------------------ + +Most Python developers prefer to work with tools inside a *virtualenv* (virtual +environment) instance, which functions as an application sandbox. This avoids +polluting your system installation with different packages used by various +projects (and ensures that dependencies for different packages don't conflict +with one another). Of course, you need to first have the virtualenv software +itself which generally would be installed at the system level:: + + $ sudo easy_install virtualenv + +but after that you no longer need to install additional packages in the system +directories. + +Once you have the *virtualenv* tool itself installed, you can create a +virtualenv for Sphinx using:: + + $ virtualenv ~/my-sphinx-install + New python executable in /Users/dummy/my-sphinx-install/bin/python + Installing setuptools............done. + Installing pip...............done. + + $ ~/my-sphinx-install/bin/easy_install sphinx + ... install messages here ... + +and from now on you can "activate" the *virtualenv* using:: + + $ source ~/my-sphinx-install/bin/activate + +which will change your PATH to ensure the sphinx-build tool from inside the +virtual environment will be used. See the `virtualenv website +`_ for more information on using +virtual environments. diff --git a/docs/windows_support.rst b/docs/windows_support.rst new file mode 100644 index 00000000..c4c338a0 --- /dev/null +++ b/docs/windows_support.rst @@ -0,0 +1,97 @@ +.. raw:: html + + + +.. role:: none +.. role:: partial +.. role:: good + +=============== +Windows support +=============== + +LLD supports Windows operating system. When invoked as ``lld-link.exe`` or with +``-flavor link``, the driver for Windows operating system is used to parse +command line options, and it drives further linking processes. 
LLD accepts +almost all command line options that the linker shipped with Microsoft Visual +C++ (link.exe) supports. + +The current status is that LLD can link itself on Windows x86/x64 +using Visual C++ 2013 as the compiler. + +Development status +================== + +Driver + :good:`Mostly done`. Some exotic command line options that are not usually + used for application development, such as ``/DRIVER``, are not supported. + Options for Windows 8 app store are not recognized either + (e.g. ``/APPCONTAINER``). + +Linking against DLL + :good:`Done`. LLD can read import libraries needed to link against DLL. Both + export-by-name and export-by-ordinal are supported. + +Linking against static library + :good:`Done`. The format of static library (.lib) on Windows is actually the + same as on Unix (.a). LLD can read it. + +Creating DLL + :good:`Done`. LLD creates a DLL if ``/DLL`` option is given. Exported + functions can be specified either via command line (``/EXPORT``) or via + module-definition file (.def). Both export-by-name and export-by-ordinal are + supported. LLD uses Microsoft ``lib.exe`` tool to create an import library + file. + +Windows resource files support + :good:`Done`. If an ``.rc`` file is given, LLD converts the file to a COFF + file using some external commands and links it. Specifically, ``rc.exe`` is + used to compile a resource file (.rc) to a compiled resource (.res) + file. ``cvtres.exe`` is then used to convert a compiled resource file to a + COFF object file section. Both tools are shipped with MSVC. + +Safe Structured Exception Handler (SEH) + :good:`Done` for both x86 and x64. + +Module-definition file + :partial:`Partially done`. LLD currently recognizes these directives: + ``EXPORTS``, ``HEAPSIZE``, ``STACKSIZE``, ``NAME``, and ``VERSION``. + +Debug info + :none:`No progress has been made`. Microsoft linker can interpret the CodeView + debug info (old-style debug info) and PDB to emit a .pdb file. LLD doesn't + support either. 
+ + +Building LLD +============ + +Using Visual Studio IDE/MSBuild +------------------------------- + +1. Check out LLVM and LLD from the LLVM SVN repository (or Git mirror), +#. run ``cmake -G "Visual Studio 12" `` from VS command prompt, +#. open LLVM.sln with Visual Studio, and +#. build ``lld`` target in ``lld executables`` folder + +Alternatively, you can use msbuild if you don't like to work in an IDE:: + + msbuild LLVM.sln /m /target:"lld executables\lld" + +MSBuild.exe had been shipped as a component of the .NET framework, but since +2013 it's part of Visual Studio. You can find it at "C:\\Program Files +(x86)\\msbuild". + +You can build LLD as a 64 bit application. To do that, open VS2013 x64 command +prompt and run cmake for "Visual Studio 12 Win64" target. + +Using Ninja +----------- + +1. Check out LLVM and LLD from the LLVM SVN repository (or Git mirror), +#. run ``cmake -G ninja `` from VS command prompt, +#. run ``ninja lld`` diff --git a/include/lld/Config/Version.h b/include/lld/Config/Version.h new file mode 100644 index 00000000..41433c11 --- /dev/null +++ b/include/lld/Config/Version.h @@ -0,0 +1,51 @@ +//===- lld/Config/Version.h - LLD Version Number ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Defines version macros and version-related utility functions +/// for lld. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_VERSION_H +#define LLD_VERSION_H + +#include "lld/Config/Version.inc" +#include "llvm/ADT/StringRef.h" +#include + +/// \brief Helper macro for LLD_VERSION_STRING. +#define LLD_MAKE_VERSION_STRING2(X) #X + +/// \brief Helper macro for LLD_VERSION_STRING. 
+#define LLD_MAKE_VERSION_STRING(X, Y) LLD_MAKE_VERSION_STRING2(X.Y) + +/// \brief A string that describes the lld version number, e.g., "1.0". +#define LLD_VERSION_STRING \ + LLD_MAKE_VERSION_STRING(LLD_VERSION_MAJOR, LLD_VERSION_MINOR) + +namespace lld { +/// \brief Retrieves the repository path (e.g., Subversion path) that +/// identifies the particular lld branch, tag, or trunk from which this +/// lld was built. +llvm::StringRef getLLDRepositoryPath(); + +/// \brief Retrieves the repository revision number (or identifier) from which +/// this lld was built. +llvm::StringRef getLLDRevision(); + +/// \brief Retrieves the full repository version that is an amalgamation of +/// the information in getLLDRepositoryPath() and getLLDRevision(). +std::string getLLDRepositoryVersion(); + +/// \brief Retrieves a string representing the complete lld version. +llvm::StringRef getLLDVersion(); +} + +#endif // LLD_VERSION_H diff --git a/include/lld/Config/Version.inc.in b/include/lld/Config/Version.inc.in new file mode 100644 index 00000000..c893a566 --- /dev/null +++ b/include/lld/Config/Version.inc.in @@ -0,0 +1,5 @@ +#define LLD_VERSION @LLD_VERSION@ +#define LLD_VERSION_MAJOR @LLD_VERSION_MAJOR@ +#define LLD_VERSION_MINOR @LLD_VERSION_MINOR@ +#define LLD_REVISION_STRING "@LLD_REVISION@" +#define LLD_REPOSITORY_STRING "@LLD_REPOSITORY@" diff --git a/include/lld/Core/AbsoluteAtom.h b/include/lld/Core/AbsoluteAtom.h new file mode 100644 index 00000000..ed25297c --- /dev/null +++ b/include/lld/Core/AbsoluteAtom.h @@ -0,0 +1,43 @@ +//===- Core/AbsoluteAtom.h - An absolute Atom -----------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_ABSOLUTE_ATOM_H +#define LLD_CORE_ABSOLUTE_ATOM_H + +#include "lld/Core/Atom.h" + +namespace lld { + +/// An AbsoluteAtom has no content. +/// It exists to represent content at fixed addresses in memory. +class AbsoluteAtom : public Atom { +public: + + virtual uint64_t value() const = 0; + + /// scope - The visibility of this atom to other atoms. C static functions + /// have scope scopeTranslationUnit. Regular C functions have scope + /// scopeGlobal. Functions compiled with visibility=hidden have scope + /// scopeLinkageUnit so they can be seen by other atoms being linked but not + /// by the OS loader. + virtual Scope scope() const = 0; + + static bool classof(const Atom *a) { + return a->definition() == definitionAbsolute; + } + + static bool classof(const AbsoluteAtom *) { return true; } + +protected: + AbsoluteAtom() : Atom(definitionAbsolute) {} +}; + +} // namespace lld + +#endif // LLD_CORE_ABSOLUTE_ATOM_H diff --git a/include/lld/Core/ArchiveLibraryFile.h b/include/lld/Core/ArchiveLibraryFile.h new file mode 100644 index 00000000..2c736e7d --- /dev/null +++ b/include/lld/Core/ArchiveLibraryFile.h @@ -0,0 +1,47 @@ +//===- Core/ArchiveLibraryFile.h - Models static library ------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_ARCHIVE_LIBRARY_FILE_H +#define LLD_CORE_ARCHIVE_LIBRARY_FILE_H + +#include "lld/Core/File.h" +#include + +namespace lld { + +/// +/// The ArchiveLibraryFile subclass of File is used to represent unix +/// static library archives. These libraries provide no atoms to the +/// initial set of atoms linked. 
Instead, the Resolver will query +/// ArchiveLibraryFile instances for specific symbol names using the +/// find() method. If the archive contains an object file which has a +/// DefinedAtom whose scope is not translationUnit, then that entire +/// object file File is returned. +/// +class ArchiveLibraryFile : public File { +public: + static bool classof(const File *f) { + return f->kind() == kindArchiveLibrary; + } + + /// Check if any member of the archive contains an Atom with the + /// specified name and return the File object for that member, or nullptr. + virtual File *find(StringRef name) = 0; + + virtual std::error_code + parseAllMembers(std::vector> &result) = 0; + +protected: + /// only subclasses of ArchiveLibraryFile can be instantiated + ArchiveLibraryFile(StringRef path) : File(path, kindArchiveLibrary) {} +}; + +} // namespace lld + +#endif // LLD_CORE_ARCHIVE_LIBRARY_FILE_H diff --git a/include/lld/Core/Atom.h b/include/lld/Core/Atom.h new file mode 100644 index 00000000..42ca2bb8 --- /dev/null +++ b/include/lld/Core/Atom.h @@ -0,0 +1,129 @@ +//===- Core/Atom.h - A node in linking graph ------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_ATOM_H +#define LLD_CORE_ATOM_H + +#include "lld/Core/LLVM.h" + +namespace lld { + +class File; + +template +class OwningAtomPtr; + +/// +/// The linker has a Graph Theory model of linking. An object file is seen +/// as a set of Atoms with References to other Atoms. Each Atom is a node +/// and each Reference is an edge. An Atom can be a DefinedAtom which has +/// content or an UndefinedAtom which is a placeholder and represents an +/// undefined symbol (extern declaration). 
+/// +class Atom { + template friend class OwningAtomPtr; +public: + /// Whether this atom is defined or a proxy for an undefined symbol + enum Definition { + definitionRegular, ///< Normal C/C++ function or global variable. + definitionAbsolute, ///< Asm-only (foo = 10). Not tied to any content. + definitionUndefined, ///< Only in .o files to model reference to undef. + definitionSharedLibrary ///< Only in shared libraries to model export. + }; + + /// The scope in which this atom is accessible to other atoms. + enum Scope { + scopeTranslationUnit, ///< Accessible only to atoms in the same translation + /// unit (e.g. a C static). + scopeLinkageUnit, ///< Accessible to atoms being linked but not visible + /// to runtime loader (e.g. visibility=hidden). + scopeGlobal ///< Accessible to all atoms and visible to runtime + /// loader (e.g. visibility=default). + }; + + + /// file - returns the File that produced/owns this Atom + virtual const File& file() const = 0; + + /// name - The name of the atom. For a function atom, it is the (mangled) + /// name of the function. + virtual StringRef name() const = 0; + + /// definition - Whether this atom is a definition or represents an undefined + /// symbol. + Definition definition() const { return _definition; } + + static bool classof(const Atom *a) { return true; } + +protected: + /// Atom is an abstract base class. Only subclasses can access constructor. + explicit Atom(Definition def) : _definition(def) {} + + /// The memory for Atom objects is always managed by the owning File + /// object. Therefore, no one but the owning File object should call + /// delete on an Atom. In fact, some File objects may bulk allocate + /// an array of Atoms, so they cannot be individually deleted by anyone. + virtual ~Atom() {} + +private: + Definition _definition; +}; + +/// Class which owns an atom pointer and runs the atom destructor when the +/// owning pointer goes out of scope. 
+template +class OwningAtomPtr { +private: + OwningAtomPtr(const OwningAtomPtr &) = delete; + void operator=(const OwningAtomPtr&) = delete; +public: + OwningAtomPtr() : atom(nullptr) { } + OwningAtomPtr(T *atom) : atom(atom) { } + + ~OwningAtomPtr() { + if (atom) + runDestructor(atom); + } + + void runDestructor(Atom *atom) { + atom->~Atom(); + } + + OwningAtomPtr(OwningAtomPtr &&ptr) : atom(ptr.atom) { + ptr.atom = nullptr; + } + + void operator=(OwningAtomPtr&& ptr) { + if (atom) + runDestructor(atom); + atom = ptr.atom; + ptr.atom = nullptr; + } + + T *const &get() const { + return atom; + } + + T *&get() { + return atom; + } + + T *release() { + auto *v = atom; + atom = nullptr; + return v; + } + +private: + T *atom; +}; + +} // namespace lld + +#endif // LLD_CORE_ATOM_H diff --git a/include/lld/Core/DefinedAtom.h b/include/lld/Core/DefinedAtom.h new file mode 100644 index 00000000..e3193f8a --- /dev/null +++ b/include/lld/Core/DefinedAtom.h @@ -0,0 +1,378 @@ +//===- Core/DefinedAtom.h - An Atom with content --------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_DEFINED_ATOM_H +#define LLD_CORE_DEFINED_ATOM_H + +#include "lld/Core/Atom.h" +#include "lld/Core/Reference.h" +#include "lld/Core/LLVM.h" +#include "llvm/Support/ErrorHandling.h" + +namespace lld { +class File; + +/// \brief The fundamental unit of linking. +/// +/// A C function or global variable is an atom. An atom has content and +/// attributes. The content of a function atom is the instructions that +/// implement the function. The content of a global variable atom is its +/// initial bytes. +/// +/// Here are some example attribute sets for common atoms. 
If a particular +/// attribute is not listed, the default values are: definition=regular, +/// sectionChoice=basedOnContent, scope=translationUnit, merge=no, +/// deadStrip=normal, interposable=no +/// +/// C function: void foo() {}
    +/// name=foo, type=code, perm=r_x, scope=global +/// +/// C static function: static void func() {}
    +/// name=func, type=code, perm=r_x +/// +/// C global variable: int count = 1;
    +/// name=count, type=data, perm=rw_, scope=global +/// +/// C tentative definition: int bar;
    +/// name=bar, type=zerofill, perm=rw_, scope=global, +/// merge=asTentative, interposable=yesAndRuntimeWeak +/// +/// Uninitialized C static variable: static int stuff;
    +/// name=stuff, type=zerofill, perm=rw_ +/// +/// Weak C function: __attribute__((weak)) void foo() {}
    +/// name=foo, type=code, perm=r_x, scope=global, merge=asWeak +/// +/// Hidden C function: __attribute__((visibility("hidden"))) void foo() {}
    +/// name=foo, type=code, perm=r_x, scope=linkageUnit +/// +/// No-dead-strip function: __attribute__((used)) void foo() {}
    +/// name=foo, type=code, perm=r_x, scope=global, deadStrip=never +/// +/// Non-inlined C++ inline method: inline void Foo::doit() {}
    +/// name=_ZN3Foo4doitEv, type=code, perm=r_x, scope=global, +/// merge=asWeak +/// +/// Non-inlined C++ inline method whose address is taken: +/// inline void Foo::doit() {}
    +/// name=_ZN3Foo4doitEv, type=code, perm=r_x, scope=global, +/// merge=asWeakAndAddressUsed +/// +/// literal c-string: "hello"
    +/// name="" type=cstring, perm=r__, scope=linkageUnit +/// +/// literal double: 1.234
    +/// name="" type=literal8, perm=r__, scope=linkageUnit +/// +/// constant: { 1,2,3 }
    +/// name="" type=constant, perm=r__, scope=linkageUnit +/// +/// Pointer to initializer function:
    +/// name="" type=initializer, perm=rw_l, +/// sectionChoice=customRequired +/// +/// C function placed in custom section: __attribute__((section("__foo"))) +/// void foo() {}
    +/// name=foo, type=code, perm=r_x, scope=global, +/// sectionChoice=customRequired, customSectionName=__foo +/// +class DefinedAtom : public Atom { +public: + enum Interposable { + interposeNo, // linker can directly bind uses of this atom + interposeYes, // linker must indirect (through GOT) uses + interposeYesAndRuntimeWeak // must indirect and mark symbol weak in final + // linked image + }; + + enum Merge { + mergeNo, // Another atom with same name is error + mergeAsTentative, // Is ANSI C tentative definition, can be coalesced + mergeAsWeak, // Is C++ inline definition that was not inlined, + // but address was not taken, so atom can be hidden + // by linker + mergeAsWeakAndAddressUsed, // Is C++ definition inline definition whose + // address was taken. + mergeSameNameAndSize, // Another atom with different size is error + mergeByLargestSection, // Choose an atom whose section is the largest. + mergeByContent, // Merge with other constants with same content. + }; + + enum ContentType { + typeUnknown, // for use with definitionUndefined + typeMachHeader, // atom representing mach_header [Darwin] + typeCode, // executable code + typeResolver, // function which returns address of target + typeBranchIsland, // linker created for large binaries + typeBranchShim, // linker created to switch thumb mode + typeStub, // linker created for calling external function + typeStubHelper, // linker created for initial stub binding + typeConstant, // a read-only constant + typeCString, // a zero terminated UTF8 C string + typeUTF16String, // a zero terminated UTF16 string + typeCFI, // a FDE or CIE from dwarf unwind info + typeLSDA, // extra unwinding info + typeLiteral4, // a four-btye read-only constant + typeLiteral8, // an eight-btye read-only constant + typeLiteral16, // a sixteen-btye read-only constant + typeData, // read-write data + typeDataFast, // allow data to be quickly accessed + typeZeroFill, // zero-fill data + typeZeroFillFast, // allow zero-fill data to 
be quicky accessed + typeConstData, // read-only data after dynamic linker is done + typeObjC1Class, // ObjC1 class [Darwin] + typeLazyPointer, // pointer through which a stub jumps + typeLazyDylibPointer, // pointer through which a stub jumps [Darwin] + typeNonLazyPointer, // pointer to external symbol + typeCFString, // NS/CFString object [Darwin] + typeGOT, // pointer to external symbol + typeInitializerPtr, // pointer to initializer function + typeTerminatorPtr, // pointer to terminator function + typeCStringPtr, // pointer to UTF8 C string [Darwin] + typeObjCClassPtr, // pointer to ObjC class [Darwin] + typeObjC2CategoryList, // pointers to ObjC category [Darwin] + typeObjCImageInfo, // pointer to ObjC class [Darwin] + typeObjCMethodList, // pointer to ObjC method list [Darwin] + typeDTraceDOF, // runtime data for Dtrace [Darwin] + typeInterposingTuples, // tuples of interposing info for dyld [Darwin] + typeTempLTO, // temporary atom for bitcode reader + typeCompactUnwindInfo, // runtime data for unwinder [Darwin] + typeProcessedUnwindInfo,// compressed compact unwind info [Darwin] + typeThunkTLV, // thunk used to access a TLV [Darwin] + typeTLVInitialData, // initial data for a TLV [Darwin] + typeTLVInitialZeroFill, // TLV initial zero fill data [Darwin] + typeTLVInitializerPtr, // pointer to thread local initializer [Darwin] + typeDSOHandle, // atom representing DSO handle [Darwin] + typeSectCreate, // Created via the -sectcreate option [Darwin] + }; + + // Permission bits for atoms and segments. The order of these values are + // important, because the layout pass may sort atoms by permission if other + // attributes are the same. 
+ enum ContentPermissions { + perm___ = 0, // mapped as unaccessible + permR__ = 8, // mapped read-only + permRW_ = 8 + 2, // mapped readable and writable + permRW_L = 8 + 2 + 1, // initially mapped r/w, then made read-only + // loader writable + permR_X = 8 + 4, // mapped readable and executable + permRWX = 8 + 2 + 4, // mapped readable and writable and executable + permUnknown = 16 // unknown or invalid permissions + }; + + enum SectionChoice { + sectionBasedOnContent, // linker infers final section based on content + sectionCustomPreferred, // linker may place in specific section + sectionCustomRequired // linker must place in specific section + }; + + enum DeadStripKind { + deadStripNormal, // linker may dead strip this atom + deadStripNever, // linker must never dead strip this atom + deadStripAlways // linker must remove this atom if unused + }; + + enum DynamicExport { + /// \brief The linker may or may not export this atom dynamically depending + /// on the output type and other context of the link. + dynamicExportNormal, + /// \brief The linker will always export this atom dynamically. + dynamicExportAlways, + }; + + // Attributes describe a code model used by the atom. 
+ enum CodeModel { + codeNA, // no specific code model + // MIPS code models + codeMipsPIC, // PIC function in a PIC / non-PIC mixed file + codeMipsMicro, // microMIPS instruction encoding + codeMipsMicroPIC, // microMIPS instruction encoding + PIC + codeMips16, // MIPS-16 instruction encoding + // ARM code models + codeARMThumb, // ARM Thumb instruction set + codeARM_a, // $a-like mapping symbol (for ARM code) + codeARM_d, // $d-like mapping symbol (for data) + codeARM_t, // $t-like mapping symbol (for Thumb code) + }; + + struct Alignment { + Alignment(int v, int m = 0) : value(v), modulus(m) {} + + uint16_t value; + uint16_t modulus; + + bool operator==(const Alignment &rhs) const { + return (value == rhs.value) && (modulus == rhs.modulus); + } + }; + + /// \brief returns a value for the order of this Atom within its file. + /// + /// This is used by the linker to order the layout of Atoms so that the + /// resulting image is stable and reproducible. + virtual uint64_t ordinal() const = 0; + + /// \brief the number of bytes of space this atom's content will occupy in the + /// final linked image. + /// + /// For a function atom, it is the number of bytes of code in the function. + virtual uint64_t size() const = 0; + + /// \brief The size of the section from which the atom is instantiated. + /// + /// Merge::mergeByLargestSection is defined in terms of section size + /// and not in terms of atom size, so we need this function separate + /// from size(). + virtual uint64_t sectionSize() const { return 0; } + + /// \brief The visibility of this atom to other atoms. + /// + /// C static functions have scope scopeTranslationUnit. Regular C functions + /// have scope scopeGlobal. Functions compiled with visibility=hidden have + /// scope scopeLinkageUnit so they can be see by other atoms being linked but + /// not by the OS loader. + virtual Scope scope() const = 0; + + /// \brief Whether the linker should use direct or indirect access to this + /// atom. 
+ virtual Interposable interposable() const = 0; + + /// \brief how the linker should handle if multiple atoms have the same name. + virtual Merge merge() const = 0; + + /// \brief The type of this atom, such as code or data. + virtual ContentType contentType() const = 0; + + /// \brief The alignment constraints on how this atom must be laid out in the + /// final linked image (e.g. 16-byte aligned). + virtual Alignment alignment() const = 0; + + /// \brief Whether this atom must be in a specially named section in the final + /// linked image, or if the linker can infer the section based on the + /// contentType(). + virtual SectionChoice sectionChoice() const = 0; + + /// \brief If sectionChoice() != sectionBasedOnContent, then this return the + /// name of the section the atom should be placed into. + virtual StringRef customSectionName() const = 0; + + /// \brief constraints on whether the linker may dead strip away this atom. + virtual DeadStripKind deadStrip() const = 0; + + /// \brief Under which conditions should this atom be dynamically exported. + virtual DynamicExport dynamicExport() const { + return dynamicExportNormal; + } + + /// \brief Code model used by the atom. + virtual CodeModel codeModel() const { return codeNA; } + + /// \brief Returns the OS memory protections required for this atom's content + /// at runtime. + /// + /// A function atom is R_X, a global variable is RW_, and a read-only constant + /// is R__. + virtual ContentPermissions permissions() const; + + /// \brief returns a reference to the raw (unrelocated) bytes of this Atom's + /// content. + virtual ArrayRef rawContent() const = 0; + + /// This class abstracts iterating over the sequence of References + /// in an Atom. Concrete instances of DefinedAtom must implement + /// the derefIterator() and incrementIterator() methods. 
+ class reference_iterator { + public: + reference_iterator(const DefinedAtom &a, const void *it) + : _atom(a), _it(it) { } + + const Reference *operator*() const { + return _atom.derefIterator(_it); + } + + const Reference *operator->() const { + return _atom.derefIterator(_it); + } + + bool operator==(const reference_iterator &other) const { + return _it == other._it; + } + + bool operator!=(const reference_iterator &other) const { + return !(*this == other); + } + + reference_iterator &operator++() { + _atom.incrementIterator(_it); + return *this; + } + private: + const DefinedAtom &_atom; + const void *_it; + }; + + /// \brief Returns an iterator to the beginning of this Atom's References. + virtual reference_iterator begin() const = 0; + + /// \brief Returns an iterator to the end of this Atom's References. + virtual reference_iterator end() const = 0; + + /// Adds a reference to this atom. + virtual void addReference(Reference::KindNamespace ns, + Reference::KindArch arch, + Reference::KindValue kindValue, uint64_t off, + const Atom *target, Reference::Addend a) { + llvm_unreachable("Subclass does not permit adding references"); + } + + static bool classof(const Atom *a) { + return a->definition() == definitionRegular; + } + + /// Utility for deriving permissions from content type + static ContentPermissions permissions(ContentType type); + + /// Utility function to check if the atom occupies file space + bool occupiesDiskSpace() const { + ContentType atomContentType = contentType(); + return !(atomContentType == DefinedAtom::typeZeroFill || + atomContentType == DefinedAtom::typeZeroFillFast || + atomContentType == DefinedAtom::typeTLVInitialZeroFill); + } + + /// Utility function to check if relocations in this atom to other defined + /// atoms can be implicitly generated, and so we don't need to explicitly + /// emit those relocations. 
+ bool relocsToDefinedCanBeImplicit() const { + ContentType atomContentType = contentType(); + return atomContentType == typeCFI; + } + + // Returns true if lhs should be placed before rhs in the final output. + static bool compareByPosition(const DefinedAtom *lhs, + const DefinedAtom *rhs); + +protected: + // DefinedAtom is an abstract base class. Only subclasses can access + // constructor. + DefinedAtom() : Atom(definitionRegular) { } + + ~DefinedAtom() override = default; + + /// \brief Returns a pointer to the Reference object that the abstract + /// iterator "points" to. + virtual const Reference *derefIterator(const void *iter) const = 0; + + /// \brief Adjusts the abstract iterator to "point" to the next Reference + /// object for this Atom. + virtual void incrementIterator(const void *&iter) const = 0; +}; +} // end namespace lld + +#endif diff --git a/include/lld/Core/Error.h b/include/lld/Core/Error.h new file mode 100644 index 00000000..b0bf73b1 --- /dev/null +++ b/include/lld/Core/Error.h @@ -0,0 +1,68 @@ +//===- Error.h - system_error extensions for lld ----------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This declares a new error_category for the lld library. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_ERROR_H +#define LLD_CORE_ERROR_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Error.h" +#include + +namespace lld { + +const std::error_category &YamlReaderCategory(); + +enum class YamlReaderError { + unknown_keyword, + illegal_value +}; + +inline std::error_code make_error_code(YamlReaderError e) { + return std::error_code(static_cast(e), YamlReaderCategory()); +} + +/// Creates an error_code object that has associated with it an arbitrary +/// error messsage. The value() of the error_code will always be non-zero +/// but its value is meaningless. The messsage() will be (a copy of) the +/// supplied error string. +/// Note: Once ErrorOr<> is updated to work with errors other than error_code, +/// this can be updated to return some other kind of error. +std::error_code make_dynamic_error_code(StringRef msg); + +/// Generic error. +/// +/// For errors that don't require their own specific sub-error (most errors) +/// this class can be used to describe the error via a string message. +class GenericError : public llvm::ErrorInfo { +public: + static char ID; + GenericError(Twine Msg); + const std::string &getMessage() const { return Msg; } + void log(llvm::raw_ostream &OS) const override; + + std::error_code convertToErrorCode() const override { + return make_dynamic_error_code(getMessage()); + } + +private: + std::string Msg; +}; + +} // end namespace lld + +namespace std { +template <> struct is_error_code_enum : std::true_type {}; +} + +#endif diff --git a/include/lld/Core/File.h b/include/lld/Core/File.h new file mode 100644 index 00000000..20418688 --- /dev/null +++ b/include/lld/Core/File.h @@ -0,0 +1,278 @@ +//===- Core/File.h - A Container of Atoms ---------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_FILE_H +#define LLD_CORE_FILE_H + +#include "lld/Core/AbsoluteAtom.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/SharedLibraryAtom.h" +#include "lld/Core/UndefinedAtom.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include +#include +#include + +namespace lld { + +class LinkingContext; + +/// Every Atom is owned by some File. A common scenario is for a single +/// object file (.o) to be parsed by some reader and produce a single +/// File object that represents the content of that object file. +/// +/// To iterate through the Atoms in a File there are four methods that +/// return collections. For instance to iterate through all the DefinedAtoms +/// in a File object use: +/// for (const DefinedAtoms *atom : file->defined()) { +/// } +/// +/// The Atom objects in a File are owned by the File object. The Atom objects +/// are destroyed when the File object is destroyed. +class File { +public: + virtual ~File(); + + /// \brief Kinds of files that are supported. + enum Kind { + kindErrorObject, ///< a error object file (.o) + kindNormalizedObject, ///< a normalized file (.o) + kindMachObject, ///< a MachO object file (.o) + kindCEntryObject, ///< a file for CEntries + kindHeaderObject, ///< a file for file headers + kindEntryObject, ///< a file for the entry + kindUndefinedSymsObject, ///< a file for undefined symbols + kindStubHelperObject, ///< a file for stub helpers + kindResolverMergedObject, ///< the resolver merged file. + kindSectCreateObject, ///< a sect create object file (.o) + kindSharedLibrary, ///< shared library (.so) + kindArchiveLibrary ///< archive (.a) + }; + + /// \brief Returns file kind. Need for dyn_cast<> on File objects. 
+ Kind kind() const { + return _kind; + } + + /// This returns the path to the file which was used to create this object + /// (e.g. "/tmp/foo.o"). If the file is a member of an archive file, the + /// returned string includes the archive file name. + StringRef path() const { + if (_archivePath.empty()) + return _path; + if (_archiveMemberPath.empty()) + _archiveMemberPath = (_archivePath + "(" + _path + ")").str(); + return _archiveMemberPath; + } + + /// Returns the path of the archive file name if this file is instantiated + /// from an archive file. Otherwise returns the empty string. + StringRef archivePath() const { return _archivePath; } + void setArchivePath(StringRef path) { _archivePath = path; } + + /// Returns the path name of this file. It doesn't include archive file name. + StringRef memberPath() const { return _path; } + + /// Returns the command line order of the file. + uint64_t ordinal() const { + assert(_ordinal != UINT64_MAX); + return _ordinal; + } + + /// Returns true/false depending on whether an ordinal has been set. + bool hasOrdinal() const { return (_ordinal != UINT64_MAX); } + + /// Sets the command line order of the file. + void setOrdinal(uint64_t ordinal) const { _ordinal = ordinal; } + + /// Returns the ordinal for the next atom to be defined in this file. + uint64_t getNextAtomOrdinalAndIncrement() const { + return _nextAtomOrdinal++; + } + + /// For allocating any objects owned by this File. + llvm::BumpPtrAllocator &allocator() const { + return _allocator; + } + + /// The type of atom mutable container. + template using AtomVector = std::vector>; + + /// The range type for the atoms. 
+ template class AtomRange { + public: + AtomRange(AtomVector &v) : _v(v) {} + AtomRange(const AtomVector &v) : _v(const_cast &>(v)) {} + + typedef std::pointer_to_unary_function&, + const T*> ConstDerefFn; + + typedef std::pointer_to_unary_function&, T*> DerefFn; + + typedef llvm::mapped_iterator::const_iterator, + ConstDerefFn> ConstItTy; + typedef llvm::mapped_iterator::iterator, + DerefFn> ItTy; + + static const T* DerefConst(const OwningAtomPtr &p) { + return p.get(); + } + + static T* Deref(OwningAtomPtr &p) { + return p.get(); + } + + ConstItTy begin() const { + return ConstItTy(_v.begin(), ConstDerefFn(DerefConst)); + } + ConstItTy end() const { + return ConstItTy(_v.end(), ConstDerefFn(DerefConst)); + } + + ItTy begin() { + return ItTy(_v.begin(), DerefFn(Deref)); + } + ItTy end() { + return ItTy(_v.end(), DerefFn(Deref)); + } + + llvm::iterator_range::iterator> owning_ptrs() { + return llvm::make_range(_v.begin(), _v.end()); + } + + llvm::iterator_range::iterator> owning_ptrs() const { + return llvm::make_range(_v.begin(), _v.end()); + } + + bool empty() const { + return _v.empty(); + } + + size_t size() const { + return _v.size(); + } + + const OwningAtomPtr &operator[](size_t idx) const { + return _v[idx]; + } + + OwningAtomPtr &operator[](size_t idx) { + return _v[idx]; + } + + private: + AtomVector &_v; + }; + + /// \brief Must be implemented to return the AtomVector object for + /// all DefinedAtoms in this File. + virtual const AtomRange defined() const = 0; + + /// \brief Must be implemented to return the AtomVector object for + /// all UndefinedAtomw in this File. + virtual const AtomRange undefined() const = 0; + + /// \brief Must be implemented to return the AtomVector object for + /// all SharedLibraryAtoms in this File. + virtual const AtomRange sharedLibrary() const = 0; + + /// \brief Must be implemented to return the AtomVector object for + /// all AbsoluteAtoms in this File. 
+ virtual const AtomRange absolute() const = 0; + + /// Drop all of the atoms owned by this file. This will result in all of + /// the atoms running their destructors. + /// This is required because atoms may be allocated on a BumpPtrAllocator + /// of a different file. We need to destruct all atoms before any files. + virtual void clearAtoms() = 0; + + /// \brief If a file is parsed using a different method than doParse(), + /// one must use this method to set the last error status, so that + /// doParse will not be called twice. Only YAML reader uses this + /// (because YAML reader does not read blobs but structured data). + void setLastError(std::error_code err) { _lastError = err; } + + std::error_code parse(); + + // Usually each file owns a std::unique_ptr. + // However, there's one special case. If a file is an archive file, + // the archive file and its children all shares the same memory buffer. + // This method is used by the ArchiveFile to give its children + // co-ownership of the buffer. + void setSharedMemoryBuffer(std::shared_ptr mb) { + _sharedMemoryBuffer = mb; + } + +protected: + /// \brief only subclasses of File can be instantiated + File(StringRef p, Kind kind) + : _path(p), _kind(kind), _ordinal(UINT64_MAX), + _nextAtomOrdinal(0) {} + + /// \brief Subclasses should override this method to parse the + /// memory buffer passed to this file's constructor. 
+ virtual std::error_code doParse() { return std::error_code(); } + + static AtomVector _noDefinedAtoms; + static AtomVector _noUndefinedAtoms; + static AtomVector _noSharedLibraryAtoms; + static AtomVector _noAbsoluteAtoms; + mutable llvm::BumpPtrAllocator _allocator; + +private: + StringRef _path; + std::string _archivePath; + mutable std::string _archiveMemberPath; + Kind _kind; + mutable uint64_t _ordinal; + mutable uint64_t _nextAtomOrdinal; + std::shared_ptr _sharedMemoryBuffer; + llvm::Optional _lastError; + std::mutex _parseMutex; +}; + +/// An ErrorFile represents a file that doesn't exist. +/// If you try to parse a file which doesn't exist, an instance of this +/// class will be returned. That's parse method always returns an error. +/// This is useful to delay erroring on non-existent files, so that we +/// can do unit testing a driver using non-existing file paths. +class ErrorFile : public File { +public: + ErrorFile(StringRef path, std::error_code ec) + : File(path, kindErrorObject), _ec(ec) {} + + std::error_code doParse() override { return _ec; } + + const AtomRange defined() const override { + llvm_unreachable("internal error"); + } + const AtomRange undefined() const override { + llvm_unreachable("internal error"); + } + const AtomRange sharedLibrary() const override { + llvm_unreachable("internal error"); + } + const AtomRange absolute() const override { + llvm_unreachable("internal error"); + } + + void clearAtoms() override { + } + +private: + std::error_code _ec; +}; + +} // end namespace lld + +#endif diff --git a/include/lld/Core/Instrumentation.h b/include/lld/Core/Instrumentation.h new file mode 100644 index 00000000..16237590 --- /dev/null +++ b/include/lld/Core/Instrumentation.h @@ -0,0 +1,132 @@ +//===- include/Core/Instrumentation.h - Instrumentation API ---------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Provide an Instrumentation API that optionally uses VTune interfaces. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_INSTRUMENTATION_H +#define LLD_CORE_INSTRUMENTATION_H + +#include "llvm/Support/Compiler.h" +#include + +#ifdef LLD_HAS_VTUNE +# include +#endif + +namespace lld { +#ifdef LLD_HAS_VTUNE +/// \brief A unique global scope for instrumentation data. +/// +/// Domains last for the lifetime of the application and cannot be destroyed. +/// Multiple Domains created with the same name represent the same domain. +class Domain { + __itt_domain *_domain; + +public: + explicit Domain(const char *name) : _domain(__itt_domain_createA(name)) {} + + operator __itt_domain *() const { return _domain; } + __itt_domain *operator->() const { return _domain; } +}; + +/// \brief A global reference to a string constant. +/// +/// These are uniqued by the ITT runtime and cannot be deleted. They are not +/// specific to a domain. +/// +/// Prefer reusing a single StringHandle over passing a ntbs when the same +/// string will be used often. +class StringHandle { + __itt_string_handle *_handle; + +public: + StringHandle(const char *name) : _handle(__itt_string_handle_createA(name)) {} + + operator __itt_string_handle *() const { return _handle; } +}; + +/// \brief A task on a single thread. Nests within other tasks. +/// +/// Each thread has its own task stack and tasks nest recursively on that stack. +/// A task cannot transfer threads. +/// +/// SBRM is used to ensure task starts and ends are ballanced. The lifetime of +/// a task is either the lifetime of this object, or until end is called. +class ScopedTask { + __itt_domain *_domain; + + ScopedTask(const ScopedTask &) = delete; + ScopedTask &operator=(const ScopedTask &) = delete; + +public: + /// \brief Create a task in Domain \p d named \p s. 
+ ScopedTask(const Domain &d, const StringHandle &s) : _domain(d) { + __itt_task_begin(d, __itt_null, __itt_null, s); + } + + ScopedTask(ScopedTask &&other) { + *this = std::move(other); + } + + ScopedTask &operator=(ScopedTask &&other) { + _domain = other._domain; + other._domain = nullptr; + return *this; + } + + /// \brief Prematurely end this task. + void end() { + if (_domain) + __itt_task_end(_domain); + _domain = nullptr; + } + + ~ScopedTask() { end(); } +}; + +/// \brief A specific point in time. Allows metadata to be associated. +class Marker { +public: + Marker(const Domain &d, const StringHandle &s) { + __itt_marker(d, __itt_null, s, __itt_scope_global); + } +}; +#else +class Domain { +public: + Domain(const char *name) {} +}; + +class StringHandle { +public: + StringHandle(const char *name) {} +}; + +class ScopedTask { +public: + ScopedTask(const Domain &d, const StringHandle &s) {} + void end() {} +}; + +class Marker { +public: + Marker(const Domain &d, const StringHandle &s) {} +}; +#endif + +inline const Domain &getDefaultDomain() { + static Domain domain("org.llvm.lld"); + return domain; +} +} // end namespace lld. + +#endif diff --git a/include/lld/Core/LLVM.h b/include/lld/Core/LLVM.h new file mode 100644 index 00000000..ccf08859 --- /dev/null +++ b/include/lld/Core/LLVM.h @@ -0,0 +1,83 @@ +//===--- LLVM.h - Import various common LLVM datatypes ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file forward declares and imports various common LLVM datatypes that +// lld wants to use unqualified. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_LLVM_H +#define LLD_CORE_LLVM_H + +// This should be the only #include, force #includes of all the others on +// clients. +#include "llvm/ADT/Hashing.h" +#include "llvm/Support/Casting.h" +#include + +namespace llvm { + // ADT's. + class Error; + class StringRef; + class Twine; + class MemoryBuffer; + class MemoryBufferRef; + template class ArrayRef; + template class SmallString; + template class SmallVector; + template class SmallVectorImpl; + + template + struct SaveAndRestore; + + template + class ErrorOr; + + template + class Expected; + + class raw_ostream; + // TODO: DenseMap, ... +} + +namespace lld { + // Casting operators. + using llvm::isa; + using llvm::cast; + using llvm::dyn_cast; + using llvm::dyn_cast_or_null; + using llvm::cast_or_null; + + // ADT's. + using llvm::Error; + using llvm::StringRef; + using llvm::Twine; + using llvm::MemoryBuffer; + using llvm::MemoryBufferRef; + using llvm::ArrayRef; + using llvm::SmallString; + using llvm::SmallVector; + using llvm::SmallVectorImpl; + using llvm::SaveAndRestore; + using llvm::ErrorOr; + using llvm::Expected; + + using llvm::raw_ostream; +} // end namespace lld. + +namespace std { +template <> struct hash { +public: + size_t operator()(const llvm::StringRef &s) const { + return llvm::hash_value(s); + } +}; +} + +#endif diff --git a/include/lld/Core/LinkingContext.h b/include/lld/Core/LinkingContext.h new file mode 100644 index 00000000..7e4edaf2 --- /dev/null +++ b/include/lld/Core/LinkingContext.h @@ -0,0 +1,251 @@ +//===- lld/Core/LinkingContext.h - Linker Target Info Interface -----------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_LINKING_CONTEXT_H +#define LLD_CORE_LINKING_CONTEXT_H + +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Node.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Reader.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +namespace lld { +class PassManager; +class File; +class Writer; +class Node; +class SharedLibraryFile; + +/// \brief The LinkingContext class encapsulates "what and how" to link. +/// +/// The base class LinkingContext contains the options needed by core linking. +/// Subclasses of LinkingContext have additional options needed by specific +/// Writers. +class LinkingContext { +public: + virtual ~LinkingContext(); + + /// \name Methods needed by core linking + /// @{ + + /// Name of symbol linker should use as "entry point" to program, + /// usually "main" or "start". + virtual StringRef entrySymbolName() const { return _entrySymbolName; } + + /// Whether core linking should remove Atoms not reachable by following + /// References from the entry point Atom or from all global scope Atoms + /// if globalsAreDeadStripRoots() is true. + bool deadStrip() const { return _deadStrip; } + + /// Only used if deadStrip() returns true. Means all global scope Atoms + /// should be marked live (along with all Atoms they reference). Usually + /// this method returns false for main executables, but true for dynamic + /// shared libraries. + bool globalsAreDeadStripRoots() const { return _globalsAreDeadStripRoots; } + + /// Only used if deadStrip() returns true. This method returns the names + /// of DefinedAtoms that should be marked live (along with all Atoms they + /// reference). Only Atoms with scope scopeLinkageUnit or scopeGlobal can + /// be kept live using this method. 
+ const std::vector &deadStripRoots() const { + return _deadStripRoots; + } + + /// Add the given symbol name to the dead strip root set. Only used if + /// deadStrip() returns true. + void addDeadStripRoot(StringRef symbolName) { + assert(!symbolName.empty() && "Empty symbol cannot be a dead strip root"); + _deadStripRoots.push_back(symbolName); + } + + /// Normally, every UndefinedAtom must be replaced by a DefinedAtom or a + /// SharedLibraryAtom for the link to be successful. This method controls + /// whether core linking prints out a list of remaining UndefinedAtoms. + /// + /// \todo This should be a method core linking calls with a list of the + /// UndefinedAtoms so that different drivers can format the error message + /// as needed. + bool printRemainingUndefines() const { return _printRemainingUndefines; } + + /// Normally, every UndefinedAtom must be replaced by a DefinedAtom or a + /// SharedLibraryAtom for the link to be successful. This method controls + /// whether core linking considers remaining undefines to be an error. + bool allowRemainingUndefines() const { return _allowRemainingUndefines; } + + /// Normally, every UndefinedAtom must be replaced by a DefinedAtom or a + /// SharedLibraryAtom for the link to be successful. This method controls + /// whether core linking considers remaining undefines from the shared library + /// to be an error. + bool allowShlibUndefines() const { return _allowShlibUndefines; } + + /// If true, core linking will write the path to each input file to stdout + /// (i.e. llvm::outs()) as it is used. This is used to implement the -t + /// linker option. + /// + /// \todo This should be a method core linking calls so that drivers can + /// format the line as needed. + bool logInputFiles() const { return _logInputFiles; } + + /// Parts of LLVM use global variables which are bound to command line + /// options (see llvm::cl::Options). 
This method returns "command line" + /// options which are used to configure LLVM's command line settings. + /// For instance the -debug-only XXX option can be used to dynamically + /// trace different parts of LLVM and lld. + const std::vector &llvmOptions() const { return _llvmOptions; } + + /// \name Methods used by Drivers to configure the LinkingContext + /// @{ + void setOutputPath(StringRef str) { _outputPath = str; } + + // Set the entry symbol name. You may also need to call addDeadStripRoot() for + // the symbol if your platform supports dead-stripping, so that the symbol + // will not be removed from the output. + void setEntrySymbolName(StringRef name) { + _entrySymbolName = name; + } + + void setDeadStripping(bool enable) { _deadStrip = enable; } + void setGlobalsAreDeadStripRoots(bool v) { _globalsAreDeadStripRoots = v; } + void setPrintRemainingUndefines(bool print) { + _printRemainingUndefines = print; + } + void setAllowRemainingUndefines(bool allow) { + _allowRemainingUndefines = allow; + } + void setAllowShlibUndefines(bool allow) { _allowShlibUndefines = allow; } + void setLogInputFiles(bool log) { _logInputFiles = log; } + + void appendLLVMOption(const char *opt) { _llvmOptions.push_back(opt); } + + std::vector> &getNodes() { return _nodes; } + const std::vector> &getNodes() const { return _nodes; } + + /// This method adds undefined symbols specified by the -u option to + /// the list of undefined symbols known to the linker. This option essentially + /// forces an undefined symbol to be created. You may also need to call + /// addDeadStripRoot() for the symbol if your platform supports dead + /// stripping, so that the symbol will not be removed from the output. + void addInitialUndefinedSymbol(StringRef symbolName) { + _initialUndefinedSymbols.push_back(symbolName); + } + + /// Iterators for symbols that appear on the command line.
+ typedef std::vector StringRefVector; + typedef StringRefVector::iterator StringRefVectorIter; + typedef StringRefVector::const_iterator StringRefVectorConstIter; + + /// Create linker internal files containing atoms for the linker to include + /// during link. Flavors can override this function in their LinkingContext + /// to add more internal files. These internal files are positioned before + /// the actual input files. + virtual void createInternalFiles(std::vector > &) const; + + /// Return the list of undefined symbols that are specified in the + /// linker command line, using the -u option. + ArrayRef initialUndefinedSymbols() const { + return _initialUndefinedSymbols; + } + + /// After all set* methods are called, the Driver calls this method + /// to validate that there are no missing options or invalid combinations + /// of options. If there is a problem, a description of the problem + /// is written to the supplied stream. + /// + /// \returns true if there is an error with the current settings. + bool validate(raw_ostream &diagnostics); + + /// Formats symbol name for use in error messages. + virtual std::string demangle(StringRef symbolName) const = 0; + + /// @} + /// \name Methods used by Driver::link() + /// @{ + + /// Returns the file system path to which the linked output should be written. + /// + /// \todo To support in-memory linking, we need an abstraction that allows + /// the linker to write to an in-memory buffer. + StringRef outputPath() const { return _outputPath; } + + /// Accessor for Register object embedded in LinkingContext. + const Registry ®istry() const { return _registry; } + Registry ®istry() { return _registry; } + + /// This method is called by core linking to give the Writer a chance + /// to add file format specific "files" to set of files to be linked. This is + /// how file format specific atoms can be added to the link. 
+ virtual void createImplicitFiles(std::vector> &) = 0; + + /// This method is called by core linking to build the list of Passes to be + /// run on the merged/linked graph of all input files. + virtual void addPasses(PassManager &pm) = 0; + + /// Calls through to the writeFile() method on the specified Writer. + /// + /// \param linkedFile This is the merged/linked graph of all input file Atoms. + virtual llvm::Error writeFile(const File &linkedFile) const; + + /// Return the next ordinal and Increment it. + virtual uint64_t getNextOrdinalAndIncrement() const { return _nextOrdinal++; } + + // This function is called just before the Resolver kicks in. + // Derived classes may use it to change the list of input files. + virtual void finalizeInputFiles() = 0; + + /// Callback invoked for each file the Resolver decides we are going to load. + /// This can be used to update context state based on the file, and emit + /// errors for any differences between the context state and a loaded file. + /// For example, we can error if we try to load a file which is a different + /// arch from that being linked. + virtual llvm::Error handleLoadedFile(File &file) = 0; + + /// @} +protected: + LinkingContext(); // Must be subclassed + + /// Abstract method to lazily instantiate the Writer. 
+ virtual Writer &writer() const = 0; + + /// Method to create an internal file for the entry symbol + virtual std::unique_ptr createEntrySymbolFile() const; + std::unique_ptr createEntrySymbolFile(StringRef filename) const; + + /// Method to create an internal file for an undefined symbol + virtual std::unique_ptr createUndefinedSymbolFile() const; + std::unique_ptr createUndefinedSymbolFile(StringRef filename) const; + + StringRef _outputPath; + StringRef _entrySymbolName; + bool _deadStrip = false; + bool _globalsAreDeadStripRoots = false; + bool _printRemainingUndefines = true; + bool _allowRemainingUndefines = false; + bool _logInputFiles = false; + bool _allowShlibUndefines = false; + std::vector _deadStripRoots; + std::vector _llvmOptions; + StringRefVector _initialUndefinedSymbols; + std::vector> _nodes; + mutable llvm::BumpPtrAllocator _allocator; + mutable uint64_t _nextOrdinal = 0; + Registry _registry; + +private: + /// Validate the subclass bits. Only called by validate. + virtual bool validateImpl(raw_ostream &diagnostics) = 0; +}; + +} // end namespace lld + +#endif diff --git a/include/lld/Core/Node.h b/include/lld/Core/Node.h new file mode 100644 index 00000000..8de0ecdb --- /dev/null +++ b/include/lld/Core/Node.h @@ -0,0 +1,74 @@ +//===- lld/Core/Node.h - Input file class ---------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// The classes in this file represents inputs to the linker. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_NODE_H +#define LLD_CORE_NODE_H + +#include "lld/Core/File.h" +#include "llvm/Option/ArgList.h" +#include +#include + +namespace lld { + +// A Node represents a FileNode or other type of Node. 
In the latter case, +// the node contains meta information about the input file list. +// Currently only GroupEnd node is defined as a meta node. +class Node { +public: + enum class Kind { File, GroupEnd }; + explicit Node(Kind type) : _kind(type) {} + virtual ~Node() {} + virtual Kind kind() const { return _kind; } // The kind is set once, by the constructor. + +private: + Kind _kind; +}; + +// This is a marker for --end-group. getSize() returns the number of +// files between the corresponding --start-group and this marker. +class GroupEnd : public Node { +public: + explicit GroupEnd(int size) : Node(Kind::GroupEnd), _size(size) {} + + int getSize() const { return _size; } + + static bool classof(const Node *a) { // LLVM-style RTTI hook: true iff a->kind() is GroupEnd. + return a->kind() == Kind::GroupEnd; + } + +private: + int _size; +}; + +// A Node that owns a single File. +class FileNode : public Node { +public: + explicit FileNode(std::unique_ptr f) // Takes ownership of f. + : Node(Node::Kind::File), _file(std::move(f)) {} + + static bool classof(const Node *a) { // LLVM-style RTTI hook: true iff a->kind() is File. + return a->kind() == Node::Kind::File; + } + + File *getFile() { return _file.get(); } // Non-owning pointer; the node keeps ownership. + +protected: + std::unique_ptr _file; +}; + +} // namespace lld + +#endif // LLD_CORE_NODE_H diff --git a/include/lld/Core/Parallel.h b/include/lld/Core/Parallel.h new file mode 100644 index 00000000..2dde97d9 --- /dev/null +++ b/include/lld/Core/Parallel.h @@ -0,0 +1,297 @@ +//===- lld/Core/Parallel.h - Parallel utilities ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_PARALLEL_H +#define LLD_CORE_PARALLEL_H + +#include "lld/Core/Instrumentation.h" +#include "lld/Core/LLVM.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/thread.h" + +#include +#include +#include +#include +#include + +#if defined(_MSC_VER) && LLVM_ENABLE_THREADS +#include +#include +#endif + +namespace lld { +/// \brief Allows one or more threads to wait on a potentially unknown number of +/// events. +/// +/// A latch starts at \p count. inc() increments this, and dec() decrements it. +/// All calls to sync() will block while the count is not 0. +/// +/// Calling dec() on a Latch with a count of 0 has undefined behavior. +class Latch { + uint32_t _count; + mutable std::mutex _condMut; + mutable std::condition_variable _cond; + +public: + explicit Latch(uint32_t count = 0) : _count(count) {} + ~Latch() { sync(); } // Destruction blocks until the count is 0. + + void inc() { // Registers one more pending event. + std::unique_lock lock(_condMut); + ++_count; + } + + void dec() { // Completes one event; wakes every waiter once the count reaches 0. + std::unique_lock lock(_condMut); + if (--_count == 0) + _cond.notify_all(); + } + + void sync() const { // Blocks the caller until the count is 0. + std::unique_lock lock(_condMut); + _cond.wait(lock, [&] { + return _count == 0; + }); + } +}; + +// Classes in this namespace are implementation details of this header. +namespace internal { + +/// \brief An abstract class that takes closures and runs them asynchronously. +class Executor { +public: + virtual ~Executor() = default; + virtual void add(std::function func) = 0; +}; + +#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 +class SyncExecutor : public Executor { +public: + virtual void add(std::function func) { // Threads disabled: run func right away on the calling thread. + func(); + } +}; + +inline Executor *getDefaultExecutor() { + static SyncExecutor exec; + return &exec; +} +#elif defined(_MSC_VER) +/// \brief An Executor that runs tasks via ConcRT.
+class ConcRTExecutor : public Executor { + struct Taskish { + Taskish(std::function task) : _task(task) {} + + std::function _task; + + static void run(void *p) { // Runs the task, then destroys and frees the placement-new'd Taskish. + Taskish *self = static_cast(p); + self->_task(); + self->~Taskish(); concurrency::Free(self); + } + }; + +public: + virtual void add(std::function func) { + Concurrency::CurrentScheduler::ScheduleTask(Taskish::run, + new (concurrency::Alloc(sizeof(Taskish))) Taskish(func)); + } +}; + +inline Executor *getDefaultExecutor() { + static ConcRTExecutor exec; + return &exec; +} +#else +/// \brief An implementation of an Executor that runs closures on a thread pool +/// in filo order. +class ThreadPoolExecutor : public Executor { +public: + explicit ThreadPoolExecutor(unsigned threadCount = + std::thread::hardware_concurrency()) + : _stop(false), _done(threadCount ? threadCount : 1) { // One worker always runs, even when hardware_concurrency() reports 0. + // Spawn all but one of the threads in another thread as spawning threads + // can take a while. + std::thread([&, threadCount] { + for (std::size_t i = 1; i < threadCount; ++i) { + std::thread([=] { + work(); + }).detach(); + } + work(); + }).detach(); + } + + ~ThreadPoolExecutor() override { + std::unique_lock lock(_mutex); + _stop = true; + lock.unlock(); + _cond.notify_all(); + // Wait for ~Latch.
+ } + + void add(std::function f) override { + std::unique_lock lock(_mutex); + _workStack.push(f); + lock.unlock(); + _cond.notify_one(); + } + +private: + void work() { + while (true) { + std::unique_lock lock(_mutex); + _cond.wait(lock, [&] { + return _stop || !_workStack.empty(); + }); + if (_stop) + break; + auto task = _workStack.top(); + _workStack.pop(); + lock.unlock(); + task(); + } + _done.dec(); + } + + std::atomic _stop; + std::stack> _workStack; + std::mutex _mutex; + std::condition_variable _cond; + Latch _done; +}; + +inline Executor *getDefaultExecutor() { + static ThreadPoolExecutor exec; + return &exec; +} +#endif + +} // namespace internal + +/// \brief Allows launching a number of tasks and waiting for them to finish +/// either explicitly via sync() or implicitly on destruction. +class TaskGroup { + Latch _latch; + +public: + void spawn(std::function f) { + _latch.inc(); + internal::getDefaultExecutor()->add([&, f] { + f(); + _latch.dec(); + }); + } + + void sync() const { _latch.sync(); } +}; + +#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 +template +void parallel_sort( + RandomAccessIterator start, RandomAccessIterator end, + const Comp &comp = std::less< + typename std::iterator_traits::value_type>()) { + std::sort(start, end, comp); +} +#elif defined(_MSC_VER) +// Use ppl parallel_sort on Windows. +template +void parallel_sort( + RandomAccessIterator start, RandomAccessIterator end, + const Comp &comp = std::less< + typename std::iterator_traits::value_type>()) { + concurrency::parallel_sort(start, end, comp); +} +#else +namespace detail { +const ptrdiff_t minParallelSize = 1024; + +/// \brief Inclusive median. +template +RandomAccessIterator medianOf3(RandomAccessIterator start, + RandomAccessIterator end, const Comp &comp) { + RandomAccessIterator mid = start + (std::distance(start, end) / 2); + return comp(*start, *(end - 1)) + ? (comp(*mid, *(end - 1)) ? (comp(*start, *mid) ? 
mid : start) + : end - 1) + : (comp(*mid, *start) ? (comp(*(end - 1), *mid) ? mid : end - 1) + : start); +} + +template +void parallel_quick_sort(RandomAccessIterator start, RandomAccessIterator end, + const Comp &comp, TaskGroup &tg, size_t depth) { + // Do a sequential sort for small inputs. + if (std::distance(start, end) < detail::minParallelSize || depth == 0) { + std::sort(start, end, comp); + return; + } + + // Partition. + auto pivot = medianOf3(start, end, comp); + // Move pivot to end. + std::swap(*(end - 1), *pivot); + pivot = std::partition(start, end - 1, [&comp, end](decltype(*start) v) { + return comp(v, *(end - 1)); + }); + // Move pivot to middle of partition. + std::swap(*pivot, *(end - 1)); + + // Recurse. + tg.spawn([=, &comp, &tg] { + parallel_quick_sort(start, pivot, comp, tg, depth - 1); + }); + parallel_quick_sort(pivot + 1, end, comp, tg, depth - 1); +} +} + +template +void parallel_sort( + RandomAccessIterator start, RandomAccessIterator end, + const Comp &comp = std::less< + typename std::iterator_traits::value_type>()) { + TaskGroup tg; + detail::parallel_quick_sort(start, end, comp, tg, + llvm::Log2_64(std::distance(start, end)) + 1); +} +#endif + +template void parallel_sort(T *start, T *end) { + parallel_sort(start, end, std::less()); +} + +#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 +template +void parallel_for_each(Iterator begin, Iterator end, Func func) { + std::for_each(begin, end, func); +} +#elif defined(_MSC_VER) +// Use ppl parallel_for_each on Windows. 
+template +void parallel_for_each(Iterator begin, Iterator end, Func func) { + concurrency::parallel_for_each(begin, end, func); +} +#else +template +void parallel_for_each(Iterator begin, Iterator end, Func func) { + TaskGroup tg; + ptrdiff_t taskSize = 1024; + while (taskSize <= std::distance(begin, end)) { + tg.spawn([=, &func] { std::for_each(begin, begin + taskSize, func); }); + begin += taskSize; + } + std::for_each(begin, end, func); +} +#endif +} // end namespace lld + +#endif // LLD_CORE_PARALLEL_H diff --git a/include/lld/Core/Pass.h b/include/lld/Core/Pass.h new file mode 100644 index 00000000..0527f02c --- /dev/null +++ b/include/lld/Core/Pass.h @@ -0,0 +1,46 @@ +//===------ Core/Pass.h - Base class for linker passes --------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_PASS_H +#define LLD_CORE_PASS_H + +#include "lld/Core/Atom.h" +#include "lld/Core/File.h" +#include "lld/Core/Reference.h" +#include "llvm/Support/Error.h" +#include + +namespace lld { +class SimpleFile; + +/// Once the core linking is done (which resolves references, coalesces atoms +/// and produces a complete Atom graph), the linker runs a series of passes +/// on the Atom graph. The graph is modeled as a File, which means the pass +/// has access to all the atoms and to File level attributes. Each pass does +/// a particular transformation to the Atom graph or to the File attributes. +/// +/// This is the abstract base class for all passes. A Pass does its +/// actual work in its perform() method. It can iterate over Atoms in the +/// graph using the *begin()/*end() atom iterator of the File. It can add +/// new Atoms to the graph using the File's addAtom() method. +class Pass { +public: + virtual ~Pass() { } + + /// Do the actual work of the Pass.
+ virtual llvm::Error perform(SimpleFile &mergedFile) = 0; + +protected: + // Only subclasses can be instantiated. + Pass() { } +}; + +} // namespace lld + +#endif // LLD_CORE_PASS_H diff --git a/include/lld/Core/PassManager.h b/include/lld/Core/PassManager.h new file mode 100644 index 00000000..71a25cc7 --- /dev/null +++ b/include/lld/Core/PassManager.h @@ -0,0 +1,48 @@ +//===- lld/Core/PassManager.h - Manage linker passes ----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_PASS_MANAGER_H +#define LLD_CORE_PASS_MANAGER_H + +#include "lld/Core/LLVM.h" +#include "lld/Core/Pass.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace lld { +class SimpleFile; +class Pass; + +/// \brief Owns and runs a collection of passes. +/// +/// This class is currently just a container for passes and a way to run them. +/// +/// In the future this should handle timing pass runs, running parallel passes, +/// and validate/satisfy pass dependencies. +class PassManager { +public: + void add(std::unique_ptr pass) { // Takes ownership; passes later run in the order they were added. + _passes.push_back(std::move(pass)); + } + + llvm::Error runOnFile(SimpleFile &file) { // Stops at, and returns, the first Error a pass reports. + for (std::unique_ptr &pass : _passes) + if (llvm::Error EC = pass->perform(file)) + return EC; + return llvm::Error(); + } + +private: + /// \brief Passes in the order they should run. + std::vector> _passes; +}; +} // end namespace lld + +#endif diff --git a/include/lld/Core/Reader.h b/include/lld/Core/Reader.h new file mode 100644 index 00000000..66df4380 --- /dev/null +++ b/include/lld/Core/Reader.h @@ -0,0 +1,157 @@ +//===- lld/Core/Reader.h - Abstract File Format Reading Interface ---------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_READER_H +#define LLD_CORE_READER_H + +#include "lld/Core/LLVM.h" +#include "lld/Core/Reference.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/YAMLTraits.h" +#include +#include +#include + +using llvm::sys::fs::file_magic; + +namespace llvm { +namespace yaml { +class IO; +} +} + +namespace lld { +class File; +class LinkingContext; +class MachOLinkingContext; + +/// \brief An abstract class for reading object files, library files, and +/// executable files. +/// +/// Each file format (e.g. mach-o, etc) has a concrete subclass of Reader. +class Reader { +public: + virtual ~Reader() {} + + /// Sniffs the file to determine if this Reader can parse it. + /// The method is called with: + /// 1) the file_magic enumeration returned by identify_magic() + /// 2) the whole file content buffer if the above is not enough. + virtual bool canParse(file_magic magic, MemoryBufferRef mb) const = 0; + + /// \brief Parse a supplied buffer (already filled with the contents of a + /// file) and create a File object. + /// The resulting File object takes ownership of the MemoryBuffer. + virtual ErrorOr> + loadFile(std::unique_ptr mb, const class Registry &) const = 0; +}; + + +/// \brief An abstract class for handling alternate yaml representations +/// of object files. +/// +/// The YAML syntax allows "tags" which are used to specify the type of +/// the YAML node. In lld, top level YAML documents can be in many YAML +/// representations (e.g mach-o encoded as yaml, etc). A tag is used to +/// specify which representation is used in the following YAML document. +/// To work, there must be a YamlIOTaggedDocumentHandler registered that +/// handles each tag type. +class YamlIOTaggedDocumentHandler { +public: + virtual ~YamlIOTaggedDocumentHandler(); + + /// This method is called on each registered YamlIOTaggedDocumentHandler + /// until one returns true. 
If the subclass handles tag type !xyz, then + /// this method should call io.mapTag("!xyz") to see if that is the current + /// document type, and if so, process the rest of the document using + /// YAML I/O, then convert the result into an lld::File* and return it via f. + virtual bool handledDocTag(llvm::yaml::IO &io, const lld::File *&f) const = 0; +}; + + +/// A registry to hold the list of currently registered Readers and +/// tables which map Reference kind values to strings. +/// The linker does not directly invoke Readers. Instead, it registers +/// Readers based on its configuration and command line options, then calls +/// the Registry object to parse files. +class Registry { +public: + Registry(); + + /// Walk the list of registered Readers and find one that can parse the + /// supplied file and parse it. + ErrorOr> + loadFile(std::unique_ptr mb) const; + + /// Walk the list of registered kind tables to convert a Reference Kind + /// name to a value. + bool referenceKindFromString(StringRef inputStr, Reference::KindNamespace &ns, + Reference::KindArch &a, + Reference::KindValue &value) const; + + /// Walk the list of registered kind tables to convert a Reference Kind + /// value to a string. + bool referenceKindToString(Reference::KindNamespace ns, Reference::KindArch a, + Reference::KindValue value, StringRef &) const; + + /// Walk the list of registered tag handlers and have the one that handles + /// the current document type process the yaml into an lld::File*. + bool handleTaggedDoc(llvm::yaml::IO &io, const lld::File *&file) const; + + // These methods are called to dynamically add support for various file + // formats. The methods are also implemented in the appropriate lib*.a + // library, so that the code for handling a format is only linked in, if this + // method is used. Any options that a Reader might need must be passed + // as parameters to the addSupport*() method.
+ void addSupportArchives(bool logLoading); + void addSupportYamlFiles(); + void addSupportMachOObjects(MachOLinkingContext &); + + /// To convert between kind values and names, the registry walks the list + /// of registered kind tables. Each table is a zero terminated array of + /// KindStrings elements. + struct KindStrings { + Reference::KindValue value; + StringRef name; + }; + + /// A Reference Kind value is a tuple of <namespace, arch, value>. All + /// entries in a conversion table have the same <namespace, arch>. The + /// array then contains the value/name pairs. + void addKindTable(Reference::KindNamespace ns, Reference::KindArch arch, + const KindStrings array[]); + + +private: + struct KindEntry { + Reference::KindNamespace ns; + Reference::KindArch arch; + const KindStrings *array; + }; + + void add(std::unique_ptr); + void add(std::unique_ptr); + + std::vector> _readers; + std::vector> _yamlHandlers; + std::vector _kindEntries; +}; + +// Utilities for building a KindString table. For instance: +// static const Registry::KindStrings table[] = { +// LLD_KIND_STRING_ENTRY(R_VAX_ADDR16), +// LLD_KIND_STRING_ENTRY(R_VAX_DATA16), +// LLD_KIND_STRING_END +// }; +#define LLD_KIND_STRING_ENTRY(name) { name, #name } +#define LLD_KIND_STRING_END { 0, "" } + +} // end namespace lld + +#endif diff --git a/include/lld/Core/Reference.h b/include/lld/Core/Reference.h new file mode 100644 index 00000000..86de4f6a --- /dev/null +++ b/include/lld/Core/Reference.h @@ -0,0 +1,119 @@ +//===- Core/Reference.h - A Reference to Another Atom ---------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_REFERENCES_H +#define LLD_CORE_REFERENCES_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/StringSwitch.h" + +namespace lld { +class Atom; + +/// +/// The linker has a Graph Theory model of linking.
An object file is seen +/// as a set of Atoms with References to other Atoms. Each Atom is a node +/// and each Reference is an edge. +/// +/// For example if a function contains a call site to "malloc" 40 bytes into +/// the Atom, then the function Atom will have a Reference of: offsetInAtom=40, +/// kind=callsite, target=malloc, addend=0. +/// +/// Besides supporting traditional "relocations", references are also used for +/// forcing layout (one atom must follow another), marking data-in-code +/// (jump tables or ARM constants), etc. +/// +/// The "kind" of a reference is a tuple of <namespace, arch, value>. This +/// enables us to re-use existing relocation types defined for various +/// file formats and architectures. +/// +/// References and atoms form a directed graph. The dead-stripping pass +/// traverses them starting from dead-strip root atoms to garbage collect +/// unreachable ones. +/// +/// References of any kind are considered as directed edges. In addition to +/// that, references of some kinds are considered as bidirected edges. +class Reference { +public: + /// Which universe defines the kindValue(). + enum class KindNamespace { + all = 0, + testing = 1, + mach_o = 2, + }; + + KindNamespace kindNamespace() const { return (KindNamespace)_kindNamespace; } + void setKindNamespace(KindNamespace ns) { _kindNamespace = (uint8_t)ns; } + + // Which architecture the kind value is for. + enum class KindArch { all, AArch64, ARM, x86, x86_64}; + + KindArch kindArch() const { return (KindArch)_kindArch; } + void setKindArch(KindArch a) { _kindArch = (uint8_t)a; } + + typedef uint16_t KindValue; + + KindValue kindValue() const { return _kindValue; } + + /// setKindValue() is needed because during linking, some optimizations may + /// change the codegen and hence the reference kind. + void setKindValue(KindValue value) { + _kindValue = value; + } + + /// KindValues used with KindNamespace::all and KindArch::all.
+ enum { + // kindLayoutAfter is treated as a bidirected edge by the dead-stripping + // pass. + kindLayoutAfter = 1, + kindAssociate, + }; + + // A value to be added to the value of a target + typedef int64_t Addend; + + /// If the reference is a fixup in the Atom, then this returns the + /// byte offset into the Atom's content to do the fix up. + virtual uint64_t offsetInAtom() const = 0; + + /// Returns the atom this reference refers to. + virtual const Atom *target() const = 0; + + /// During linking, the linker may merge graphs which coalesces some nodes + /// (i.e. Atoms). To switch the target of a reference, this method is called. + virtual void setTarget(const Atom *) = 0; + + /// Some relocations require a symbol and a value (e.g. foo + 4). + virtual Addend addend() const = 0; + + /// During linking, some optimizations may change the addend value. + virtual void setAddend(Addend) = 0; + + /// Returns target specific attributes of the reference. + virtual uint32_t tag() const { return 0; } + +protected: + /// Reference is an abstract base class. Only subclasses can use the constructor. + Reference(KindNamespace ns, KindArch a, KindValue value) + : _kindValue(value), _kindNamespace((uint8_t)ns), _kindArch((uint8_t)a) {} + + /// The memory for Reference objects is always managed by the owning File + /// object. Therefore, no one but the owning File object should call + /// delete on a Reference. In fact, some File objects may bulk allocate + /// an array of References, so they cannot be individually deleted by anyone.
+ virtual ~Reference() {} + + KindValue _kindValue; + uint8_t _kindNamespace; + uint8_t _kindArch; +}; + +} // namespace lld + +#endif // LLD_CORE_REFERENCES_H diff --git a/include/lld/Core/Resolver.h b/include/lld/Core/Resolver.h new file mode 100644 index 00000000..fb62a779 --- /dev/null +++ b/include/lld/Core/Resolver.h @@ -0,0 +1,106 @@ +//===- Core/Resolver.h - Resolves Atom References -------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_RESOLVER_H +#define LLD_CORE_RESOLVER_H + +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/File.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/Core/Simple.h" +#include "lld/Core/SymbolTable.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Support/ErrorOr.h" +#include +#include +#include +#include + +namespace lld { + +class Atom; +class LinkingContext; + +/// \brief The Resolver is responsible for merging all input object files +/// and producing a merged graph. +class Resolver { +public: + Resolver(LinkingContext &ctx) : _ctx(ctx), _result(new MergedFile()) {} + + // InputFiles::Handler methods + void doDefinedAtom(OwningAtomPtr atom); + bool doUndefinedAtom(OwningAtomPtr atom); + void doSharedLibraryAtom(OwningAtomPtr atom); + void doAbsoluteAtom(OwningAtomPtr atom); + + // Handle files, this adds atoms from the current file thats + // being processed by the resolver + llvm::Expected handleFile(File &); + + // Handle an archive library file. + llvm::Expected handleArchiveFile(File &); + + // Handle a shared library file. 
+ llvm::Error handleSharedLibrary(File &); + + /// @brief do work of merging and resolving and return list + bool resolve(); + + std::unique_ptr resultFile() { return std::move(_result); } + +private: + typedef std::function(StringRef)> UndefCallback; + + bool undefinesAdded(int begin, int end); + File *getFile(int &index); + + /// \brief The main function that iterates over the files to resolve + bool resolveUndefines(); + void updateReferences(); + void deadStripOptimize(); + bool checkUndefines(); + void removeCoalescedAwayAtoms(); + llvm::Expected forEachUndefines(File &file, UndefCallback callback); + + void markLive(const Atom *atom); + + class MergedFile : public SimpleFile { + public: + MergedFile() : SimpleFile("", kindResolverMergedObject) {} + void addAtoms(llvm::MutableArrayRef> atoms); + }; + + LinkingContext &_ctx; + SymbolTable _symbolTable; + std::vector> _atoms; + std::set _deadStripRoots; + llvm::DenseSet _liveAtoms; + llvm::DenseSet _deadAtoms; + std::unique_ptr _result; + std::unordered_multimap _reverseRef; + + // --start-group and --end-group + std::vector _files; + std::map _newUndefinesAdded; + + // List of undefined symbols. + std::vector _undefines; + + // Start position in _undefines for each archive/shared library file. + // Symbols from index 0 to the start position are already searched before. + // Searching them again would never succeed. When we look for undefined + // symbols from an archive/shared library file, start from its start + // position to save time. 
+ std::map _undefineIndex; +}; + +} // namespace lld + +#endif // LLD_CORE_RESOLVER_H diff --git a/include/lld/Core/SharedLibraryAtom.h b/include/lld/Core/SharedLibraryAtom.h new file mode 100644 index 00000000..7fec7a3e --- /dev/null +++ b/include/lld/Core/SharedLibraryAtom.h @@ -0,0 +1,53 @@ +//===- Core/SharedLibraryAtom.h - A Shared Library Atom -------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_SHARED_LIBRARY_ATOM_H +#define LLD_CORE_SHARED_LIBRARY_ATOM_H + +#include "lld/Core/Atom.h" + +namespace lld { + +/// A SharedLibraryAtom has no content. +/// It exists to represent a symbol which will be bound at runtime. +class SharedLibraryAtom : public Atom { +public: + enum class Type : uint32_t { + Unknown, + Code, + Data, + }; + + /// Returns shared library name used to load it at runtime. + /// On Darwin it is the LC_DYLIB_LOAD dylib name. + virtual StringRef loadName() const = 0; + + /// Returns if shared library symbol can be missing at runtime and if + /// so the loader should silently resolve address of symbol to be nullptr. 
+ virtual bool canBeNullAtRuntime() const = 0; + + virtual Type type() const = 0; + + virtual uint64_t size() const = 0; + + static bool classof(const Atom *a) { + return a->definition() == definitionSharedLibrary; + } + + static inline bool classof(const SharedLibraryAtom *) { return true; } + +protected: + SharedLibraryAtom() : Atom(definitionSharedLibrary) {} + + ~SharedLibraryAtom() override = default; +}; + +} // namespace lld + +#endif // LLD_CORE_SHARED_LIBRARY_ATOM_H diff --git a/include/lld/Core/SharedLibraryFile.h b/include/lld/Core/SharedLibraryFile.h new file mode 100644 index 00000000..53bf967b --- /dev/null +++ b/include/lld/Core/SharedLibraryFile.h @@ -0,0 +1,70 @@ +//===- Core/SharedLibraryFile.h - Models shared libraries as Atoms --------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_SHARED_LIBRARY_FILE_H +#define LLD_CORE_SHARED_LIBRARY_FILE_H + +#include "lld/Core/File.h" + +namespace lld { + +/// +/// The SharedLibraryFile subclass of File is used to represent dynamic +/// shared libraries being linked against. +/// +class SharedLibraryFile : public File { +public: + static bool classof(const File *f) { + return f->kind() == kindSharedLibrary; + } + + /// Check if the shared library exports a symbol with the specified name. + /// If so, return a SharedLibraryAtom which represents that exported + /// symbol. Otherwise return nullptr. + virtual OwningAtomPtr exports(StringRef name) const = 0; + + // Returns the install name. 
+ virtual StringRef getDSOName() const = 0; + + const AtomRange defined() const override { + return _definedAtoms; + } + + const AtomRange undefined() const override { + return _undefinedAtoms; + } + + const AtomRange sharedLibrary() const override { + return _sharedLibraryAtoms; + } + + const AtomRange absolute() const override { + return _absoluteAtoms; + } + + void clearAtoms() override { + _definedAtoms.clear(); + _undefinedAtoms.clear(); + _sharedLibraryAtoms.clear(); + _absoluteAtoms.clear(); + } + +protected: + /// only subclasses of SharedLibraryFile can be instantiated + explicit SharedLibraryFile(StringRef path) : File(path, kindSharedLibrary) {} + + AtomVector _definedAtoms; + AtomVector _undefinedAtoms; + AtomVector _sharedLibraryAtoms; + AtomVector _absoluteAtoms; +}; + +} // namespace lld + +#endif // LLD_CORE_SHARED_LIBRARY_FILE_H diff --git a/include/lld/Core/Simple.h b/include/lld/Core/Simple.h new file mode 100644 index 00000000..f75b4032 --- /dev/null +++ b/include/lld/Core/Simple.h @@ -0,0 +1,324 @@ +//===- lld/Core/Simple.h - Simple implementations of Atom and File --------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Provide simple implementations for Atoms and File. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_SIMPLE_H +#define LLD_CORE_SIMPLE_H + +#include "lld/Core/AbsoluteAtom.h" +#include "lld/Core/Atom.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/Reference.h" +#include "lld/Core/SharedLibraryAtom.h" +#include "lld/Core/UndefinedAtom.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include +#include +#include + +namespace lld { + +class SimpleFile : public File { +public: + SimpleFile(StringRef path, File::Kind kind) + : File(path, kind) {} + + ~SimpleFile() override { + _defined.clear(); + _undefined.clear(); + _shared.clear(); + _absolute.clear(); + } + + void addAtom(DefinedAtom &a) { + _defined.push_back(OwningAtomPtr(&a)); + } + void addAtom(UndefinedAtom &a) { + _undefined.push_back(OwningAtomPtr(&a)); + } + void addAtom(SharedLibraryAtom &a) { + _shared.push_back(OwningAtomPtr(&a)); + } + void addAtom(AbsoluteAtom &a) { + _absolute.push_back(OwningAtomPtr(&a)); + } + + void addAtom(const Atom &atom) { + if (auto *p = dyn_cast(&atom)) { + addAtom(const_cast(*p)); + } else if (auto *p = dyn_cast(&atom)) { + addAtom(const_cast(*p)); + } else if (auto *p = dyn_cast(&atom)) { + addAtom(const_cast(*p)); + } else if (auto *p = dyn_cast(&atom)) { + addAtom(const_cast(*p)); + } else { + llvm_unreachable("atom has unknown definition kind"); + } + } + + void removeDefinedAtomsIf(std::function pred) { + auto &atoms = _defined; + auto newEnd = std::remove_if(atoms.begin(), atoms.end(), + [&pred](OwningAtomPtr &p) { + return pred(p.get()); + }); + atoms.erase(newEnd, atoms.end()); + } + + const AtomRange defined() const override { return _defined; } + + const AtomRange undefined() const override { + return _undefined; + } + + const AtomRange 
sharedLibrary() const override { + return _shared; + } + + const AtomRange absolute() const override { + return _absolute; + } + + void clearAtoms() override { + _defined.clear(); + _undefined.clear(); + _shared.clear(); + _absolute.clear(); + } + +private: + AtomVector _defined; + AtomVector _undefined; + AtomVector _shared; + AtomVector _absolute; +}; + +class SimpleReference : public Reference { +public: + SimpleReference(Reference::KindNamespace ns, Reference::KindArch arch, + Reference::KindValue value, uint64_t off, const Atom *t, + Reference::Addend a) + : Reference(ns, arch, value), _target(t), _offsetInAtom(off), _addend(a), + _next(nullptr), _prev(nullptr) { + } + SimpleReference() + : Reference(Reference::KindNamespace::all, Reference::KindArch::all, 0), + _target(nullptr), _offsetInAtom(0), _addend(0), _next(nullptr), + _prev(nullptr) { + } + + uint64_t offsetInAtom() const override { return _offsetInAtom; } + + const Atom *target() const override { + assert(_target); + return _target; + } + + Addend addend() const override { return _addend; } + void setAddend(Addend a) override { _addend = a; } + void setTarget(const Atom *newAtom) override { _target = newAtom; } + SimpleReference *getNext() const { return _next; } + SimpleReference *getPrev() const { return _prev; } + void setNext(SimpleReference *n) { _next = n; } + void setPrev(SimpleReference *p) { _prev = p; } + +private: + const Atom *_target; + uint64_t _offsetInAtom; + Addend _addend; + SimpleReference *_next; + SimpleReference *_prev; +}; + +} // end namespace lld + +// ilist will lazily create a sentinel (so end() can return a node past the +// end of the list). We need this trait so that the sentinel is allocated +// via the BumpPtrAllocator. 
+namespace llvm { + +template<> +struct ilist_sentinel_traits { + + ilist_sentinel_traits() : _allocator(nullptr) { } + + void setAllocator(llvm::BumpPtrAllocator *alloc) { + _allocator = alloc; + } + + lld::SimpleReference *createSentinel() const { + return new (*_allocator) lld::SimpleReference(); + } + + static void destroySentinel(lld::SimpleReference*) {} + + static lld::SimpleReference *provideInitialHead() { return nullptr; } + + lld::SimpleReference *ensureHead(lld::SimpleReference *&head) const { + if (!head) { + head = createSentinel(); + noteHead(head, head); + ilist_traits::setNext(head, nullptr); + return head; + } + return ilist_traits::getPrev(head); + } + + void noteHead(lld::SimpleReference *newHead, + lld::SimpleReference *sentinel) const { + ilist_traits::setPrev(newHead, sentinel); + } + +private: + mutable llvm::BumpPtrAllocator *_allocator; +}; + +} // end namespace llvm + +namespace lld { + +class SimpleDefinedAtom : public DefinedAtom { +public: + explicit SimpleDefinedAtom(const File &f) + : _file(f), _ordinal(f.getNextAtomOrdinalAndIncrement()) { + _references.setAllocator(&f.allocator()); + } + + ~SimpleDefinedAtom() override { + _references.clearAndLeakNodesUnsafely(); + } + + const File &file() const override { return _file; } + + StringRef name() const override { return StringRef(); } + + uint64_t ordinal() const override { return _ordinal; } + + Scope scope() const override { return DefinedAtom::scopeLinkageUnit; } + + Interposable interposable() const override { + return DefinedAtom::interposeNo; + } + + Merge merge() const override { return DefinedAtom::mergeNo; } + + Alignment alignment() const override { return 1; } + + SectionChoice sectionChoice() const override { + return DefinedAtom::sectionBasedOnContent; + } + + StringRef customSectionName() const override { return StringRef(); } + DeadStripKind deadStrip() const override { + return DefinedAtom::deadStripNormal; + } + + DefinedAtom::reference_iterator begin() const override 
{ + const void *it = reinterpret_cast(&*_references.begin()); + return reference_iterator(*this, it); + } + + DefinedAtom::reference_iterator end() const override { + const void *it = reinterpret_cast(&*_references.end()); + return reference_iterator(*this, it); + } + + const Reference *derefIterator(const void *it) const override { + return reinterpret_cast(it); + } + + void incrementIterator(const void *&it) const override { + const SimpleReference* node = reinterpret_cast(it); + const SimpleReference* next = node->getNext(); + it = reinterpret_cast(next); + } + + void addReference(Reference::KindNamespace ns, + Reference::KindArch arch, + Reference::KindValue kindValue, uint64_t off, + const Atom *target, Reference::Addend a) override { + assert(target && "trying to create reference to nothing"); + auto node = new (_file.allocator()) + SimpleReference(ns, arch, kindValue, off, target, a); + _references.push_back(node); + } + + /// Sort references in a canonical order (by offset, then by kind). + void sortReferences() const { + // Cannot sort a linked list, so move elements into a temporary vector, + // sort the vector, then reconstruct the list. 
+ llvm::SmallVector elements; + for (SimpleReference &node : _references) { + elements.push_back(&node); + } + std::sort(elements.begin(), elements.end(), + [] (const SimpleReference *lhs, const SimpleReference *rhs) -> bool { + uint64_t lhsOffset = lhs->offsetInAtom(); + uint64_t rhsOffset = rhs->offsetInAtom(); + if (rhsOffset != lhsOffset) + return (lhsOffset < rhsOffset); + if (rhs->kindNamespace() != lhs->kindNamespace()) + return (lhs->kindNamespace() < rhs->kindNamespace()); + if (rhs->kindArch() != lhs->kindArch()) + return (lhs->kindArch() < rhs->kindArch()); + return (lhs->kindValue() < rhs->kindValue()); + }); + _references.clearAndLeakNodesUnsafely(); + for (SimpleReference *node : elements) { + _references.push_back(node); + } + } + + void setOrdinal(uint64_t ord) { _ordinal = ord; } + +private: + typedef llvm::ilist RefList; + + const File &_file; + uint64_t _ordinal; + mutable RefList _references; +}; + +class SimpleUndefinedAtom : public UndefinedAtom { +public: + SimpleUndefinedAtom(const File &f, StringRef name) : _file(f), _name(name) { + assert(!name.empty() && "UndefinedAtoms must have a name"); + } + + ~SimpleUndefinedAtom() override = default; + + /// file - returns the File that produced/owns this Atom + const File &file() const override { return _file; } + + /// name - The name of the atom. For a function atom, it is the (mangled) + /// name of the function. 
+ StringRef name() const override { return _name; } + + CanBeNull canBeNull() const override { return UndefinedAtom::canBeNullNever; } + +private: + const File &_file; + StringRef _name; +}; + +} // end namespace lld + +#endif // LLD_CORE_SIMPLE_H diff --git a/include/lld/Core/SymbolTable.h b/include/lld/Core/SymbolTable.h new file mode 100644 index 00000000..db610ad1 --- /dev/null +++ b/include/lld/Core/SymbolTable.h @@ -0,0 +1,106 @@ +//===- Core/SymbolTable.h - Main Symbol Table -----------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_SYMBOL_TABLE_H +#define LLD_CORE_SYMBOL_TABLE_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" +#include +#include +#include + +namespace lld { + +class AbsoluteAtom; +class Atom; +class DefinedAtom; +class LinkingContext; +class ResolverOptions; +class SharedLibraryAtom; +class UndefinedAtom; + +/// \brief The SymbolTable class is responsible for coalescing atoms. +/// +/// All atoms coalescable by-name or by-content should be added. +/// The method replacement() can be used to find the replacement atom +/// if an atom has been coalesced away. 
+class SymbolTable { +public: + /// @brief add atom to symbol table + bool add(const DefinedAtom &); + + /// @brief add atom to symbol table + bool add(const UndefinedAtom &); + + /// @brief add atom to symbol table + bool add(const SharedLibraryAtom &); + + /// @brief add atom to symbol table + bool add(const AbsoluteAtom &); + + /// @brief checks if name is in symbol table and if so atom is not + /// UndefinedAtom + bool isDefined(StringRef sym); + + /// @brief returns atom in symbol table for specified name (or nullptr) + const Atom *findByName(StringRef sym); + + /// @brief returns vector of remaining UndefinedAtoms + std::vector undefines(); + + /// returns vector of tentative definitions + std::vector tentativeDefinitions(); + + /// @brief add atom to replacement table + void addReplacement(const Atom *replaced, const Atom *replacement); + + /// @brief if atom has been coalesced away, return replacement, else return atom + const Atom *replacement(const Atom *); + + /// @brief if atom has been coalesced away, return true + bool isCoalescedAway(const Atom *); + +private: + typedef llvm::DenseMap AtomToAtom; + + struct StringRefMappingInfo { + static StringRef getEmptyKey() { return StringRef(); } + static StringRef getTombstoneKey() { return StringRef(" ", 1); } + static unsigned getHashValue(StringRef const val) { + return llvm::HashString(val); + } + static bool isEqual(StringRef const lhs, StringRef const rhs) { + return lhs.equals(rhs); + } + }; + typedef llvm::DenseMap NameToAtom; + + struct AtomMappingInfo { + static const DefinedAtom * getEmptyKey() { return nullptr; } + static const DefinedAtom * getTombstoneKey() { return (DefinedAtom*)(-1); } + static unsigned getHashValue(const DefinedAtom * const Val); + static bool isEqual(const DefinedAtom * const LHS, + const DefinedAtom * const RHS); + }; + typedef llvm::DenseSet AtomContentSet; + + bool addByName(const Atom &); + bool addByContent(const DefinedAtom &); + + AtomToAtom _replacedAtoms; + 
NameToAtom _nameTable; + AtomContentSet _contentTable; +}; + +} // namespace lld + +#endif // LLD_CORE_SYMBOL_TABLE_H diff --git a/include/lld/Core/TODO.txt b/include/lld/Core/TODO.txt new file mode 100644 index 00000000..8b523045 --- /dev/null +++ b/include/lld/Core/TODO.txt @@ -0,0 +1,17 @@ +include/lld/Core +~~~~~~~~~~~~~~~~ + +* The yaml reader/writer interfaces should be changed to return + an explanatory string if there is an error. The existing error_code + abstraction only works for returning low level OS errors. It does not + work for describing formatting issues. + +* We need to design a diagnostics interface. It would be nice to share code + with Clang_ where possible. + +* We need to add more attributes to File. In particular, we need cpu + and OS information (like target triples). We should also provide explicit + support for `LLVM IR module flags metadata`__. + +.. __: http://llvm.org/docs/LangRef.html#module_flags +.. _Clang: http://clang.llvm.org/docs/InternalsManual.html#Diagnostics diff --git a/include/lld/Core/UndefinedAtom.h b/include/lld/Core/UndefinedAtom.h new file mode 100644 index 00000000..f45d6ecd --- /dev/null +++ b/include/lld/Core/UndefinedAtom.h @@ -0,0 +1,68 @@ +//===- Core/UndefinedAtom.h - An Undefined Atom ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_UNDEFINED_ATOM_H +#define LLD_CORE_UNDEFINED_ATOM_H + +#include "lld/Core/Atom.h" + +namespace lld { + +/// An UndefinedAtom has no content. +/// It exists as a placeholder for a future atom. +class UndefinedAtom : public Atom { +public: + /// Whether this undefined symbol needs to be resolved, + /// or whether it can just evaluate to nullptr. + /// This concept is often called "weak", but that term + /// is overloaded to mean other things too. 
+ enum CanBeNull { + /// Normal symbols must be resolved at build time + canBeNullNever, + + /// This symbol can be missing at runtime and will evaluate to nullptr. + /// That is, the static linker still must find a definition (usually + /// in some shared library), but at runtime, the dynamic loader + /// will allow the symbol to be missing and resolved to nullptr. + /// + /// On Darwin this is generated using a function prototype with + /// __attribute__((weak_import)). + /// On linux this is generated using a function prototype with + /// __attribute__((weak)). + /// On Windows this feature is not supported. + canBeNullAtRuntime, + + /// This symbol can be missing at build time. + /// That is, the static linker will not error if a definition for + /// this symbol is not found at build time. Instead, the linker + /// will build an executable that lets the dynamic loader find the + /// symbol at runtime. + /// This feature is not supported on Darwin nor Windows. + /// On linux this is generated using a function prototype with + /// __attribute__((weak)). + canBeNullAtBuildtime + }; + + virtual CanBeNull canBeNull() const = 0; + + static bool classof(const Atom *a) { + return a->definition() == definitionUndefined; + } + + static bool classof(const UndefinedAtom *) { return true; } + +protected: + UndefinedAtom() : Atom(definitionUndefined) {} + + ~UndefinedAtom() override = default; +}; + +} // namespace lld + +#endif // LLD_CORE_UNDEFINED_ATOM_H diff --git a/include/lld/Core/Writer.h b/include/lld/Core/Writer.h new file mode 100644 index 00000000..216f9349 --- /dev/null +++ b/include/lld/Core/Writer.h @@ -0,0 +1,47 @@ +//===- lld/Core/Writer.h - Abstract File Format Interface -----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_CORE_WRITER_H +#define LLD_CORE_WRITER_H + +#include "lld/Core/LLVM.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace lld { +class File; +class LinkingContext; +class MachOLinkingContext; + +/// \brief The Writer is an abstract class for writing object files, shared +/// library files, and executable files. Each file format (e.g. mach-o, etc) +/// has a concrete subclass of Writer. +class Writer { +public: + virtual ~Writer(); + + /// \brief Write a file from the supplied File object + virtual llvm::Error writeFile(const File &linkedFile, StringRef path) = 0; + + /// \brief This method is called by Core Linking to give the Writer a chance + /// to add file format specific "files" to set of files to be linked. This is + /// how file format specific atoms can be added to the link. + virtual void createImplicitFiles(std::vector> &) {} + +protected: + // only concrete subclasses can be instantiated + Writer(); +}; + +std::unique_ptr createWriterMachO(const MachOLinkingContext &); +std::unique_ptr createWriterYAML(const LinkingContext &); +} // end namespace lld + +#endif diff --git a/include/lld/Driver/Driver.h b/include/lld/Driver/Driver.h new file mode 100644 index 00000000..312f4f81 --- /dev/null +++ b/include/lld/Driver/Driver.h @@ -0,0 +1,32 @@ +//===- lld/Driver/Driver.h - Linker Driver Emulator -----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_DRIVER_DRIVER_H +#define LLD_DRIVER_DRIVER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/raw_ostream.h" + +namespace lld { +namespace coff { +bool link(llvm::ArrayRef Args); +} + +namespace elf { +bool link(llvm::ArrayRef Args, + llvm::raw_ostream &Diag = llvm::errs()); +} + +namespace mach_o { +bool link(llvm::ArrayRef Args, + llvm::raw_ostream &Diag = llvm::errs()); +} +} + +#endif diff --git a/include/lld/ReaderWriter/MachOLinkingContext.h b/include/lld/ReaderWriter/MachOLinkingContext.h new file mode 100644 index 00000000..7b673f0d --- /dev/null +++ b/include/lld/ReaderWriter/MachOLinkingContext.h @@ -0,0 +1,504 @@ +//===- lld/ReaderWriter/MachOLinkingContext.h -----------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_LINKING_CONTEXT_H +#define LLD_READER_WRITER_MACHO_LINKING_CONTEXT_H + +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Writer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" +#include + +using llvm::MachO::HeaderFileType; + +namespace lld { + +namespace mach_o { +class ArchHandler; +class MachODylibFile; +class MachOFile; +class SectCreateFile; +} + +class MachOLinkingContext : public LinkingContext { +public: + MachOLinkingContext(); + ~MachOLinkingContext() override; + + enum Arch { + arch_unknown, + arch_ppc, + arch_x86, + arch_x86_64, + arch_armv6, + arch_armv7, + arch_armv7s, + arch_arm64, + }; + + enum class OS { + unknown, + macOSX, + iOS, + iOS_simulator + }; + + enum class ExportMode { + globals, // Default, all global 
symbols exported. + whiteList, // -exported_symbol[s_list], only listed symbols exported. + blackList // -unexported_symbol[s_list], no listed symbol exported. + }; + + enum class DebugInfoMode { + addDebugMap, // Default + noDebugMap // -S option + }; + + enum class UndefinedMode { + error, + warning, + suppress, + dynamicLookup + }; + + enum ObjCConstraint { + objc_unknown = 0, + objc_supports_gc = 2, + objc_gc_only = 4, + // Image optimized by dyld = 8 + // GC compaction = 16 + objc_retainReleaseForSimulator = 32, + objc_retainRelease + }; + + /// Initializes the context to sane default values given the specified output + /// file type, arch, os, and minimum os version. This should be called before + /// other setXXX() methods. + void configure(HeaderFileType type, Arch arch, OS os, uint32_t minOSVersion, + bool exportDynamicSymbols); + + void addPasses(PassManager &pm) override; + bool validateImpl(raw_ostream &diagnostics) override; + std::string demangle(StringRef symbolName) const override; + + void createImplicitFiles(std::vector> &) override; + + /// Creates a new file which is owned by the context. Returns a pointer to + /// the new file. + template + typename std::enable_if::value, T *>::type + make_file(Args &&... 
args) const { + auto file = std::unique_ptr(new T(std::forward(args)...)); + auto *filePtr = file.get(); + auto *ctx = const_cast(this); + ctx->getNodes().push_back(llvm::make_unique(std::move(file))); + return filePtr; + } + + uint32_t getCPUType() const; + uint32_t getCPUSubType() const; + + bool addEntryPointLoadCommand() const; + bool addUnixThreadLoadCommand() const; + bool outputTypeHasEntry() const; + bool is64Bit() const; + + virtual uint64_t pageZeroSize() const { return _pageZeroSize; } + virtual uint64_t pageSize() const { return _pageSize; } + + mach_o::ArchHandler &archHandler() const; + + HeaderFileType outputMachOType() const { return _outputMachOType; } + + Arch arch() const { return _arch; } + StringRef archName() const { return nameFromArch(_arch); } + OS os() const { return _os; } + + ExportMode exportMode() const { return _exportMode; } + void setExportMode(ExportMode mode) { _exportMode = mode; } + void addExportSymbol(StringRef sym); + bool exportRestrictMode() const { return _exportMode != ExportMode::globals; } + bool exportSymbolNamed(StringRef sym) const; + + DebugInfoMode debugInfoMode() const { return _debugInfoMode; } + void setDebugInfoMode(DebugInfoMode mode) { + _debugInfoMode = mode; + } + + void appendOrderedSymbol(StringRef symbol, StringRef filename); + + bool keepPrivateExterns() const { return _keepPrivateExterns; } + void setKeepPrivateExterns(bool v) { _keepPrivateExterns = v; } + bool demangleSymbols() const { return _demangle; } + void setDemangleSymbols(bool d) { _demangle = d; } + bool mergeObjCCategories() const { return _mergeObjCCategories; } + void setMergeObjCCategories(bool v) { _mergeObjCCategories = v; } + /// Create file at specified path which will contain a binary encoding + /// of all input and output file paths. 
+ std::error_code createDependencyFile(StringRef path); + void addInputFileDependency(StringRef path) const; + void addInputFileNotFound(StringRef path) const; + void addOutputFileDependency(StringRef path) const; + + bool minOS(StringRef mac, StringRef iOS) const; + void setDoNothing(bool value) { _doNothing = value; } + bool doNothing() const { return _doNothing; } + bool printAtoms() const { return _printAtoms; } + bool testingFileUsage() const { return _testingFileUsage; } + const StringRefVector &searchDirs() const { return _searchDirs; } + const StringRefVector &frameworkDirs() const { return _frameworkDirs; } + void setSysLibRoots(const StringRefVector &paths); + const StringRefVector &sysLibRoots() const { return _syslibRoots; } + bool PIE() const { return _pie; } + void setPIE(bool pie) { _pie = pie; } + bool generateVersionLoadCommand() const { + return _generateVersionLoadCommand; + } + void setGenerateVersionLoadCommand(bool v) { + _generateVersionLoadCommand = v; + } + + bool generateFunctionStartsLoadCommand() const { + return _generateFunctionStartsLoadCommand; + } + void setGenerateFunctionStartsLoadCommand(bool v) { + _generateFunctionStartsLoadCommand = v; + } + + bool generateDataInCodeLoadCommand() const { + return _generateDataInCodeLoadCommand; + } + void setGenerateDataInCodeLoadCommand(bool v) { + _generateDataInCodeLoadCommand = v; + } + + uint64_t stackSize() const { return _stackSize; } + void setStackSize(uint64_t stackSize) { _stackSize = stackSize; } + + uint64_t baseAddress() const { return _baseAddress; } + void setBaseAddress(uint64_t baseAddress) { _baseAddress = baseAddress; } + + ObjCConstraint objcConstraint() const { return _objcConstraint; } + + uint32_t osMinVersion() const { return _osMinVersion; } + + uint32_t sdkVersion() const { return _sdkVersion; } + void setSdkVersion(uint64_t v) { _sdkVersion = v; } + + uint64_t sourceVersion() const { return _sourceVersion; } + void setSourceVersion(uint64_t v) { _sourceVersion = v; 
} + + uint32_t swiftVersion() const { return _swiftVersion; } + + /// \brief Checks whether a given path on the filesystem exists. + /// + /// When running in -test_file_usage mode, this method consults an + /// internally maintained list of files that exist (provided by -path_exists) + /// instead of the actual filesystem. + bool pathExists(StringRef path) const; + + /// Like pathExists() but only used on files - not directories. + bool fileExists(StringRef path) const; + + /// \brief Adds any library search paths derived from the given base, possibly + /// modified by -syslibroots. + /// + /// The set of paths added consists of approximately all syslibroot-prepended + /// versions of libPath that exist, or the original libPath if there are none + /// for whatever reason. With various edge-cases for compatibility. + void addModifiedSearchDir(StringRef libPath, bool isSystemPath = false); + + /// \brief Determine whether -lFoo can be resolved within the given path, and + /// return the filename if so. + /// + /// The -lFoo option is documented to search for libFoo.dylib and libFoo.a in + /// that order, unless Foo ends in ".o", in which case only the exact file + /// matches (e.g. -lfoo.o would only find foo.o). + llvm::Optional searchDirForLibrary(StringRef path, + StringRef libName) const; + + /// \brief Iterates through all search path entries looking for libName (as + /// specified by -lFoo). + llvm::Optional searchLibrary(StringRef libName) const; + + /// Add a framework search path. Internally, this method may prepend + /// syslibroot to the path. + void addFrameworkSearchDir(StringRef fwPath, bool isSystemPath = false); + + /// \brief Iterates through all framework directories looking for + /// Foo.framework/Foo (when fwName = "Foo"). + llvm::Optional findPathForFramework(StringRef fwName) const; + + /// \brief The dylib's binary compatibility version, in the raw uint32 format. 
+ /// + /// When building a dynamic library, this is the compatibility version that + /// gets embedded into the result. Other Mach-O binaries that link against + /// this library will store the compatibility version in its load command. At + /// runtime, the loader will verify that the binary is compatible with the + /// installed dynamic library. + uint32_t compatibilityVersion() const { return _compatibilityVersion; } + + /// \brief The dylib's current version, in the raw uint32 format. + /// + /// When building a dynamic library, this is the current version that gets + /// embedded into the result. Other Mach-O binaries that link against + /// this library will store the compatibility version in its load command. + uint32_t currentVersion() const { return _currentVersion; } + + /// \brief The dylib's install name. + /// + /// Binaries that link against the dylib will embed this path into the dylib + /// load command. When loading the binaries at runtime, this is the location + /// on disk that the loader will look for the dylib. + StringRef installName() const { return _installName; } + + /// \brief Whether or not the dylib has side effects during initialization. + /// + /// Dylibs marked as being dead strippable provide the guarantee that loading + /// the dylib has no side effects, allowing the linker to strip out the dylib + /// when linking a binary that does not use any of its symbols. + bool deadStrippableDylib() const { return _deadStrippableDylib; } + + /// \brief Whether or not to use flat namespace. + /// + /// MachO usually uses a two-level namespace, where each external symbol + /// referenced by the target is associated with the dylib that will provide + /// the symbol's definition at runtime. Using flat namespace overrides this + /// behavior: the linker searches all dylibs on the command line and all + /// dylibs those original dylibs depend on, but does not record which dylib + /// an external symbol came from.
At runtime dyld again searches all images + /// and uses the first definition it finds. In addition, any undefines in + /// loaded flat_namespace dylibs must be resolvable at build time. + bool useFlatNamespace() const { return _flatNamespace; } + + /// \brief How to handle undefined symbols. + /// + /// Options are: + /// * error: Report an error and terminate linking. + /// * warning: Report a warning, but continue linking. + /// * suppress: Ignore and continue linking. + /// * dynamic_lookup: For use with -twolevel namespace: Records source dylibs + /// for symbols that are defined in a linked dylib at static link time. + /// Undefined symbols are handled by searching all loaded images at + /// runtime. + UndefinedMode undefinedMode() const { return _undefinedMode; } + + /// \brief The path to the executable that will load the bundle at runtime. + /// + /// When building a Mach-O bundle, this executable will be examined if there + /// are undefined symbols after the main link phase. It is expected that this + /// binary will be loading the bundle at runtime and will provide the symbols + /// at that point. 
+ StringRef bundleLoader() const { return _bundleLoader; } + + void setCompatibilityVersion(uint32_t vers) { _compatibilityVersion = vers; } + void setCurrentVersion(uint32_t vers) { _currentVersion = vers; } + void setInstallName(StringRef name) { _installName = name; } + void setDeadStrippableDylib(bool deadStrippable) { + _deadStrippableDylib = deadStrippable; + } + void setUseFlatNamespace(bool flatNamespace) { + _flatNamespace = flatNamespace; + } + + void setUndefinedMode(UndefinedMode undefinedMode) { + _undefinedMode = undefinedMode; + } + + void setBundleLoader(StringRef loader) { _bundleLoader = loader; } + void setPrintAtoms(bool value=true) { _printAtoms = value; } + void setTestingFileUsage(bool value = true) { + _testingFileUsage = value; + } + void addExistingPathForDebug(StringRef path) { + _existingPaths.insert(path); + } + + void addRpath(StringRef rpath); + const StringRefVector &rpaths() const { return _rpaths; } + + /// Add section alignment constraint on final layout. + void addSectionAlignment(StringRef seg, StringRef sect, uint16_t align); + + /// \brief Add a section based on a command-line sectcreate option. + void addSectCreateSection(StringRef seg, StringRef sect, + std::unique_ptr content); + + /// Returns true if specified section had alignment constraints. + bool sectionAligned(StringRef seg, StringRef sect, uint16_t &align) const; + + StringRef dyldPath() const { return "/usr/lib/dyld"; } + + /// Stub creation Pass should be run. + bool needsStubsPass() const; + + // GOT creation Pass should be run. + bool needsGOTPass() const; + + /// Pass to add TLV sections. + bool needsTLVPass() const; + + /// Pass to transform __compact_unwind into __unwind_info should be run. + bool needsCompactUnwindPass() const; + + /// Pass to add shims switching between thumb and arm mode. + bool needsShimPass() const; + + /// Pass to add objc image info and optimized objc data. 
+ bool needsObjCPass() const; + + /// Magic symbol name stubs will need to help lazy bind. + StringRef binderSymbolName() const; + + /// Used to keep track of direct and indirect dylibs. + void registerDylib(mach_o::MachODylibFile *dylib, bool upward) const; + + // Reads a file from disk to memory. Returns only a needed chunk + // if a fat binary. + ErrorOr> getMemoryBuffer(StringRef path); + + /// Used to find indirect dylibs. Instantiates a MachODylibFile if one + /// has not already been made for the requested dylib. Uses -L and -F + /// search paths to allow indirect dylibs to be overridden. + mach_o::MachODylibFile* findIndirectDylib(StringRef path); + + uint32_t dylibCurrentVersion(StringRef installName) const; + + uint32_t dylibCompatVersion(StringRef installName) const; + + /// Creates a copy (owned by this MachOLinkingContext) of a string. + StringRef copy(StringRef str) { return str.copy(_allocator); } + + /// If the memoryBuffer is a fat file with a slice for the current arch, + /// this method will return the offset and size of that slice. + bool sliceFromFatFile(MemoryBufferRef mb, uint32_t &offset, uint32_t &size); + + /// Returns if a command line option specified dylib is an upward link. + bool isUpwardDylib(StringRef installName) const; + + static bool isThinObjectFile(StringRef path, Arch &arch); + static Arch archFromCpuType(uint32_t cputype, uint32_t cpusubtype); + static Arch archFromName(StringRef archName); + static StringRef nameFromArch(Arch arch); + static uint32_t cpuTypeFromArch(Arch arch); + static uint32_t cpuSubtypeFromArch(Arch arch); + static bool is64Bit(Arch arch); + static bool isHostEndian(Arch arch); + static bool isBigEndian(Arch arch); + + /// Construct 32-bit value from string "X.Y.Z" where + /// bits are xxxx.yy.zz. Largest number is 65535.255.255 + static bool parsePackedVersion(StringRef str, uint32_t &result); + + /// Construct 64-bit value from string "A.B.C.D.E" where + /// bits are aaaa.bb.cc.dd.ee. 
Largest number is 16777215.1023.1023.1023.1023 + static bool parsePackedVersion(StringRef str, uint64_t &result); + + void finalizeInputFiles() override; + + llvm::Error handleLoadedFile(File &file) override; + + bool customAtomOrderer(const DefinedAtom *left, const DefinedAtom *right, + bool &leftBeforeRight) const; + + /// Return the 'flat namespace' file. This is the file that supplies + /// atoms for otherwise undefined symbols when the -flat_namespace or + /// -undefined dynamic_lookup options are used. + File* flatNamespaceFile() const { return _flatNamespaceFile; } + +private: + Writer &writer() const override; + mach_o::MachODylibFile* loadIndirectDylib(StringRef path); + void checkExportWhiteList(const DefinedAtom *atom) const; + void checkExportBlackList(const DefinedAtom *atom) const; + struct ArchInfo { + StringRef archName; + MachOLinkingContext::Arch arch; + bool littleEndian; + uint32_t cputype; + uint32_t cpusubtype; + }; + + struct SectionAlign { + StringRef segmentName; + StringRef sectionName; + uint16_t align; + }; + + struct OrderFileNode { + StringRef fileFilter; + unsigned order; + }; + + static bool findOrderOrdinal(const std::vector &nodes, + const DefinedAtom *atom, unsigned &ordinal); + + static ArchInfo _s_archInfos[]; + + std::set _existingPaths; // For testing only. 
+ StringRefVector _searchDirs; + StringRefVector _syslibRoots; + StringRefVector _frameworkDirs; + HeaderFileType _outputMachOType = llvm::MachO::MH_EXECUTE; + bool _outputMachOTypeStatic = false; // Disambiguate static vs dynamic prog + bool _doNothing = false; // for -help and -v which just print info + bool _pie = false; + Arch _arch = arch_unknown; + OS _os = OS::macOSX; + uint32_t _osMinVersion = 0; + uint32_t _sdkVersion = 0; + uint64_t _sourceVersion = 0; + uint64_t _pageZeroSize = 0; + uint64_t _pageSize = 4096; + uint64_t _baseAddress = 0; + uint64_t _stackSize = 0; + uint32_t _compatibilityVersion = 0; + uint32_t _currentVersion = 0; + ObjCConstraint _objcConstraint = objc_unknown; + uint32_t _swiftVersion = 0; + StringRef _installName; + StringRefVector _rpaths; + bool _flatNamespace = false; + UndefinedMode _undefinedMode = UndefinedMode::error; + bool _deadStrippableDylib = false; + bool _printAtoms = false; + bool _testingFileUsage = false; + bool _keepPrivateExterns = false; + bool _demangle = false; + bool _mergeObjCCategories = true; + bool _generateVersionLoadCommand = false; + bool _generateFunctionStartsLoadCommand = false; + bool _generateDataInCodeLoadCommand = false; + StringRef _bundleLoader; + mutable std::unique_ptr _archHandler; + mutable std::unique_ptr _writer; + std::vector _sectAligns; + mutable llvm::StringMap _pathToDylibMap; + mutable std::set _allDylibs; + mutable std::set _upwardDylibs; + mutable std::vector> _indirectDylibs; + mutable std::mutex _dylibsMutex; + ExportMode _exportMode = ExportMode::globals; + llvm::StringSet<> _exportedSymbols; + DebugInfoMode _debugInfoMode = DebugInfoMode::addDebugMap; + std::unique_ptr _dependencyInfo; + llvm::StringMap> _orderFiles; + unsigned _orderFileEntries = 0; + File *_flatNamespaceFile = nullptr; + mach_o::SectCreateFile *_sectCreateFile = nullptr; +}; + +} // end namespace lld + +#endif // LLD_READER_WRITER_MACHO_LINKING_CONTEXT_H diff --git a/include/lld/ReaderWriter/YamlContext.h 
b/include/lld/ReaderWriter/YamlContext.h new file mode 100644 index 00000000..b26161a1 --- /dev/null +++ b/include/lld/ReaderWriter/YamlContext.h @@ -0,0 +1,42 @@ +//===- lld/ReaderWriter/YamlContext.h - object used in YAML I/O context ---===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_YAML_CONTEXT_H +#define LLD_READER_WRITER_YAML_CONTEXT_H + +#include "lld/Core/LLVM.h" +#include +#include +#include + +namespace lld { +class File; +class LinkingContext; +namespace mach_o { +namespace normalized { +struct NormalizedFile; +} +} + +using lld::mach_o::normalized::NormalizedFile; + +/// When YAML I/O is used in lld, the yaml context always holds a YamlContext +/// object. We need to support heterogeneous yaml documents which each require +/// different context info. This struct supports all clients.
+struct YamlContext { + const LinkingContext *_ctx = nullptr; + const Registry *_registry = nullptr; + File *_file = nullptr; + NormalizedFile *_normalizeMachOFile = nullptr; + StringRef _path; +}; + +} // end namespace lld + +#endif // LLD_READER_WRITER_YAML_CONTEXT_H diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt new file mode 100644 index 00000000..699f5e93 --- /dev/null +++ b/lib/CMakeLists.txt @@ -0,0 +1,4 @@ +add_subdirectory(Config) +add_subdirectory(Core) +add_subdirectory(Driver) +add_subdirectory(ReaderWriter) diff --git a/lib/Config/CMakeLists.txt b/lib/Config/CMakeLists.txt new file mode 100644 index 00000000..e971b0b7 --- /dev/null +++ b/lib/Config/CMakeLists.txt @@ -0,0 +1,9 @@ +add_lld_library(lldConfig + Version.cpp + + ADDITIONAL_HEADER_DIRS + ${LLD_INCLUDE_DIR}/lld/Config + + LINK_LIBS + LLVMSupport + ) diff --git a/lib/Config/Version.cpp b/lib/Config/Version.cpp new file mode 100644 index 00000000..60687b9d --- /dev/null +++ b/lib/Config/Version.cpp @@ -0,0 +1,57 @@ +//===- lib/Config/Version.cpp - LLD Version Number ---------------*- C++-=====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines several version-related utility functions for LLD. 
+// +//===----------------------------------------------------------------------===// + +#include "lld/Config/Version.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace lld { + +StringRef getLLDRepositoryPath() { +#ifdef LLD_REPOSITORY_STRING + return LLD_REPOSITORY_STRING; +#else + return ""; +#endif +} + +StringRef getLLDRevision() { +#ifdef LLD_REVISION_STRING + return LLD_REVISION_STRING; +#else + return ""; +#endif +} + +std::string getLLDRepositoryVersion() { + std::string S = getLLDRepositoryPath(); + std::string T = getLLDRevision(); + if (S.empty() && T.empty()) + return ""; + if (!S.empty() && !T.empty()) + return "(" + S + " " + T + ")"; + if (!S.empty()) + return "(" + S + ")"; + return "(" + T + ")"; +} + +StringRef getLLDVersion() { +#ifdef LLD_VERSION_STRING + return LLD_VERSION_STRING; +#else + return ""; +#endif +} + +} // end namespace lld diff --git a/lib/Core/CMakeLists.txt b/lib/Core/CMakeLists.txt new file mode 100644 index 00000000..41e0e766 --- /dev/null +++ b/lib/Core/CMakeLists.txt @@ -0,0 +1,16 @@ +add_lld_library(lldCore + DefinedAtom.cpp + Error.cpp + File.cpp + LinkingContext.cpp + Reader.cpp + Resolver.cpp + SymbolTable.cpp + Writer.cpp + + ADDITIONAL_HEADER_DIRS + ${LLD_INCLUDE_DIR}/lld/Core + + LINK_LIBS + LLVMSupport + ) diff --git a/lib/Core/DefinedAtom.cpp b/lib/Core/DefinedAtom.cpp new file mode 100644 index 00000000..8dc4d4a1 --- /dev/null +++ b/lib/Core/DefinedAtom.cpp @@ -0,0 +1,94 @@ +//===- DefinedAtom.cpp ------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ErrorHandling.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" + +namespace lld { + +DefinedAtom::ContentPermissions DefinedAtom::permissions() const { + // By default base permissions on content type. + return permissions(this->contentType()); +} + +// Utility function for deriving permissions from content type +DefinedAtom::ContentPermissions DefinedAtom::permissions(ContentType type) { + switch (type) { + case typeCode: + case typeResolver: + case typeBranchIsland: + case typeBranchShim: + case typeStub: + case typeStubHelper: + case typeMachHeader: + return permR_X; + + case typeConstant: + case typeCString: + case typeUTF16String: + case typeCFI: + case typeLSDA: + case typeLiteral4: + case typeLiteral8: + case typeLiteral16: + case typeDTraceDOF: + case typeCompactUnwindInfo: + case typeProcessedUnwindInfo: + case typeObjCImageInfo: + case typeObjCMethodList: + return permR__; + + case typeData: + case typeDataFast: + case typeZeroFill: + case typeZeroFillFast: + case typeObjC1Class: + case typeLazyPointer: + case typeLazyDylibPointer: + case typeNonLazyPointer: + case typeThunkTLV: + return permRW_; + + case typeGOT: + case typeConstData: + case typeCFString: + case typeInitializerPtr: + case typeTerminatorPtr: + case typeCStringPtr: + case typeObjCClassPtr: + case typeObjC2CategoryList: + case typeInterposingTuples: + case typeTLVInitialData: + case typeTLVInitialZeroFill: + case typeTLVInitializerPtr: + return permRW_L; + + case typeUnknown: + case typeTempLTO: + case typeSectCreate: + case typeDSOHandle: + return permUnknown; + } + llvm_unreachable("unknown content type"); +} + +bool DefinedAtom::compareByPosition(const DefinedAtom *lhs, + const DefinedAtom *rhs) { + if (lhs == rhs) + return false; + const File *lhsFile = &lhs->file(); + const File *rhsFile = &rhs->file(); + if (lhsFile->ordinal() != rhsFile->ordinal()) + return 
lhsFile->ordinal() < rhsFile->ordinal(); + assert(lhs->ordinal() != rhs->ordinal()); + return lhs->ordinal() < rhs->ordinal(); +} + +} // namespace diff --git a/lib/Core/Error.cpp b/lib/Core/Error.cpp new file mode 100644 index 00000000..4df1ce12 --- /dev/null +++ b/lib/Core/Error.cpp @@ -0,0 +1,91 @@ +//===- Error.cpp - system_error extensions for lld --------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Error.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include +#include + +using namespace lld; + +class _YamlReaderErrorCategory : public std::error_category { +public: + const char* name() const LLVM_NOEXCEPT override { + return "lld.yaml.reader"; + } + + std::string message(int ev) const override { + switch (static_cast(ev)) { + case YamlReaderError::unknown_keyword: + return "Unknown keyword found in yaml file"; + case YamlReaderError::illegal_value: + return "Bad value found in yaml file"; + } + llvm_unreachable("An enumerator of YamlReaderError does not have a " + "message defined."); + } +}; + +const std::error_category &lld::YamlReaderCategory() { + static _YamlReaderErrorCategory o; + return o; +} + +namespace lld { + +/// Temporary class to enable make_dynamic_error_code() until +/// llvm::ErrorOr<> is updated to work with error encapsulations +/// other than error_code. +class dynamic_error_category : public std::error_category { +public: + ~dynamic_error_category() override = default; + + const char *name() const LLVM_NOEXCEPT override { + return "lld.dynamic_error"; + } + + std::string message(int ev) const override { + assert(ev >= 0); + assert(ev < (int)_messages.size()); + // The value is an index into the string vector. 
+ return _messages[ev]; + } + + int add(std::string msg) { + std::lock_guard lock(_mutex); + // Value zero is always the success value. + if (_messages.empty()) + _messages.push_back("Success"); + _messages.push_back(msg); + // Return the index of the string just appended. + return _messages.size() - 1; + } + +private: + std::vector _messages; + std::recursive_mutex _mutex; +}; + +static dynamic_error_category categorySingleton; + +std::error_code make_dynamic_error_code(StringRef msg) { + return std::error_code(categorySingleton.add(msg), categorySingleton); +} + +char GenericError::ID = 0; + +GenericError::GenericError(Twine Msg) : Msg(Msg.str()) { } + +void GenericError::log(raw_ostream &OS) const { + OS << Msg; +} + +} // namespace lld diff --git a/lib/Core/File.cpp b/lib/Core/File.cpp new file mode 100644 index 00000000..b84132bf --- /dev/null +++ b/lib/Core/File.cpp @@ -0,0 +1,30 @@ +//===- Core/File.cpp - A Container of Atoms -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include + +namespace lld { + +File::~File() { } + +File::AtomVector File::_noDefinedAtoms; +File::AtomVector File::_noUndefinedAtoms; +File::AtomVector File::_noSharedLibraryAtoms; +File::AtomVector File::_noAbsoluteAtoms; + +std::error_code File::parse() { + std::lock_guard lock(_parseMutex); + if (!_lastError.hasValue()) + _lastError = doParse(); + return _lastError.getValue(); +} + +} // namespace lld diff --git a/lib/Core/LinkingContext.cpp b/lib/Core/LinkingContext.cpp new file mode 100644 index 00000000..2732543d --- /dev/null +++ b/lib/Core/LinkingContext.cpp @@ -0,0 +1,69 @@ +//===- lib/Core/LinkingContext.cpp - Linker Context Object Interface ------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Resolver.h" +#include "lld/Core/Simple.h" +#include "lld/Core/Writer.h" +#include "llvm/ADT/Triple.h" + +namespace lld { + +LinkingContext::LinkingContext() {} + +LinkingContext::~LinkingContext() {} + +bool LinkingContext::validate(raw_ostream &diagnostics) { + return validateImpl(diagnostics); +} + +llvm::Error LinkingContext::writeFile(const File &linkedFile) const { + return this->writer().writeFile(linkedFile, _outputPath); +} + +std::unique_ptr LinkingContext::createEntrySymbolFile() const { + return createEntrySymbolFile(""); +} + +std::unique_ptr +LinkingContext::createEntrySymbolFile(StringRef filename) const { + if (entrySymbolName().empty()) + return nullptr; + std::unique_ptr entryFile(new SimpleFile(filename, + File::kindEntryObject)); + entryFile->addAtom( + *(new (_allocator) SimpleUndefinedAtom(*entryFile, entrySymbolName()))); + return std::move(entryFile); +} + 
+std::unique_ptr LinkingContext::createUndefinedSymbolFile() const { + return createUndefinedSymbolFile(""); +} + +std::unique_ptr +LinkingContext::createUndefinedSymbolFile(StringRef filename) const { + if (_initialUndefinedSymbols.empty()) + return nullptr; + std::unique_ptr undefinedSymFile( + new SimpleFile(filename, File::kindUndefinedSymsObject)); + for (StringRef undefSym : _initialUndefinedSymbols) + undefinedSymFile->addAtom(*(new (_allocator) SimpleUndefinedAtom( + *undefinedSymFile, undefSym))); + return std::move(undefinedSymFile); +} + +void LinkingContext::createInternalFiles( + std::vector > &result) const { + if (std::unique_ptr file = createEntrySymbolFile()) + result.push_back(std::move(file)); + if (std::unique_ptr file = createUndefinedSymbolFile()) + result.push_back(std::move(file)); +} + +} // end namespace lld diff --git a/lib/Core/Reader.cpp b/lib/Core/Reader.cpp new file mode 100644 index 00000000..107db078 --- /dev/null +++ b/lib/Core/Reader.cpp @@ -0,0 +1,110 @@ +//===- lib/Core/Reader.cpp ------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/File.h" +#include "lld/Core/Reader.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/MemoryBuffer.h" +#include +#include + +namespace lld { + +YamlIOTaggedDocumentHandler::~YamlIOTaggedDocumentHandler() {} + +void Registry::add(std::unique_ptr reader) { + _readers.push_back(std::move(reader)); +} + +void Registry::add(std::unique_ptr handler) { + _yamlHandlers.push_back(std::move(handler)); +} + +ErrorOr> +Registry::loadFile(std::unique_ptr mb) const { + // Get file magic. 
+ StringRef content(mb->getBufferStart(), mb->getBufferSize()); + llvm::sys::fs::file_magic fileType = llvm::sys::fs::identify_magic(content); + + // Ask each registered reader if it can handle this file type or extension. + for (const std::unique_ptr &reader : _readers) { + if (!reader->canParse(fileType, mb->getMemBufferRef())) + continue; + return reader->loadFile(std::move(mb), *this); + } + + // No Reader could parse this file. + return make_error_code(llvm::errc::executable_format_error); +} + +static const Registry::KindStrings kindStrings[] = { + {Reference::kindLayoutAfter, "layout-after"}, + {Reference::kindAssociate, "associate"}, + LLD_KIND_STRING_END}; + +Registry::Registry() { + addKindTable(Reference::KindNamespace::all, Reference::KindArch::all, + kindStrings); +} + +bool Registry::handleTaggedDoc(llvm::yaml::IO &io, + const lld::File *&file) const { + for (const std::unique_ptr &h : _yamlHandlers) + if (h->handledDocTag(io, file)) + return true; + return false; +} + + +void Registry::addKindTable(Reference::KindNamespace ns, + Reference::KindArch arch, + const KindStrings array[]) { + KindEntry entry = { ns, arch, array }; + _kindEntries.push_back(entry); +} + +bool Registry::referenceKindFromString(StringRef inputStr, + Reference::KindNamespace &ns, + Reference::KindArch &arch, + Reference::KindValue &value) const { + for (const KindEntry &entry : _kindEntries) { + for (const KindStrings *pair = entry.array; !pair->name.empty(); ++pair) { + if (!inputStr.equals(pair->name)) + continue; + ns = entry.ns; + arch = entry.arch; + value = pair->value; + return true; + } + } + return false; +} + +bool Registry::referenceKindToString(Reference::KindNamespace ns, + Reference::KindArch arch, + Reference::KindValue value, + StringRef &str) const { + for (const KindEntry &entry : _kindEntries) { + if (entry.ns != ns) + continue; + if (entry.arch != arch) + continue; + for (const KindStrings *pair = entry.array; !pair->name.empty(); ++pair) { + if (pair->value 
!= value) + continue; + str = pair->name; + return true; + } + } + return false; +} + +} // end namespace lld diff --git a/lib/Core/Resolver.cpp b/lib/Core/Resolver.cpp new file mode 100644 index 00000000..ef694fd9 --- /dev/null +++ b/lib/Core/Resolver.cpp @@ -0,0 +1,505 @@ +//===- Core/Resolver.cpp - Resolves Atom References -----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Atom.h" +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/File.h" +#include "lld/Core/Instrumentation.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Resolver.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/Core/SymbolTable.h" +#include "lld/Core/UndefinedAtom.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + +namespace lld { + +llvm::Expected Resolver::handleFile(File &file) { + if (auto ec = _ctx.handleLoadedFile(file)) + return std::move(ec); + bool undefAdded = false; + for (auto &atom : file.defined().owning_ptrs()) + doDefinedAtom(std::move(atom)); + for (auto &atom : file.undefined().owning_ptrs()) { + if (doUndefinedAtom(std::move(atom))) + undefAdded = true; + } + for (auto &atom : file.sharedLibrary().owning_ptrs()) + doSharedLibraryAtom(std::move(atom)); + for (auto &atom : file.absolute().owning_ptrs()) + doAbsoluteAtom(std::move(atom)); + return undefAdded; +} + +llvm::Expected Resolver::forEachUndefines(File &file, + UndefCallback callback) { + size_t i = _undefineIndex[&file]; + bool undefAdded = false; + do { + for (; i < _undefines.size(); ++i) { + StringRef undefName = 
_undefines[i]; + if (undefName.empty()) + continue; + const Atom *atom = _symbolTable.findByName(undefName); + if (!isa(atom) || _symbolTable.isCoalescedAway(atom)) { + // The symbol was resolved by some other file. Cache the result. + _undefines[i] = ""; + continue; + } + auto undefAddedOrError = callback(undefName); + if (auto ec = undefAddedOrError.takeError()) + return std::move(ec); + undefAdded |= undefAddedOrError.get(); + } + } while (i < _undefines.size()); + _undefineIndex[&file] = i; + return undefAdded; +} + +llvm::Expected Resolver::handleArchiveFile(File &file) { + ArchiveLibraryFile *archiveFile = cast(&file); + return forEachUndefines(file, + [&](StringRef undefName) -> llvm::Expected { + if (File *member = archiveFile->find(undefName)) { + member->setOrdinal(_ctx.getNextOrdinalAndIncrement()); + return handleFile(*member); + } + return false; + }); +} + +llvm::Error Resolver::handleSharedLibrary(File &file) { + // Add all the atoms from the shared library + SharedLibraryFile *sharedLibrary = cast(&file); + auto undefAddedOrError = handleFile(*sharedLibrary); + if (auto ec = undefAddedOrError.takeError()) + return ec; + undefAddedOrError = + forEachUndefines(file, [&](StringRef undefName) -> llvm::Expected { + auto atom = sharedLibrary->exports(undefName); + if (atom.get()) + doSharedLibraryAtom(std::move(atom)); + return false; + }); + + if (auto ec = undefAddedOrError.takeError()) + return ec; + return llvm::Error(); +} + +bool Resolver::doUndefinedAtom(OwningAtomPtr atom) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << " UndefinedAtom: " + << llvm::format("0x%09lX", atom.get()) + << ", name=" << atom.get()->name() << "\n"); + + // tell symbol table + bool newUndefAdded = _symbolTable.add(*atom.get()); + if (newUndefAdded) + _undefines.push_back(atom.get()->name()); + + // add to list of known atoms + _atoms.push_back(OwningAtomPtr(atom.release())); + + return newUndefAdded; +} + +// Called on each atom when a file is added. 
Adds the atom to the +// symbol table and to the list of known atoms. +void Resolver::doDefinedAtom(OwningAtomPtr atom) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << " DefinedAtom: " + << llvm::format("0x%09lX", atom.get()) + << ", file=#" + << atom.get()->file().ordinal() + << ", atom=#" + << atom.get()->ordinal() + << ", name=" + << atom.get()->name() + << ", type=" + << atom.get()->contentType() + << "\n"); + + // An atom that should never be dead-stripped is a dead-strip root. + if (_ctx.deadStrip() && + atom.get()->deadStrip() == DefinedAtom::deadStripNever) { + _deadStripRoots.insert(atom.get()); + } + + // add to list of known atoms + _symbolTable.add(*atom.get()); + _atoms.push_back(OwningAtomPtr(atom.release())); +} + +void Resolver::doSharedLibraryAtom(OwningAtomPtr atom) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << " SharedLibraryAtom: " + << llvm::format("0x%09lX", atom.get()) + << ", name=" + << atom.get()->name() + << "\n"); + + // tell symbol table + _symbolTable.add(*atom.get()); + + // add to list of known atoms + _atoms.push_back(OwningAtomPtr(atom.release())); +} + +void Resolver::doAbsoluteAtom(OwningAtomPtr atom) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << " AbsoluteAtom: " + << llvm::format("0x%09lX", atom.get()) + << ", name=" + << atom.get()->name() + << "\n"); + + // tell symbol table + if (atom.get()->scope() != Atom::scopeTranslationUnit) + _symbolTable.add(*atom.get()); + + // add to list of known atoms + _atoms.push_back(OwningAtomPtr(atom.release())); +} + +// Returns true if at least one of N previous files has created an +// undefined symbol.
+bool Resolver::undefinesAdded(int begin, int end) { + std::vector> &inputs = _ctx.getNodes(); + for (int i = begin; i < end; ++i) + if (FileNode *node = dyn_cast(inputs[i].get())) + if (_newUndefinesAdded[node->getFile()]) + return true; + return false; +} + +File *Resolver::getFile(int &index) { + std::vector> &inputs = _ctx.getNodes(); + if ((size_t)index >= inputs.size()) + return nullptr; + if (GroupEnd *group = dyn_cast(inputs[index].get())) { + // We are at the end of the current group. If one or more new + // undefined atoms have been added in the last groupSize files, we + // reiterate over the files. + int size = group->getSize(); + if (undefinesAdded(index - size, index)) { + index -= size; + return getFile(index); + } + ++index; + return getFile(index); + } + return cast(inputs[index++].get())->getFile(); +} + +// Keep adding atoms until getFile() returns nullptr. This +// function is where undefined atoms are resolved. +bool Resolver::resolveUndefines() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Resolving undefines:\n"); + ScopedTask task(getDefaultDomain(), "resolveUndefines"); + int index = 0; + std::set seen; + for (;;) { + bool undefAdded = false; + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "Loading file #" << index << "\n"); + File *file = getFile(index); + if (!file) + return true; + if (std::error_code ec = file->parse()) { + llvm::errs() << "Cannot open " + file->path() + << ": " << ec.message() << "\n"; + return false; + } + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "Loaded file: " << file->path() << "\n"); + switch (file->kind()) { + case File::kindErrorObject: + case File::kindNormalizedObject: + case File::kindMachObject: + case File::kindCEntryObject: + case File::kindHeaderObject: + case File::kindEntryObject: + case File::kindUndefinedSymsObject: + case File::kindStubHelperObject: + case File::kindResolverMergedObject: + case File::kindSectCreateObject: { + // The same file may be visited more than once
if the file is + // in --start-group and --end-group. Only library files should + // be processed more than once. + if (seen.count(file)) + break; + seen.insert(file); + assert(!file->hasOrdinal()); + file->setOrdinal(_ctx.getNextOrdinalAndIncrement()); + auto undefAddedOrError = handleFile(*file); + if (auto EC = undefAddedOrError.takeError()) { + // FIXME: This should be passed to logAllUnhandledErrors but it needs + // to be passed a Twine instead of a string. + llvm::errs() << "Error in " + file->path() << ": "; + logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string()); + return false; + } + undefAdded = undefAddedOrError.get(); + break; + } + case File::kindArchiveLibrary: { + if (!file->hasOrdinal()) + file->setOrdinal(_ctx.getNextOrdinalAndIncrement()); + auto undefAddedOrError = handleArchiveFile(*file); + if (auto EC = undefAddedOrError.takeError()) { + // FIXME: This should be passed to logAllUnhandledErrors but it needs + // to be passed a Twine instead of a string. + llvm::errs() << "Error in " + file->path() << ": "; + logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string()); + return false; + } + undefAdded = undefAddedOrError.get(); + break; + } + case File::kindSharedLibrary: + if (!file->hasOrdinal()) + file->setOrdinal(_ctx.getNextOrdinalAndIncrement()); + if (auto EC = handleSharedLibrary(*file)) { + // FIXME: This should be passed to logAllUnhandledErrors but it needs + // to be passed a Twine instead of a string. 
+ llvm::errs() << "Error in " + file->path() << ": "; + logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string()); + return false; + } + break; + } + _newUndefinesAdded[file] = undefAdded; + } +} + +// switch all references to undefined or coalesced away atoms +// to the new defined atom +void Resolver::updateReferences() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Updating references:\n"); + ScopedTask task(getDefaultDomain(), "updateReferences"); + for (const OwningAtomPtr &atom : _atoms) { + if (const DefinedAtom *defAtom = dyn_cast(atom.get())) { + for (const Reference *ref : *defAtom) { + // A reference of type kindAssociate should't be updated. + // Instead, an atom having such reference will be removed + // if the target atom is coalesced away, so that they will + // go away as a group. + if (ref->kindNamespace() == lld::Reference::KindNamespace::all && + ref->kindValue() == lld::Reference::kindAssociate) { + if (_symbolTable.isCoalescedAway(atom.get())) + _deadAtoms.insert(ref->target()); + continue; + } + const Atom *newTarget = _symbolTable.replacement(ref->target()); + const_cast(ref)->setTarget(newTarget); + } + } + } +} + +// For dead code stripping, recursively mark atoms "live" +void Resolver::markLive(const Atom *atom) { + // Mark the atom is live. If it's already marked live, then stop recursion. 
+ auto exists = _liveAtoms.insert(atom); + if (!exists.second) + return; + + // Mark all atoms it references as live + if (const DefinedAtom *defAtom = dyn_cast(atom)) { + for (const Reference *ref : *defAtom) + markLive(ref->target()); + for (auto &p : llvm::make_range(_reverseRef.equal_range(defAtom))) { + const Atom *target = p.second; + markLive(target); + } + } +} + +static bool isBackref(const Reference *ref) { + if (ref->kindNamespace() != lld::Reference::KindNamespace::all) + return false; + return (ref->kindValue() == lld::Reference::kindLayoutAfter); +} + +// remove all atoms not actually used +void Resolver::deadStripOptimize() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Dead stripping unused atoms:\n"); + ScopedTask task(getDefaultDomain(), "deadStripOptimize"); + // only do this optimization with -dead_strip + if (!_ctx.deadStrip()) + return; + + // Some type of references prevent referring atoms to be dead-striped. + // Make a reverse map of such references before traversing the graph. + // While traversing the list of atoms, mark AbsoluteAtoms as live + // in order to avoid reclaim. + for (const OwningAtomPtr &atom : _atoms) { + if (const DefinedAtom *defAtom = dyn_cast(atom.get())) + for (const Reference *ref : *defAtom) + if (isBackref(ref)) + _reverseRef.insert(std::make_pair(ref->target(), atom.get())); + if (const AbsoluteAtom *absAtom = dyn_cast(atom.get())) + markLive(absAtom); + } + + // By default, shared libraries are built with all globals as dead strip roots + if (_ctx.globalsAreDeadStripRoots()) + for (const OwningAtomPtr &atom : _atoms) + if (const DefinedAtom *defAtom = dyn_cast(atom.get())) + if (defAtom->scope() == DefinedAtom::scopeGlobal) + _deadStripRoots.insert(defAtom); + + // Or, use list of names that are dead strip roots. 
+ for (const StringRef &name : _ctx.deadStripRoots()) { + const Atom *symAtom = _symbolTable.findByName(name); + assert(symAtom); + _deadStripRoots.insert(symAtom); + } + + // mark all roots as live, and recursively all atoms they reference + for (const Atom *dsrAtom : _deadStripRoots) + markLive(dsrAtom); + + // now remove all non-live atoms from _atoms + _atoms.erase(std::remove_if(_atoms.begin(), _atoms.end(), + [&](OwningAtomPtr &a) { + return _liveAtoms.count(a.get()) == 0; + }), + _atoms.end()); +} + +// error out if some undefines remain +bool Resolver::checkUndefines() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Checking for undefines:\n"); + + // build vector of remaining undefined symbols + std::vector undefinedAtoms = _symbolTable.undefines(); + if (_ctx.deadStrip()) { + // When dead code stripping, we don't care if dead atoms are undefined. + undefinedAtoms.erase( + std::remove_if(undefinedAtoms.begin(), undefinedAtoms.end(), + [&](const Atom *a) { return _liveAtoms.count(a) == 0; }), + undefinedAtoms.end()); + } + + if (undefinedAtoms.empty()) + return false; + + // Warn about unresolved symbols. + bool foundUndefines = false; + for (const UndefinedAtom *undef : undefinedAtoms) { + // Skip over a weak symbol. + if (undef->canBeNull() != UndefinedAtom::canBeNullNever) + continue; + + // If this is a library and undefined symbols are allowed on the + // target platform, skip over it. + if (isa(undef->file()) && _ctx.allowShlibUndefines()) + continue; + + // If the undefine is coalesced away, skip over it. + if (_symbolTable.isCoalescedAway(undef)) + continue; + + // Seems like this symbol is undefined. Warn that. 
+ foundUndefines = true; + if (_ctx.printRemainingUndefines()) { + llvm::errs() << "Undefined symbol: " << undef->file().path() + << ": " << _ctx.demangle(undef->name()) + << "\n"; + } + } + if (!foundUndefines) + return false; + if (_ctx.printRemainingUndefines()) + llvm::errs() << "symbol(s) not found\n"; + return true; +} + +// remove from _atoms all coaleseced away atoms +void Resolver::removeCoalescedAwayAtoms() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Removing coalesced away atoms:\n"); + ScopedTask task(getDefaultDomain(), "removeCoalescedAwayAtoms"); + _atoms.erase(std::remove_if(_atoms.begin(), _atoms.end(), + [&](OwningAtomPtr &a) { + return _symbolTable.isCoalescedAway(a.get()) || + _deadAtoms.count(a.get()); + }), + _atoms.end()); +} + +bool Resolver::resolve() { + DEBUG_WITH_TYPE("resolver", + llvm::dbgs() << "******** Resolving atom references:\n"); + if (!resolveUndefines()) + return false; + updateReferences(); + deadStripOptimize(); + if (checkUndefines()) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "Found undefines... 
"); + if (!_ctx.allowRemainingUndefines()) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "which we don't allow\n"); + return false; + } + DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "which we are ok with\n"); + } + removeCoalescedAwayAtoms(); + _result->addAtoms(_atoms); + DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "******** Finished resolver\n"); + return true; +} + +void Resolver::MergedFile::addAtoms( + llvm::MutableArrayRef> all) { + ScopedTask task(getDefaultDomain(), "addAtoms"); + DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "Resolver final atom list:\n"); + + for (OwningAtomPtr &atom : all) { +#ifndef NDEBUG + if (auto *definedAtom = dyn_cast(atom.get())) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << llvm::format(" 0x%09lX", definedAtom) + << ", file=#" + << definedAtom->file().ordinal() + << ", atom=#" + << definedAtom->ordinal() + << ", name=" + << definedAtom->name() + << ", type=" + << definedAtom->contentType() + << "\n"); + } else { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << llvm::format(" 0x%09lX", atom.get()) + << ", name=" + << atom.get()->name() + << "\n"); + } +#endif + addAtom(*atom.release()); + } +} + +} // namespace lld diff --git a/lib/Core/SymbolTable.cpp b/lib/Core/SymbolTable.cpp new file mode 100644 index 00000000..44631a5d --- /dev/null +++ b/lib/Core/SymbolTable.cpp @@ -0,0 +1,319 @@ +//===- Core/SymbolTable.cpp - Main Symbol Table ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "lld/Core/SymbolTable.h" +#include "lld/Core/AbsoluteAtom.h" +#include "lld/Core/Atom.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Resolver.h" +#include "lld/Core/SharedLibraryAtom.h" +#include "lld/Core/UndefinedAtom.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + +namespace lld { +bool SymbolTable::add(const UndefinedAtom &atom) { return addByName(atom); } + +bool SymbolTable::add(const SharedLibraryAtom &atom) { return addByName(atom); } + +bool SymbolTable::add(const AbsoluteAtom &atom) { return addByName(atom); } + +bool SymbolTable::add(const DefinedAtom &atom) { + if (!atom.name().empty() && + atom.scope() != DefinedAtom::scopeTranslationUnit) { + // Named atoms cannot be merged by content. + assert(atom.merge() != DefinedAtom::mergeByContent); + // Track named atoms that are not scoped to file (static). + return addByName(atom); + } + if (atom.merge() == DefinedAtom::mergeByContent) { + // Named atoms cannot be merged by content. + assert(atom.name().empty()); + // Currently only read-only constants can be merged. + if (atom.permissions() == DefinedAtom::permR__) + return addByContent(atom); + // TODO: support mergeByContent of data atoms by comparing content & fixups. 
+ } + return false; +} + +enum NameCollisionResolution { + NCR_First, + NCR_Second, + NCR_DupDef, + NCR_DupUndef, + NCR_DupShLib, + NCR_Error +}; + +static NameCollisionResolution cases[4][4] = { + //regular absolute undef sharedLib + { + // first is regular + NCR_DupDef, NCR_Error, NCR_First, NCR_First + }, + { + // first is absolute + NCR_Error, NCR_Error, NCR_First, NCR_First + }, + { + // first is undef + NCR_Second, NCR_Second, NCR_DupUndef, NCR_Second + }, + { + // first is sharedLib + NCR_Second, NCR_Second, NCR_First, NCR_DupShLib + } +}; + +static NameCollisionResolution collide(Atom::Definition first, + Atom::Definition second) { + return cases[first][second]; +} + +enum MergeResolution { + MCR_First, + MCR_Second, + MCR_Largest, + MCR_SameSize, + MCR_Error +}; + +static MergeResolution mergeCases[][6] = { + // no tentative weak weakAddress sameNameAndSize largest + {MCR_Error, MCR_First, MCR_First, MCR_First, MCR_SameSize, MCR_Largest}, // no + {MCR_Second, MCR_Largest, MCR_Second, MCR_Second, MCR_SameSize, MCR_Largest}, // tentative + {MCR_Second, MCR_First, MCR_First, MCR_Second, MCR_SameSize, MCR_Largest}, // weak + {MCR_Second, MCR_First, MCR_First, MCR_First, MCR_SameSize, MCR_Largest}, // weakAddress + {MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize}, // sameSize + {MCR_Largest, MCR_Largest, MCR_Largest, MCR_Largest, MCR_SameSize, MCR_Largest}, // largest +}; + +static MergeResolution mergeSelect(DefinedAtom::Merge first, + DefinedAtom::Merge second) { + assert(first != DefinedAtom::mergeByContent); + assert(second != DefinedAtom::mergeByContent); + return mergeCases[first][second]; +} + +bool SymbolTable::addByName(const Atom &newAtom) { + StringRef name = newAtom.name(); + assert(!name.empty()); + const Atom *existing = findByName(name); + if (existing == nullptr) { + // Name is not in symbol table yet, add it associate with this atom. 
+ _nameTable[name] = &newAtom; + return true; + } + + // Do nothing if the same object is added more than once. + if (existing == &newAtom) + return false; + + // Name is already in symbol table and associated with another atom. + bool useNew = true; + switch (collide(existing->definition(), newAtom.definition())) { + case NCR_First: + useNew = false; + break; + case NCR_Second: + useNew = true; + break; + case NCR_DupDef: { + const auto *existingDef = cast(existing); + const auto *newDef = cast(&newAtom); + switch (mergeSelect(existingDef->merge(), newDef->merge())) { + case MCR_First: + useNew = false; + break; + case MCR_Second: + useNew = true; + break; + case MCR_Largest: { + uint64_t existingSize = existingDef->sectionSize(); + uint64_t newSize = newDef->sectionSize(); + useNew = (newSize >= existingSize); + break; + } + case MCR_SameSize: { + uint64_t existingSize = existingDef->sectionSize(); + uint64_t newSize = newDef->sectionSize(); + if (existingSize == newSize) { + useNew = true; + break; + } + llvm::errs() << "Size mismatch: " + << existing->name() << " (" << existingSize << ") " + << newAtom.name() << " (" << newSize << ")\n"; + // fallthrough + } + case MCR_Error: + llvm::errs() << "Duplicate symbols: " + << existing->name() + << ":" + << existing->file().path() + << " and " + << newAtom.name() + << ":" + << newAtom.file().path() + << "\n"; + llvm::report_fatal_error("duplicate symbol error"); + break; + } + break; + } + case NCR_DupUndef: { + const UndefinedAtom* existingUndef = cast(existing); + const UndefinedAtom* newUndef = cast(&newAtom); + + bool sameCanBeNull = (existingUndef->canBeNull() == newUndef->canBeNull()); + if (sameCanBeNull) + useNew = false; + else + useNew = (newUndef->canBeNull() < existingUndef->canBeNull()); + break; + } + case NCR_DupShLib: { + useNew = false; + break; + } + case NCR_Error: + llvm::errs() << "SymbolTable: error while merging " << name << "\n"; + llvm::report_fatal_error("duplicate symbol error"); + break; + 
} + + if (useNew) { + // Update name table to use new atom. + _nameTable[name] = &newAtom; + // Add existing atom to replacement table. + _replacedAtoms[existing] = &newAtom; + } else { + // New atom is not being used. Add it to replacement table. + _replacedAtoms[&newAtom] = existing; + } + return false; +} + +unsigned SymbolTable::AtomMappingInfo::getHashValue(const DefinedAtom *atom) { + auto content = atom->rawContent(); + return llvm::hash_combine(atom->size(), + atom->contentType(), + llvm::hash_combine_range(content.begin(), + content.end())); +} + +bool SymbolTable::AtomMappingInfo::isEqual(const DefinedAtom * const l, + const DefinedAtom * const r) { + if (l == r) + return true; + if (l == getEmptyKey()) + return false; + if (r == getEmptyKey()) + return false; + if (l == getTombstoneKey()) + return false; + if (r == getTombstoneKey()) + return false; + if (l->contentType() != r->contentType()) + return false; + if (l->size() != r->size()) + return false; + if (l->sectionChoice() != r->sectionChoice()) + return false; + if (l->sectionChoice() == DefinedAtom::sectionCustomRequired) { + if (!l->customSectionName().equals(r->customSectionName())) + return false; + } + ArrayRef lc = l->rawContent(); + ArrayRef rc = r->rawContent(); + return memcmp(lc.data(), rc.data(), lc.size()) == 0; +} + +bool SymbolTable::addByContent(const DefinedAtom &newAtom) { + AtomContentSet::iterator pos = _contentTable.find(&newAtom); + if (pos == _contentTable.end()) { + _contentTable.insert(&newAtom); + return true; + } + const Atom* existing = *pos; + // New atom is not being used. Add it to replacement table. 
+ _replacedAtoms[&newAtom] = existing; + return false; +} + +const Atom *SymbolTable::findByName(StringRef sym) { + NameToAtom::iterator pos = _nameTable.find(sym); + if (pos == _nameTable.end()) + return nullptr; + return pos->second; +} + +bool SymbolTable::isDefined(StringRef sym) { + if (const Atom *atom = findByName(sym)) + return !isa(atom); + return false; +} + +void SymbolTable::addReplacement(const Atom *replaced, + const Atom *replacement) { + _replacedAtoms[replaced] = replacement; +} + +const Atom *SymbolTable::replacement(const Atom *atom) { + // Find the replacement for a given atom. Atoms in _replacedAtoms + // may be chained, so find the last one. + for (;;) { + AtomToAtom::iterator pos = _replacedAtoms.find(atom); + if (pos == _replacedAtoms.end()) + return atom; + atom = pos->second; + } +} + +bool SymbolTable::isCoalescedAway(const Atom *atom) { + return _replacedAtoms.count(atom) > 0; +} + +std::vector SymbolTable::undefines() { + std::vector ret; + for (auto it : _nameTable) { + const Atom *atom = it.second; + assert(atom != nullptr); + if (const auto *undef = dyn_cast(atom)) + if (_replacedAtoms.count(undef) == 0) + ret.push_back(undef); + } + return ret; +} + +std::vector SymbolTable::tentativeDefinitions() { + std::vector ret; + for (auto entry : _nameTable) { + const Atom *atom = entry.second; + StringRef name = entry.first; + assert(atom != nullptr); + if (const DefinedAtom *defAtom = dyn_cast(atom)) + if (defAtom->merge() == DefinedAtom::mergeAsTentative) + ret.push_back(name); + } + return ret; +} + +} // namespace lld diff --git a/lib/Core/Writer.cpp b/lib/Core/Writer.cpp new file mode 100644 index 00000000..93e6438a --- /dev/null +++ b/lib/Core/Writer.cpp @@ -0,0 +1,19 @@ +//===- lib/Core/Writer.cpp ------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "lld/Core/File.h" +#include "lld/Core/Writer.h" + +namespace lld { +Writer::Writer() { +} + +Writer::~Writer() { +} +} // end namespace lld diff --git a/lib/Driver/CMakeLists.txt b/lib/Driver/CMakeLists.txt new file mode 100644 index 00000000..1bd1f212 --- /dev/null +++ b/lib/Driver/CMakeLists.txt @@ -0,0 +1,22 @@ +set(LLVM_TARGET_DEFINITIONS DarwinLdOptions.td) +tablegen(LLVM DarwinLdOptions.inc -gen-opt-parser-defs) +add_public_tablegen_target(DriverOptionsTableGen) + +add_lld_library(lldDriver + DarwinLdDriver.cpp + + ADDITIONAL_HEADER_DIRS + ${LLD_INCLUDE_DIR}/lld/Driver + + LINK_LIBS + lldConfig + lldMachO + lldCore + lldReaderWriter + lldYAML + LLVMObject + LLVMOption + LLVMSupport + ) + +add_dependencies(lldDriver DriverOptionsTableGen) diff --git a/lib/Driver/DarwinLdDriver.cpp b/lib/Driver/DarwinLdDriver.cpp new file mode 100644 index 00000000..496b651b --- /dev/null +++ b/lib/Driver/DarwinLdDriver.cpp @@ -0,0 +1,1215 @@ +//===- lib/Driver/DarwinLdDriver.cpp --------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// Concrete instance of the Driver for darwin's ld. 
+/// +//===----------------------------------------------------------------------===// + +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/File.h" +#include "lld/Core/Instrumentation.h" +#include "lld/Core/PassManager.h" +#include "lld/Core/Resolver.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/Driver/Driver.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +using namespace lld; + +namespace { + +// Create enum with OPT_xxx values for each option in DarwinLdOptions.td +enum { + OPT_INVALID = 0, +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELP, META) \ + OPT_##ID, +#include "DarwinLdOptions.inc" +#undef OPTION +}; + +// Create prefix string literals used in DarwinLdOptions.td +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "DarwinLdOptions.inc" +#undef PREFIX + +// Create table mapping all options defined in DarwinLdOptions.td +static const llvm::opt::OptTable::Info infoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR) \ + { PREFIX, NAME, HELPTEXT, METAVAR, OPT_##ID, llvm::opt::Option::KIND##Class, \ + PARAM, FLAGS, OPT_##GROUP, OPT_##ALIAS, ALIASARGS }, +#include "DarwinLdOptions.inc" +#undef OPTION +}; + +// Create OptTable class for parsing actual command line arguments +class DarwinLdOptTable : public llvm::opt::OptTable { +public: + DarwinLdOptTable() : OptTable(infoTable) {} +}; + +static std::vector> +makeErrorFile(StringRef path, std::error_code ec) { + std::vector> result; + result.push_back(llvm::make_unique(path, ec)); + return result; 
+} + +static std::vector> +parseMemberFiles(std::unique_ptr file) { + std::vector> members; + if (auto *archive = dyn_cast(file.get())) { + if (std::error_code ec = archive->parseAllMembers(members)) + return makeErrorFile(file->path(), ec); + } else { + members.push_back(std::move(file)); + } + return members; +} + +std::vector> +loadFile(MachOLinkingContext &ctx, StringRef path, + raw_ostream &diag, bool wholeArchive, bool upwardDylib) { + if (ctx.logInputFiles()) + diag << path << "\n"; + + ErrorOr> mbOrErr = ctx.getMemoryBuffer(path); + if (std::error_code ec = mbOrErr.getError()) + return makeErrorFile(path, ec); + ErrorOr> fileOrErr = + ctx.registry().loadFile(std::move(mbOrErr.get())); + if (std::error_code ec = fileOrErr.getError()) + return makeErrorFile(path, ec); + std::unique_ptr &file = fileOrErr.get(); + + // If file is a dylib, inform LinkingContext about it. + if (SharedLibraryFile *shl = dyn_cast(file.get())) { + if (std::error_code ec = shl->parse()) + return makeErrorFile(path, ec); + ctx.registerDylib(reinterpret_cast(shl), + upwardDylib); + } + if (wholeArchive) + return parseMemberFiles(std::move(file)); + std::vector> files; + files.push_back(std::move(file)); + return files; +} + +} // anonymous namespace + +// Test may be running on Windows. Canonicalize the path +// separator to '/' to get consistent outputs for tests. 
+static std::string canonicalizePath(StringRef path) { + char sep = llvm::sys::path::get_separator().front(); + if (sep != '/') { + std::string fixedPath = path; + std::replace(fixedPath.begin(), fixedPath.end(), sep, '/'); + return fixedPath; + } else { + return path; + } +} + +static void addFile(StringRef path, MachOLinkingContext &ctx, + bool loadWholeArchive, + bool upwardDylib, raw_ostream &diag) { + std::vector> files = + loadFile(ctx, path, diag, loadWholeArchive, upwardDylib); + for (std::unique_ptr &file : files) + ctx.getNodes().push_back(llvm::make_unique(std::move(file))); +} + +// Export lists are one symbol per line. Blank lines are ignored. +// Trailing comments start with #. +static std::error_code parseExportsList(StringRef exportFilePath, + MachOLinkingContext &ctx, + raw_ostream &diagnostics) { + // Map in export list file. + ErrorOr> mb = + MemoryBuffer::getFileOrSTDIN(exportFilePath); + if (std::error_code ec = mb.getError()) + return ec; + ctx.addInputFileDependency(exportFilePath); + StringRef buffer = mb->get()->getBuffer(); + while (!buffer.empty()) { + // Split off each line in the file. + std::pair lineAndRest = buffer.split('\n'); + StringRef line = lineAndRest.first; + // Ignore trailing # comments. + std::pair symAndComment = line.split('#'); + StringRef sym = symAndComment.first.trim(); + if (!sym.empty()) + ctx.addExportSymbol(sym); + buffer = lineAndRest.second; + } + return std::error_code(); +} + + + +/// Order files are one symbol per line. Blank lines are ignored. +/// Trailing comments start with #. Symbol names can be prefixed with an +/// architecture name and/or .o leaf name. Examples: +/// _foo +/// bar.o:_bar +/// libfrob.a(bar.o):_bar +/// x86_64:_foo64 +static std::error_code parseOrderFile(StringRef orderFilePath, + MachOLinkingContext &ctx, + raw_ostream &diagnostics) { + // Map in order file. 
+ ErrorOr> mb = + MemoryBuffer::getFileOrSTDIN(orderFilePath); + if (std::error_code ec = mb.getError()) + return ec; + ctx.addInputFileDependency(orderFilePath); + StringRef buffer = mb->get()->getBuffer(); + while (!buffer.empty()) { + // Split off each line in the file. + std::pair lineAndRest = buffer.split('\n'); + StringRef line = lineAndRest.first; + buffer = lineAndRest.second; + // Ignore trailing # comments. + std::pair symAndComment = line.split('#'); + if (symAndComment.first.empty()) + continue; + StringRef sym = symAndComment.first.trim(); + if (sym.empty()) + continue; + // Check for prefix. + StringRef prefix; + std::pair prefixAndSym = sym.split(':'); + if (!prefixAndSym.second.empty()) { + sym = prefixAndSym.second; + prefix = prefixAndSym.first; + if (!prefix.endswith(".o") && !prefix.endswith(".o)")) { + // If arch name prefix does not match arch being linked, ignore symbol. + if (!ctx.archName().equals(prefix)) + continue; + prefix = ""; + } + } else + sym = prefixAndSym.first; + if (!sym.empty()) { + ctx.appendOrderedSymbol(sym, prefix); + //llvm::errs() << sym << ", prefix=" << prefix << "\n"; + } + } + return std::error_code(); +} + +// +// There are two variants of the -filelist option: +// +// -filelist +// In this variant, the path is to a text file which contains one file path +// per line. There are no comments or trimming of whitespace. +// +// -fileList , +// In this variant, the path is to a text file which contains a partial path +// per line. The prefix is prepended to each partial path. +// +static llvm::Error loadFileList(StringRef fileListPath, + MachOLinkingContext &ctx, bool forceLoad, + raw_ostream &diagnostics) { + // If there is a comma, split off . + std::pair opt = fileListPath.split(','); + StringRef filePath = opt.first; + StringRef dirName = opt.second; + ctx.addInputFileDependency(filePath); + // Map in file list file. 
+ ErrorOr> mb = + MemoryBuffer::getFileOrSTDIN(filePath); + if (std::error_code ec = mb.getError()) + return llvm::errorCodeToError(ec); + StringRef buffer = mb->get()->getBuffer(); + while (!buffer.empty()) { + // Split off each line in the file. + std::pair lineAndRest = buffer.split('\n'); + StringRef line = lineAndRest.first; + StringRef path; + if (!dirName.empty()) { + // If there is a then prepend dir to each line. + SmallString<256> fullPath; + fullPath.assign(dirName); + llvm::sys::path::append(fullPath, Twine(line)); + path = ctx.copy(fullPath.str()); + } else { + // No use whole line as input file path. + path = ctx.copy(line); + } + if (!ctx.pathExists(path)) { + return llvm::make_error(Twine("File not found '") + + path + + "'"); + } + if (ctx.testingFileUsage()) { + diagnostics << "Found filelist entry " << canonicalizePath(path) << '\n'; + } + addFile(path, ctx, forceLoad, false, diagnostics); + buffer = lineAndRest.second; + } + return llvm::Error(); +} + +/// Parse number assuming it is base 16, but allow 0x prefix. 
+static bool parseNumberBase16(StringRef numStr, uint64_t &baseAddress) { + if (numStr.startswith_lower("0x")) + numStr = numStr.drop_front(2); + return numStr.getAsInteger(16, baseAddress); +} + +static void parseLLVMOptions(const LinkingContext &ctx) { + // Honor -mllvm + if (!ctx.llvmOptions().empty()) { + unsigned numArgs = ctx.llvmOptions().size(); + auto **args = new const char *[numArgs + 2]; + args[0] = "lld (LLVM option parsing)"; + for (unsigned i = 0; i != numArgs; ++i) + args[i + 1] = ctx.llvmOptions()[i]; + args[numArgs + 1] = nullptr; + llvm::cl::ParseCommandLineOptions(numArgs + 1, args); + } +} + +namespace lld { +namespace mach_o { + +bool parse(llvm::ArrayRef args, MachOLinkingContext &ctx, + raw_ostream &diagnostics) { + // Parse command line options using DarwinLdOptions.td + DarwinLdOptTable table; + unsigned missingIndex; + unsigned missingCount; + llvm::opt::InputArgList parsedArgs = + table.ParseArgs(args.slice(1), missingIndex, missingCount); + if (missingCount) { + diagnostics << "error: missing arg value for '" + << parsedArgs.getArgString(missingIndex) << "' expected " + << missingCount << " argument(s).\n"; + return false; + } + + for (auto unknownArg : parsedArgs.filtered(OPT_UNKNOWN)) { + diagnostics << "warning: ignoring unknown argument: " + << unknownArg->getAsString(parsedArgs) << "\n"; + } + + // Figure out output kind ( -dylib, -r, -bundle, -preload, or -static ) + llvm::MachO::HeaderFileType fileType = llvm::MachO::MH_EXECUTE; + bool isStaticExecutable = false; + if (llvm::opt::Arg *kind = parsedArgs.getLastArg( + OPT_dylib, OPT_relocatable, OPT_bundle, OPT_static, OPT_preload)) { + switch (kind->getOption().getID()) { + case OPT_dylib: + fileType = llvm::MachO::MH_DYLIB; + break; + case OPT_relocatable: + fileType = llvm::MachO::MH_OBJECT; + break; + case OPT_bundle: + fileType = llvm::MachO::MH_BUNDLE; + break; + case OPT_static: + fileType = llvm::MachO::MH_EXECUTE; + isStaticExecutable = true; + break; + case OPT_preload: + 
fileType = llvm::MachO::MH_PRELOAD; + break; + } + } + + // Handle -arch xxx + MachOLinkingContext::Arch arch = MachOLinkingContext::arch_unknown; + if (llvm::opt::Arg *archStr = parsedArgs.getLastArg(OPT_arch)) { + arch = MachOLinkingContext::archFromName(archStr->getValue()); + if (arch == MachOLinkingContext::arch_unknown) { + diagnostics << "error: unknown arch named '" << archStr->getValue() + << "'\n"; + return false; + } + } + // If no -arch specified, scan input files to find first non-fat .o file. + if (arch == MachOLinkingContext::arch_unknown) { + for (auto &inFile : parsedArgs.filtered(OPT_INPUT)) { + // This is expensive because it opens and maps the file. But that is + // ok because no -arch is rare. + if (MachOLinkingContext::isThinObjectFile(inFile->getValue(), arch)) + break; + } + if (arch == MachOLinkingContext::arch_unknown && + !parsedArgs.getLastArg(OPT_test_file_usage)) { + // If no -arch and no options at all, print usage message. + if (parsedArgs.size() == 0) + table.PrintHelp(llvm::outs(), args[0], "LLVM Linker", false); + else + diagnostics << "error: -arch not specified and could not be inferred\n"; + return false; + } + } + + // Handle -macosx_version_min or -ios_version_min + MachOLinkingContext::OS os = MachOLinkingContext::OS::unknown; + uint32_t minOSVersion = 0; + if (llvm::opt::Arg *minOS = + parsedArgs.getLastArg(OPT_macosx_version_min, OPT_ios_version_min, + OPT_ios_simulator_version_min)) { + switch (minOS->getOption().getID()) { + case OPT_macosx_version_min: + os = MachOLinkingContext::OS::macOSX; + if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), + minOSVersion)) { + diagnostics << "error: malformed macosx_version_min value\n"; + return false; + } + break; + case OPT_ios_version_min: + os = MachOLinkingContext::OS::iOS; + if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), + minOSVersion)) { + diagnostics << "error: malformed ios_version_min value\n"; + return false; + } + break; + case 
OPT_ios_simulator_version_min: + os = MachOLinkingContext::OS::iOS_simulator; + if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), + minOSVersion)) { + diagnostics << "error: malformed ios_simulator_version_min value\n"; + return false; + } + break; + } + } else { + // No min-os version on command line, check environment variables + } + + // Handle export_dynamic + // FIXME: Should we warn when this applies to something other than a static + // executable or dylib? Those are the only cases where this has an effect. + // Note, this has to come before ctx.configure() so that we get the correct + // value for _globalsAreDeadStripRoots. + bool exportDynamicSymbols = parsedArgs.hasArg(OPT_export_dynamic); + + // Now that there's enough information parsed in, let the linking context + // set up default values. + ctx.configure(fileType, arch, os, minOSVersion, exportDynamicSymbols); + + // Handle -e xxx + if (llvm::opt::Arg *entry = parsedArgs.getLastArg(OPT_entry)) + ctx.setEntrySymbolName(entry->getValue()); + + // Handle -o xxx + if (llvm::opt::Arg *outpath = parsedArgs.getLastArg(OPT_output)) + ctx.setOutputPath(outpath->getValue()); + else + ctx.setOutputPath("a.out"); + + // Handle -image_base XXX and -seg1addr XXXX + if (llvm::opt::Arg *imageBase = parsedArgs.getLastArg(OPT_image_base)) { + uint64_t baseAddress; + if (parseNumberBase16(imageBase->getValue(), baseAddress)) { + diagnostics << "error: image_base expects a hex number\n"; + return false; + } else if (baseAddress < ctx.pageZeroSize()) { + diagnostics << "error: image_base overlaps with __PAGEZERO\n"; + return false; + } else if (baseAddress % ctx.pageSize()) { + diagnostics << "error: image_base must be a multiple of page size (" + << "0x" << llvm::utohexstr(ctx.pageSize()) << ")\n"; + return false; + } + + ctx.setBaseAddress(baseAddress); + } + + // Handle -dead_strip + if (parsedArgs.getLastArg(OPT_dead_strip)) + ctx.setDeadStripping(true); + + bool globalWholeArchive = false; + // Handle 
-all_load + if (parsedArgs.getLastArg(OPT_all_load)) + globalWholeArchive = true; + + // Handle -install_name + if (llvm::opt::Arg *installName = parsedArgs.getLastArg(OPT_install_name)) + ctx.setInstallName(installName->getValue()); + else + ctx.setInstallName(ctx.outputPath()); + + // Handle -mark_dead_strippable_dylib + if (parsedArgs.getLastArg(OPT_mark_dead_strippable_dylib)) + ctx.setDeadStrippableDylib(true); + + // Handle -compatibility_version and -current_version + if (llvm::opt::Arg *vers = parsedArgs.getLastArg(OPT_compatibility_version)) { + if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) { + diagnostics + << "error: -compatibility_version can only be used with -dylib\n"; + return false; + } + uint32_t parsedVers; + if (MachOLinkingContext::parsePackedVersion(vers->getValue(), parsedVers)) { + diagnostics << "error: -compatibility_version value is malformed\n"; + return false; + } + ctx.setCompatibilityVersion(parsedVers); + } + + if (llvm::opt::Arg *vers = parsedArgs.getLastArg(OPT_current_version)) { + if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) { + diagnostics << "-current_version can only be used with -dylib\n"; + return false; + } + uint32_t parsedVers; + if (MachOLinkingContext::parsePackedVersion(vers->getValue(), parsedVers)) { + diagnostics << "error: -current_version value is malformed\n"; + return false; + } + ctx.setCurrentVersion(parsedVers); + } + + // Handle -bundle_loader + if (llvm::opt::Arg *loader = parsedArgs.getLastArg(OPT_bundle_loader)) + ctx.setBundleLoader(loader->getValue()); + + // Handle -sectalign segname sectname align + for (auto &alignArg : parsedArgs.filtered(OPT_sectalign)) { + const char* segName = alignArg->getValue(0); + const char* sectName = alignArg->getValue(1); + const char* alignStr = alignArg->getValue(2); + if ((alignStr[0] == '0') && (alignStr[1] == 'x')) + alignStr += 2; + unsigned long long alignValue; + if (llvm::getAsUnsignedInteger(alignStr, 16, alignValue)) { + diagnostics << "error: 
-sectalign alignment value '" + << alignStr << "' not a valid number\n"; + return false; + } + uint16_t align = 1 << llvm::countTrailingZeros(alignValue); + if (!llvm::isPowerOf2_64(alignValue)) { + diagnostics << "warning: alignment for '-sectalign " + << segName << " " << sectName + << llvm::format(" 0x%llX", alignValue) + << "' is not a power of two, using " + << llvm::format("0x%08X", align) << "\n"; + } + ctx.addSectionAlignment(segName, sectName, align); + } + + // Handle -mllvm + for (auto &llvmArg : parsedArgs.filtered(OPT_mllvm)) { + ctx.appendLLVMOption(llvmArg->getValue()); + } + + // Handle -print_atoms + if (parsedArgs.getLastArg(OPT_print_atoms)) + ctx.setPrintAtoms(); + + // Handle -t (trace) option. + if (parsedArgs.getLastArg(OPT_t)) + ctx.setLogInputFiles(true); + + // Handle -demangle option. + if (parsedArgs.getLastArg(OPT_demangle)) + ctx.setDemangleSymbols(true); + + // Handle -keep_private_externs + if (parsedArgs.getLastArg(OPT_keep_private_externs)) { + ctx.setKeepPrivateExterns(true); + if (ctx.outputMachOType() != llvm::MachO::MH_OBJECT) + diagnostics << "warning: -keep_private_externs only used in -r mode\n"; + } + + // Handle -dependency_info used by Xcode. + if (llvm::opt::Arg *depInfo = parsedArgs.getLastArg(OPT_dependency_info)) { + if (std::error_code ec = ctx.createDependencyFile(depInfo->getValue())) { + diagnostics << "warning: " << ec.message() + << ", processing '-dependency_info " + << depInfo->getValue() + << "'\n"; + } + } + + // In -test_file_usage mode, we'll be given an explicit list of paths that + // exist. We'll also be expected to print out information about how we located + // libraries and so on that the user specified, but not to actually do any + // linking. + if (parsedArgs.getLastArg(OPT_test_file_usage)) { + ctx.setTestingFileUsage(); + + // With paths existing by fiat, linking is not going to end well. + ctx.setDoNothing(true); + + // Only bother looking for an existence override if we're going to use it. 
+ for (auto existingPath : parsedArgs.filtered(OPT_path_exists)) { + ctx.addExistingPathForDebug(existingPath->getValue()); + } + } + + // Register possible input file parsers. + if (!ctx.doNothing()) { + ctx.registry().addSupportMachOObjects(ctx); + ctx.registry().addSupportArchives(ctx.logInputFiles()); + ctx.registry().addSupportYamlFiles(); + } + + // Now construct the set of library search directories, following ld64's + // baroque set of accumulated hacks. Mostly, the algorithm constructs + // { syslibroots } x { libpaths } + // + // Unfortunately, there are numerous exceptions: + // 1. Only absolute paths get modified by syslibroot options. + // 2. If there is just 1 -syslibroot, system paths not found in it are + // skipped. + // 3. If the last -syslibroot is "/", all of them are ignored entirely. + // 4. If { syslibroots } x path == {}, the original path is kept. + std::vector sysLibRoots; + for (auto syslibRoot : parsedArgs.filtered(OPT_syslibroot)) { + sysLibRoots.push_back(syslibRoot->getValue()); + } + if (!sysLibRoots.empty()) { + // Ignore all if last -syslibroot is "/". + if (sysLibRoots.back() != "/") + ctx.setSysLibRoots(sysLibRoots); + } + + // Paths specified with -L come first, and are not considered system paths for + // the case where there is precisely 1 -syslibroot. + for (auto libPath : parsedArgs.filtered(OPT_L)) { + ctx.addModifiedSearchDir(libPath->getValue()); + } + + // Process -F directories (where to look for frameworks). + for (auto fwPath : parsedArgs.filtered(OPT_F)) { + ctx.addFrameworkSearchDir(fwPath->getValue()); + } + + // -Z suppresses the standard search paths. 
+ if (!parsedArgs.hasArg(OPT_Z)) { + ctx.addModifiedSearchDir("/usr/lib", true); + ctx.addModifiedSearchDir("/usr/local/lib", true); + ctx.addFrameworkSearchDir("/Library/Frameworks", true); + ctx.addFrameworkSearchDir("/System/Library/Frameworks", true); + } + + // Now that we've constructed the final set of search paths, print out those + // search paths in verbose mode. + if (parsedArgs.getLastArg(OPT_v)) { + diagnostics << "Library search paths:\n"; + for (auto path : ctx.searchDirs()) { + diagnostics << " " << path << '\n'; + } + diagnostics << "Framework search paths:\n"; + for (auto path : ctx.frameworkDirs()) { + diagnostics << " " << path << '\n'; + } + } + + // Handle -exported_symbols_list + for (auto expFile : parsedArgs.filtered(OPT_exported_symbols_list)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::blackList) { + diagnostics << "error: -exported_symbols_list cannot be combined " + << "with -unexported_symbol[s_list]\n"; + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::whiteList); + if (std::error_code ec = parseExportsList(expFile->getValue(), ctx, + diagnostics)) { + diagnostics << "error: " << ec.message() + << ", processing '-exported_symbols_list " + << expFile->getValue() + << "'\n"; + return false; + } + } + + // Handle -exported_symbol + for (auto symbol : parsedArgs.filtered(OPT_exported_symbol)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::blackList) { + diagnostics << "error: -exported_symbol cannot be combined " + << "with -unexported_symbol[s_list]\n"; + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::whiteList); + ctx.addExportSymbol(symbol->getValue()); + } + + // Handle -unexported_symbols_list + for (auto expFile : parsedArgs.filtered(OPT_unexported_symbols_list)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::whiteList) { + diagnostics << "error: -unexported_symbols_list cannot be combined " + << "with -exported_symbol[s_list]\n"; + return 
false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::blackList); + if (std::error_code ec = parseExportsList(expFile->getValue(), ctx, + diagnostics)) { + diagnostics << "error: " << ec.message() + << ", processing '-unexported_symbols_list " + << expFile->getValue() + << "'\n"; + return false; + } + } + + // Handle -unexported_symbol + for (auto symbol : parsedArgs.filtered(OPT_unexported_symbol)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::whiteList) { + diagnostics << "error: -unexported_symbol cannot be combined " + << "with -exported_symbol[s_list]\n"; + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::blackList); + ctx.addExportSymbol(symbol->getValue()); + } + + // Handle obosolete -multi_module and -single_module + if (llvm::opt::Arg *mod = + parsedArgs.getLastArg(OPT_multi_module, OPT_single_module)) { + if (mod->getOption().getID() == OPT_multi_module) { + diagnostics << "warning: -multi_module is obsolete and being ignored\n"; + } + else { + if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) { + diagnostics << "warning: -single_module being ignored. 
" + "It is only for use when producing a dylib\n"; + } + } + } + + // Handle obsolete ObjC options: -objc_gc_compaction, -objc_gc, -objc_gc_only + if (parsedArgs.getLastArg(OPT_objc_gc_compaction)) { + diagnostics << "error: -objc_gc_compaction is not supported\n"; + return false; + } + + if (parsedArgs.getLastArg(OPT_objc_gc)) { + diagnostics << "error: -objc_gc is not supported\n"; + return false; + } + + if (parsedArgs.getLastArg(OPT_objc_gc_only)) { + diagnostics << "error: -objc_gc_only is not supported\n"; + return false; + } + + // Handle -pie or -no_pie + if (llvm::opt::Arg *pie = parsedArgs.getLastArg(OPT_pie, OPT_no_pie)) { + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_EXECUTE: + switch (ctx.os()) { + case MachOLinkingContext::OS::macOSX: + if ((minOSVersion < 0x000A0500) && + (pie->getOption().getID() == OPT_pie)) { + diagnostics << "-pie can only be used when targeting " + "Mac OS X 10.5 or later\n"; + return false; + } + break; + case MachOLinkingContext::OS::iOS: + if ((minOSVersion < 0x00040200) && + (pie->getOption().getID() == OPT_pie)) { + diagnostics << "-pie can only be used when targeting " + "iOS 4.2 or later\n"; + return false; + } + break; + case MachOLinkingContext::OS::iOS_simulator: + if (pie->getOption().getID() == OPT_no_pie) + diagnostics << "iOS simulator programs must be built PIE\n"; + return false; + break; + case MachOLinkingContext::OS::unknown: + break; + } + ctx.setPIE(pie->getOption().getID() == OPT_pie); + break; + case llvm::MachO::MH_PRELOAD: + break; + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + diagnostics << "warning: " << pie->getSpelling() << " being ignored. 
" + << "It is only used when linking main executables\n"; + break; + default: + diagnostics << pie->getSpelling() + << " can only used when linking main executables\n"; + return false; + break; + } + } + + // Handle -version_load_command or -no_version_load_command + { + bool flagOn = false; + bool flagOff = false; + if (auto *arg = parsedArgs.getLastArg(OPT_version_load_command, + OPT_no_version_load_command)) { + flagOn = arg->getOption().getID() == OPT_version_load_command; + flagOff = arg->getOption().getID() == OPT_no_version_load_command; + } + + // default to adding version load command for dynamic code, + // static code must opt-in + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_OBJECT: + ctx.setGenerateVersionLoadCommand(false); + break; + case llvm::MachO::MH_EXECUTE: + // dynamic executables default to generating a version load command, + // while static exectuables only generate it if required. + if (isStaticExecutable) { + if (flagOn) + ctx.setGenerateVersionLoadCommand(true); + } else { + if (!flagOff) + ctx.setGenerateVersionLoadCommand(true); + } + break; + case llvm::MachO::MH_PRELOAD: + case llvm::MachO::MH_KEXT_BUNDLE: + if (flagOn) + ctx.setGenerateVersionLoadCommand(true); + break; + case llvm::MachO::MH_DYLINKER: + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + if (!flagOff) + ctx.setGenerateVersionLoadCommand(true); + break; + case llvm::MachO::MH_FVMLIB: + case llvm::MachO::MH_DYLDLINK: + case llvm::MachO::MH_DYLIB_STUB: + case llvm::MachO::MH_DSYM: + // We don't generate load commands for these file types, even if + // forced on. 
+ break; + } + } + + // Handle -function_starts or -no_function_starts + { + bool flagOn = false; + bool flagOff = false; + if (auto *arg = parsedArgs.getLastArg(OPT_function_starts, + OPT_no_function_starts)) { + flagOn = arg->getOption().getID() == OPT_function_starts; + flagOff = arg->getOption().getID() == OPT_no_function_starts; + } + + // default to adding functions start for dynamic code, static code must + // opt-in + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_OBJECT: + ctx.setGenerateFunctionStartsLoadCommand(false); + break; + case llvm::MachO::MH_EXECUTE: + // dynamic executables default to generating a version load command, + // while static exectuables only generate it if required. + if (isStaticExecutable) { + if (flagOn) + ctx.setGenerateFunctionStartsLoadCommand(true); + } else { + if (!flagOff) + ctx.setGenerateFunctionStartsLoadCommand(true); + } + break; + case llvm::MachO::MH_PRELOAD: + case llvm::MachO::MH_KEXT_BUNDLE: + if (flagOn) + ctx.setGenerateFunctionStartsLoadCommand(true); + break; + case llvm::MachO::MH_DYLINKER: + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + if (!flagOff) + ctx.setGenerateFunctionStartsLoadCommand(true); + break; + case llvm::MachO::MH_FVMLIB: + case llvm::MachO::MH_DYLDLINK: + case llvm::MachO::MH_DYLIB_STUB: + case llvm::MachO::MH_DSYM: + // We don't generate load commands for these file types, even if + // forced on. 
+ break; + } + } + + // Handle -data_in_code_info or -no_data_in_code_info + { + bool flagOn = false; + bool flagOff = false; + if (auto *arg = parsedArgs.getLastArg(OPT_data_in_code_info, + OPT_no_data_in_code_info)) { + flagOn = arg->getOption().getID() == OPT_data_in_code_info; + flagOff = arg->getOption().getID() == OPT_no_data_in_code_info; + } + + // default to adding data in code for dynamic code, static code must + // opt-in + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_OBJECT: + if (!flagOff) + ctx.setGenerateDataInCodeLoadCommand(true); + break; + case llvm::MachO::MH_EXECUTE: + // dynamic executables default to generating a version load command, + // while static exectuables only generate it if required. + if (isStaticExecutable) { + if (flagOn) + ctx.setGenerateDataInCodeLoadCommand(true); + } else { + if (!flagOff) + ctx.setGenerateDataInCodeLoadCommand(true); + } + break; + case llvm::MachO::MH_PRELOAD: + case llvm::MachO::MH_KEXT_BUNDLE: + if (flagOn) + ctx.setGenerateDataInCodeLoadCommand(true); + break; + case llvm::MachO::MH_DYLINKER: + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + if (!flagOff) + ctx.setGenerateDataInCodeLoadCommand(true); + break; + case llvm::MachO::MH_FVMLIB: + case llvm::MachO::MH_DYLDLINK: + case llvm::MachO::MH_DYLIB_STUB: + case llvm::MachO::MH_DSYM: + // We don't generate load commands for these file types, even if + // forced on. + break; + } + } + + // Handle sdk_version + if (llvm::opt::Arg *arg = parsedArgs.getLastArg(OPT_sdk_version)) { + uint32_t sdkVersion = 0; + if (MachOLinkingContext::parsePackedVersion(arg->getValue(), + sdkVersion)) { + diagnostics << "error: malformed sdkVersion value\n"; + return false; + } + ctx.setSdkVersion(sdkVersion); + } else if (ctx.generateVersionLoadCommand()) { + // If we don't have an sdk version, but were going to emit a load command + // with min_version, then we need to give an warning as we have no sdk + // version to put in that command. 
+ // FIXME: We need to decide whether to make this an error. + diagnostics << "warning: -sdk_version is required when emitting " + "min version load command. " + "Setting sdk version to match provided min version\n"; + ctx.setSdkVersion(ctx.osMinVersion()); + } + + // Handle source_version + if (llvm::opt::Arg *arg = parsedArgs.getLastArg(OPT_source_version)) { + uint64_t version = 0; + if (MachOLinkingContext::parsePackedVersion(arg->getValue(), + version)) { + diagnostics << "error: malformed source_version value\n"; + return false; + } + ctx.setSourceVersion(version); + } + + // Handle stack_size + if (llvm::opt::Arg *stackSize = parsedArgs.getLastArg(OPT_stack_size)) { + uint64_t stackSizeVal; + if (parseNumberBase16(stackSize->getValue(), stackSizeVal)) { + diagnostics << "error: stack_size expects a hex number\n"; + return false; + } + if ((stackSizeVal % ctx.pageSize()) != 0) { + diagnostics << "error: stack_size must be a multiple of page size (" + << "0x" << llvm::utohexstr(ctx.pageSize()) << ")\n"; + return false; + } + + ctx.setStackSize(stackSizeVal); + } + + // Handle debug info handling options: -S + if (parsedArgs.hasArg(OPT_S)) + ctx.setDebugInfoMode(MachOLinkingContext::DebugInfoMode::noDebugMap); + + // Handle -order_file + for (auto orderFile : parsedArgs.filtered(OPT_order_file)) { + if (std::error_code ec = parseOrderFile(orderFile->getValue(), ctx, + diagnostics)) { + diagnostics << "error: " << ec.message() + << ", processing '-order_file " + << orderFile->getValue() + << "'\n"; + return false; + } + } + + // Handle -flat_namespace. 
+ if (llvm::opt::Arg *ns = + parsedArgs.getLastArg(OPT_flat_namespace, OPT_twolevel_namespace)) { + if (ns->getOption().getID() == OPT_flat_namespace) + ctx.setUseFlatNamespace(true); + } + + // Handle -undefined + if (llvm::opt::Arg *undef = parsedArgs.getLastArg(OPT_undefined)) { + MachOLinkingContext::UndefinedMode UndefMode; + if (StringRef(undef->getValue()).equals("error")) + UndefMode = MachOLinkingContext::UndefinedMode::error; + else if (StringRef(undef->getValue()).equals("warning")) + UndefMode = MachOLinkingContext::UndefinedMode::warning; + else if (StringRef(undef->getValue()).equals("suppress")) + UndefMode = MachOLinkingContext::UndefinedMode::suppress; + else if (StringRef(undef->getValue()).equals("dynamic_lookup")) + UndefMode = MachOLinkingContext::UndefinedMode::dynamicLookup; + else { + diagnostics << "error: invalid option to -undefined " + "[ warning | error | suppress | dynamic_lookup ]\n"; + return false; + } + + if (ctx.useFlatNamespace()) { + // If we're using -flat_namespace then 'warning', 'suppress' and + // 'dynamic_lookup' are all equivalent, so map them to 'suppress'. + if (UndefMode != MachOLinkingContext::UndefinedMode::error) + UndefMode = MachOLinkingContext::UndefinedMode::suppress; + } else { + // If we're using -twolevel_namespace then 'warning' and 'suppress' are + // illegal. Emit a diagnostic if they've been (mis)used. + if (UndefMode == MachOLinkingContext::UndefinedMode::warning || + UndefMode == MachOLinkingContext::UndefinedMode::suppress) { + diagnostics << "error: can't use -undefined warning or suppress with " + "-twolevel_namespace\n"; + return false; + } + } + + ctx.setUndefinedMode(UndefMode); + } + + // Handle -no_objc_category_merging. 
+ if (parsedArgs.getLastArg(OPT_no_objc_category_merging)) + ctx.setMergeObjCCategories(false); + + // Handle -rpath + if (parsedArgs.hasArg(OPT_rpath)) { + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_EXECUTE: + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + if (!ctx.minOS("10.5", "2.0")) { + if (ctx.os() == MachOLinkingContext::OS::macOSX) { + diagnostics << "error: -rpath can only be used when targeting " + "OS X 10.5 or later\n"; + } else { + diagnostics << "error: -rpath can only be used when targeting " + "iOS 2.0 or later\n"; + } + return false; + } + break; + default: + diagnostics << "error: -rpath can only be used when creating " + "a dynamic final linked image\n"; + return false; + } + + for (auto rPath : parsedArgs.filtered(OPT_rpath)) { + ctx.addRpath(rPath->getValue()); + } + } + + // Parse the LLVM options before we process files in case the file handling + // makes use of things like DEBUG(). + parseLLVMOptions(ctx); + + // Handle input files and sectcreate. + for (auto &arg : parsedArgs) { + bool upward; + llvm::Optional resolvedPath; + switch (arg->getOption().getID()) { + default: + continue; + case OPT_INPUT: + addFile(arg->getValue(), ctx, globalWholeArchive, false, diagnostics); + break; + case OPT_upward_library: + addFile(arg->getValue(), ctx, false, true, diagnostics); + break; + case OPT_force_load: + addFile(arg->getValue(), ctx, true, false, diagnostics); + break; + case OPT_l: + case OPT_upward_l: + upward = (arg->getOption().getID() == OPT_upward_l); + resolvedPath = ctx.searchLibrary(arg->getValue()); + if (!resolvedPath) { + diagnostics << "Unable to find library for " << arg->getSpelling() + << arg->getValue() << "\n"; + return false; + } else if (ctx.testingFileUsage()) { + diagnostics << "Found " << (upward ? 
"upward " : " ") << "library " + << canonicalizePath(resolvedPath.getValue()) << '\n'; + } + addFile(resolvedPath.getValue(), ctx, globalWholeArchive, + upward, diagnostics); + break; + case OPT_framework: + case OPT_upward_framework: + upward = (arg->getOption().getID() == OPT_upward_framework); + resolvedPath = ctx.findPathForFramework(arg->getValue()); + if (!resolvedPath) { + diagnostics << "Unable to find framework for " + << arg->getSpelling() << " " << arg->getValue() << "\n"; + return false; + } else if (ctx.testingFileUsage()) { + diagnostics << "Found " << (upward ? "upward " : " ") << "framework " + << canonicalizePath(resolvedPath.getValue()) << '\n'; + } + addFile(resolvedPath.getValue(), ctx, globalWholeArchive, + upward, diagnostics); + break; + case OPT_filelist: + if (auto ec = loadFileList(arg->getValue(), + ctx, globalWholeArchive, + diagnostics)) { + handleAllErrors(std::move(ec), [&](const llvm::ErrorInfoBase &EI) { + diagnostics << "error: "; + EI.log(diagnostics); + diagnostics << ", processing '-filelist " << arg->getValue() << "'\n"; + }); + return false; + } + break; + case OPT_sectcreate: { + const char* seg = arg->getValue(0); + const char* sect = arg->getValue(1); + const char* fileName = arg->getValue(2); + + ErrorOr> contentOrErr = + MemoryBuffer::getFile(fileName); + + if (!contentOrErr) { + diagnostics << "error: can't open -sectcreate file " << fileName << "\n"; + return false; + } + + ctx.addSectCreateSection(seg, sect, std::move(*contentOrErr)); + } + break; + } + } + + if (ctx.getNodes().empty()) { + diagnostics << "No input files\n"; + return false; + } + + // Validate the combination of options used. + return ctx.validate(diagnostics); +} + +/// This is where the link is actually performed. 
+bool link(llvm::ArrayRef args, raw_ostream &diagnostics) { + MachOLinkingContext ctx; + if (!parse(args, ctx, diagnostics)) + return false; + if (ctx.doNothing()) + return true; + if (ctx.getNodes().empty()) + return false; + + for (std::unique_ptr &ie : ctx.getNodes()) + if (FileNode *node = dyn_cast(ie.get())) + node->getFile()->parse(); + + std::vector> internalFiles; + ctx.createInternalFiles(internalFiles); + for (auto i = internalFiles.rbegin(), e = internalFiles.rend(); i != e; ++i) { + auto &members = ctx.getNodes(); + members.insert(members.begin(), llvm::make_unique(std::move(*i))); + } + + // Give target a chance to add files. + std::vector> implicitFiles; + ctx.createImplicitFiles(implicitFiles); + for (auto i = implicitFiles.rbegin(), e = implicitFiles.rend(); i != e; ++i) { + auto &members = ctx.getNodes(); + members.insert(members.begin(), llvm::make_unique(std::move(*i))); + } + + // Give target a chance to postprocess input files. + // Mach-O uses this chance to move all object files before library files. + ctx.finalizeInputFiles(); + + // Do core linking. + ScopedTask resolveTask(getDefaultDomain(), "Resolve"); + Resolver resolver(ctx); + if (!resolver.resolve()) + return false; + SimpleFile *merged = nullptr; + { + std::unique_ptr mergedFile = resolver.resultFile(); + merged = mergedFile.get(); + auto &members = ctx.getNodes(); + members.insert(members.begin(), + llvm::make_unique(std::move(mergedFile))); + } + resolveTask.end(); + + // Run passes on linked atoms. + ScopedTask passTask(getDefaultDomain(), "Passes"); + PassManager pm; + ctx.addPasses(pm); + if (auto ec = pm.runOnFile(*merged)) { + // FIXME: This should be passed to logAllUnhandledErrors but it needs + // to be passed a Twine instead of a string. 
+ diagnostics << "Failed to run passes on file '" << ctx.outputPath() + << "': "; + logAllUnhandledErrors(std::move(ec), diagnostics, std::string()); + return false; + } + + passTask.end(); + + // Give linked atoms to Writer to generate output file. + ScopedTask writeTask(getDefaultDomain(), "Write"); + if (auto ec = ctx.writeFile(*merged)) { + // FIXME: This should be passed to logAllUnhandledErrors but it needs + // to be passed a Twine instead of a string. + diagnostics << "Failed to write file '" << ctx.outputPath() << "': "; + logAllUnhandledErrors(std::move(ec), diagnostics, std::string()); + return false; + } + + return true; +} +} // namespace mach_o +} // namespace lld diff --git a/lib/Driver/DarwinLdOptions.td b/lib/Driver/DarwinLdOptions.td new file mode 100644 index 00000000..fa07f336 --- /dev/null +++ b/lib/Driver/DarwinLdOptions.td @@ -0,0 +1,242 @@ +include "llvm/Option/OptParser.td" + + +// output kinds +def grp_kind : OptionGroup<"outs">, HelpText<"OUTPUT KIND">; +def relocatable : Flag<["-"], "r">, + HelpText<"Create relocatable object file">, Group; +def static : Flag<["-"], "static">, + HelpText<"Create static executable">, Group; +def dynamic : Flag<["-"], "dynamic">, + HelpText<"Create dynamic executable (default)">,Group; +def dylib : Flag<["-"], "dylib">, + HelpText<"Create dynamic library">, Group; +def bundle : Flag<["-"], "bundle">, + HelpText<"Create dynamic bundle">, Group; +def execute : Flag<["-"], "execute">, + HelpText<"Create main executable (default)">, Group; +def preload : Flag<["-"], "preload">, + HelpText<"Create binary for use with embedded systems">, Group; + +// optimizations +def grp_opts : OptionGroup<"opts">, HelpText<"OPTIMIZATIONS">; +def dead_strip : Flag<["-"], "dead_strip">, + HelpText<"Remove unreference code and data">, Group; +def macosx_version_min : Separate<["-"], "macosx_version_min">, + MetaVarName<"">, + HelpText<"Minimum Mac OS X version">, Group; +def ios_version_min : Separate<["-"], "ios_version_min">, 
+ MetaVarName<"">, + HelpText<"Minimum iOS version">, Group; +def iphoneos_version_min : Separate<["-"], "iphoneos_version_min">, + Alias; +def ios_simulator_version_min : Separate<["-"], "ios_simulator_version_min">, + MetaVarName<"">, + HelpText<"Minimum iOS simulator version">, Group; +def sdk_version : Separate<["-"], "sdk_version">, + MetaVarName<"">, + HelpText<"SDK version">, Group; +def source_version : Separate<["-"], "source_version">, + MetaVarName<"">, + HelpText<"Source version">, Group; +def version_load_command : Flag<["-"], "version_load_command">, + HelpText<"Force generation of a version load command">, Group; +def no_version_load_command : Flag<["-"], "no_version_load_command">, + HelpText<"Disable generation of a version load command">, Group; +def function_starts : Flag<["-"], "function_starts">, + HelpText<"Force generation of a function starts load command">, + Group; +def no_function_starts : Flag<["-"], "no_function_starts">, + HelpText<"Disable generation of a function starts load command">, + Group; +def data_in_code_info : Flag<["-"], "data_in_code_info">, + HelpText<"Force generation of a data in code load command">, + Group; +def no_data_in_code_info : Flag<["-"], "no_data_in_code_info">, + HelpText<"Disable generation of a data in code load command">, + Group; +def mllvm : Separate<["-"], "mllvm">, + MetaVarName<"