summaryrefslogtreecommitdiff
path: root/lld/MachO/InputFiles.h
blob: 66d46e46fa73fdaa288a8074b389adb12ac8d12e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLD_MACHO_INPUT_FILES_H
#define LLD_MACHO_INPUT_FILES_H

#include "MachOStructs.h"
#include "Target.h"

#include "lld/Common/DWARF.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Object/Archive.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Threading.h"
#include "llvm/TextAPI/TextAPIReader.h"

#include <vector>

namespace llvm {
namespace lto {
class InputFile;
} // namespace lto
namespace MachO {
class InterfaceFile;
} // namespace MachO
class TarWriter;
} // namespace llvm

namespace lld {
namespace macho {

struct PlatformInfo;
class ConcatInputSection;
class Symbol;
class Defined;
class AliasSymbol;
struct Reloc;
enum class RefState : uint8_t;

// If --reproduce option is given, all input files are written
// to this tar archive.
extern std::unique_ptr<llvm::TarWriter> tar;

// If .subsections_via_symbols is set, each InputSection will be split along
// symbol boundaries. The field offset represents the offset of the subsection
// from the start of the original pre-split InputSection.
struct Subsection {
  uint64_t offset = 0;
  InputSection *isec = nullptr;
};

using Subsections = std::vector<Subsection>;
class InputFile;

class Section {
public:
  InputFile *file;
  StringRef segname;
  StringRef name;
  uint32_t flags;
  uint64_t addr;
  Subsections subsections;

  Section(InputFile *file, StringRef segname, StringRef name, uint32_t flags,
          uint64_t addr)
      : file(file), segname(segname), name(name), flags(flags), addr(addr) {}
  // Ensure pointers to Sections are never invalidated.
  Section(const Section &) = delete;
  Section &operator=(const Section &) = delete;
  Section(Section &&) = delete;
  Section &operator=(Section &&) = delete;

private:
  // Whether we have already split this section into individual subsections.
  // For sections that cannot be split (e.g. literal sections), this is always
  // false.
  bool doneSplitting = false;
  friend class ObjFile;
};

// Represents a call graph profile edge.
struct CallGraphEntry {
  // The index of the caller in the symbol table.
  uint32_t fromIndex;
  // The index of the callee in the symbol table.
  uint32_t toIndex;
  // Number of calls from callee to caller in the profile.
  uint64_t count;

  CallGraphEntry(uint32_t fromIndex, uint32_t toIndex, uint64_t count)
      : fromIndex(fromIndex), toIndex(toIndex), count(count) {}
};

class InputFile {
public:
  enum Kind {
    ObjKind,
    OpaqueKind,
    DylibKind,
    ArchiveKind,
    BitcodeKind,
  };

  virtual ~InputFile() = default;
  Kind kind() const { return fileKind; }
  StringRef getName() const { return name; }
  static void resetIdCount() { idCount = 0; }

  MemoryBufferRef mb;

  std::vector<Symbol *> symbols;
  std::vector<Section *> sections;
  ArrayRef<uint8_t> objCImageInfo;

  // If not empty, this stores the name of the archive containing this file.
  // We use this string for creating error messages.
  std::string archiveName;

  // Provides an easy way to sort InputFiles deterministically.
  const int id;

  // True if this is a lazy ObjFile or BitcodeFile.
  bool lazy = false;

protected:
  InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false)
      : mb(mb), id(idCount++), lazy(lazy), fileKind(kind),
        name(mb.getBufferIdentifier()) {}

  InputFile(Kind, const llvm::MachO::InterfaceFile &);

private:
  const Kind fileKind;
  const StringRef name;

  static int idCount;
};

struct FDE {
  uint32_t funcLength;
  Symbol *personality;
  InputSection *lsda;
};

// .o file
class ObjFile final : public InputFile {
public:
  ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
          bool lazy = false, bool forceHidden = false);
  ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
  ArrayRef<uint8_t> getOptimizationHints() const;
  template <class LP> void parse();

  static bool classof(const InputFile *f) { return f->kind() == ObjKind; }

  std::string sourceFile() const;
  // Parses line table information for diagnostics. compileUnit should be used
  // for other purposes.
  lld::DWARFCache *getDwarf();

  llvm::DWARFUnit *compileUnit = nullptr;
  std::unique_ptr<lld::DWARFCache> dwarfCache;
  Section *addrSigSection = nullptr;
  const uint32_t modTime;
  bool forceHidden;
  std::vector<ConcatInputSection *> debugSections;
  std::vector<CallGraphEntry> callGraph;
  llvm::DenseMap<ConcatInputSection *, FDE> fdes;
  std::vector<AliasSymbol *> aliases;

private:
  llvm::once_flag initDwarf;
  template <class LP> void parseLazy();
  template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);
  template <class LP>
  void parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
                    ArrayRef<typename LP::nlist> nList, const char *strtab,
                    bool subsectionsViaSymbols);
  template <class NList>
  Symbol *parseNonSectionSymbol(const NList &sym, const char *strtab);
  template <class SectionHeader>
  void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
                        const SectionHeader &, Section &);
  void parseDebugInfo();
  void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);
  void registerCompactUnwind(Section &compactUnwindSection);
  void registerEhFrames(Section &ehFrameSection);
};

// command-line -sectcreate file
class OpaqueFile final : public InputFile {
public:
  OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName);
  static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; }
};

// .dylib or .tbd file
class DylibFile final : public InputFile {
public:
  // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
  // symbols in those sub-libraries will be available under the umbrella
  // library's namespace. Those sub-libraries can also have their own
  // re-exports. When loading a re-exported dylib, `umbrella` should be set to
  // the root dylib to ensure symbols in the child library are correctly bound
  // to the root. On the other hand, if a dylib is being directly loaded
  // (through an -lfoo flag), then `umbrella` should be a nullptr.
  explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
                     bool isBundleLoader, bool explicitlyLinked);
  explicit DylibFile(const llvm::MachO::InterfaceFile &interface,
                     DylibFile *umbrella, bool isBundleLoader,
                     bool explicitlyLinked);
  explicit DylibFile(DylibFile *umbrella);

  void parseLoadCommands(MemoryBufferRef mb);
  void parseReexports(const llvm::MachO::InterfaceFile &interface);
  bool isReferenced() const { return numReferencedSymbols > 0; }
  bool isExplicitlyLinked() const;
  void setExplicitlyLinked() { explicitlyLinked = true; }

  static bool classof(const InputFile *f) { return f->kind() == DylibKind; }

  StringRef installName;
  DylibFile *exportingFile = nullptr;
  DylibFile *umbrella;
  SmallVector<StringRef, 2> rpaths;
  uint32_t compatibilityVersion = 0;
  uint32_t currentVersion = 0;
  int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
  unsigned numReferencedSymbols = 0;
  RefState refState;
  bool reexport = false;
  bool forceNeeded = false;
  bool forceWeakImport = false;
  bool deadStrippable = false;

private:
  bool explicitlyLinked = false; // Access via isExplicitlyLinked().

public:
  // An executable can be used as a bundle loader that will load the output
  // file being linked, and that contains symbols referenced, but not
  // implemented in the bundle. When used like this, it is very similar
  // to a dylib, so we've used the same class to represent it.
  bool isBundleLoader;

  // Synthetic Dylib objects created by $ld$previous symbols in this dylib.
  // Usually empty. These synthetic dylibs won't have synthetic dylibs
  // themselves.
  SmallVector<DylibFile *, 2> extraDylibs;

private:
  DylibFile *getSyntheticDylib(StringRef installName, uint32_t currentVersion,
                               uint32_t compatVersion);

  bool handleLDSymbol(StringRef originalName);
  void handleLDPreviousSymbol(StringRef name, StringRef originalName);
  void handleLDInstallNameSymbol(StringRef name, StringRef originalName);
  void handleLDHideSymbol(StringRef name, StringRef originalName);
  void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const;
  void parseExportedSymbols(uint32_t offset, uint32_t size);
  void loadReexport(StringRef path, DylibFile *umbrella,
                    const llvm::MachO::InterfaceFile *currentTopLevelTapi);

  llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols;
};

// .a file
class ArchiveFile final : public InputFile {
public:
  explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file,
                       bool forceHidden);
  void addLazySymbols();
  void fetch(const llvm::object::Archive::Symbol &);
  // LLD normally doesn't use Error for error-handling, but the underlying
  // Archive library does, so this is the cleanest way to wrap it.
  Error fetch(const llvm::object::Archive::Child &, StringRef reason);
  const llvm::object::Archive &getArchive() const { return *file; };
  static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }

private:
  std::unique_ptr<llvm::object::Archive> file;
  // Keep track of children fetched from the archive by tracking
  // which address offsets have been fetched already.
  llvm::DenseSet<uint64_t> seen;
  // Load all symbols with hidden visibility (-load_hidden).
  bool forceHidden;
};

class BitcodeFile final : public InputFile {
public:
  explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
                       uint64_t offsetInArchive, bool lazy = false,
                       bool forceHidden = false);
  static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
  void parse();

  std::unique_ptr<llvm::lto::InputFile> obj;
  bool forceHidden;

private:
  void parseLazy();
};

extern llvm::SetVector<InputFile *> inputFiles;
extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads;

std::optional<MemoryBufferRef> readFile(StringRef path);

void extract(InputFile &file, StringRef reason);

namespace detail {

template <class CommandType, class... Types>
std::vector<const CommandType *>
findCommands(const void *anyHdr, size_t maxCommands, Types... types) {
  std::vector<const CommandType *> cmds;
  std::initializer_list<uint32_t> typesList{types...};
  const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr);
  const uint8_t *p =
      reinterpret_cast<const uint8_t *>(hdr) + target->headerSize;
  for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
    auto *cmd = reinterpret_cast<const CommandType *>(p);
    if (llvm::is_contained(typesList, cmd->cmd)) {
      cmds.push_back(cmd);
      if (cmds.size() == maxCommands)
        return cmds;
    }
    p += cmd->cmdsize;
  }
  return cmds;
}

} // namespace detail

// anyHdr should be a pointer to either mach_header or mach_header_64
template <class CommandType = llvm::MachO::load_command, class... Types>
const CommandType *findCommand(const void *anyHdr, Types... types) {
  std::vector<const CommandType *> cmds =
      detail::findCommands<CommandType>(anyHdr, 1, types...);
  return cmds.size() ? cmds[0] : nullptr;
}

template <class CommandType = llvm::MachO::load_command, class... Types>
std::vector<const CommandType *> findCommands(const void *anyHdr,
                                              Types... types) {
  return detail::findCommands<CommandType>(anyHdr, 0, types...);
}

std::string replaceThinLTOSuffix(StringRef path);
} // namespace macho

std::string toString(const macho::InputFile *file);
std::string toString(const macho::Section &);
} // namespace lld

#endif