C++ linker presentation

This commit is contained in:
2026-02-24 10:21:15 +01:00
parent 0fda0d75fb
commit 6f3d98f388
32 changed files with 788 additions and 0 deletions

9
cpplinker/.gitignore vendored Normal file
View File

@@ -0,0 +1,9 @@
*.o
*.a
*.so
*.dll
*.lib
*.exp
*.dylib
app
app_*

View File

@@ -0,0 +1,24 @@
#!/bin/bash
# Walkthrough: compile two translation units separately, inspect their
# symbol tables with nm, then link the objects and run the result.
set -e

CXX_STD="-std=c++17"

# Print a "=== title ===" section header.
section() {
    echo "=== $1 ==="
}

section "Step 1: Compile each .cpp to object files"
for unit in foo main; do
    g++ "$CXX_STD" -c "$unit.cpp" -o "$unit.o"
done
echo "Created foo.o and main.o"
echo

section "Step 2: Inspect object files with nm"
echo "--- foo.o symbols (defines add) ---"
nm -C foo.o
echo
echo "--- main.o symbols (references add) ---"
nm -C main.o
echo

section "Step 3: Link object files into executable"
g++ foo.o main.o -o app
echo "Created app"
echo

section "Step 4: Run"
./app

View File

@@ -0,0 +1,3 @@
// Returns the sum of the two arguments.
int add(int a, int b) {
    int sum = a;
    sum += b;
    return sum;
}

View File

@@ -0,0 +1,9 @@
#include <iostream>
// Declaration only: the definition is supplied by another translation unit
// and the reference is resolved at link time.
extern int add(int, int);

// Calls add(3, 4) and prints the result.
int main() {
    const int result = add(3, 4);
    std::cout << "add(3, 4) = " << result << "\n";
    return 0;
}

View File

@@ -0,0 +1,23 @@
#!/bin/bash
# Walkthrough: show which symbols math.o exports and which ones main.o
# leaves unresolved, then link the two objects and run.
set -e

# Print a "=== title ===" section header.
heading() {
    printf '=== %s ===\n' "$1"
}

heading "Compile to object files"
g++ -std=c++17 -c math.cpp -o math.o
g++ -std=c++17 -c main.cpp -o main.o
printf '\n'

heading "math.o: exported symbols (demangled)"
nm -C -g math.o
printf '\n'

heading "main.o: unresolved (UNDEF) symbols"
nm -C -u main.o
printf '\n'

heading "main.o: full symbol table"
objdump -t main.o
printf '\n'

heading "Link and run"
g++ math.o main.o -o app
./app

View File

@@ -0,0 +1,9 @@
#include <iostream>
// Declared here; the definition comes from another object file at link time.
double square(double);

// Squares 5.0 via the externally defined function and prints the result.
int main() {
    const double result = square(5.0);
    std::cout << "square(5.0) = " << result << "\n";
    return 0;
}

View File

@@ -0,0 +1,3 @@
#include <iostream>
// Returns x multiplied by itself.
double square(double x) {
    const double squared = x * x;
    return squared;
}

View File

@@ -0,0 +1,27 @@
#!/bin/bash
# Walkthrough: compile mangling.cpp, list its symbols in raw and demangled
# form, demangle a few names by hand with c++filt, then build and run.
set -e

# Symbols of interest in the nm output.
SYMBOL_FILTER="process|sqrt|dot|legacy"

# Print a "=== title ===" section header.
section() {
    echo "=== $1 ==="
}

section "Compile to object file"
g++ -std=c++17 -c mangling.cpp -o mangling.o
echo

section "Raw mangled symbols"
nm -g mangling.o | grep -E "$SYMBOL_FILTER"
echo

section "Demangled symbols"
nm -C -g mangling.o | grep -E "$SYMBOL_FILTER"
echo

section "Demangle individual symbols"
for symbol in \
    _Z7processi \
    _Z7processd \
    _Z7processid \
    _ZN4Math4sqrtEd \
    _ZN6Vector3dotERKS_ \
    legacy_init
do
    echo "$symbol" | c++filt
done
echo

section "Build and run"
g++ -std=c++17 mangling.o -o app
./app

View File

@@ -0,0 +1,33 @@
#include <iostream>
// Overload set used to show name mangling: the three functions share the name
// `process` and differ only in their parameter lists.

// Returns twice the integer argument.
int process(int x) { return x * 2; }

// Doubles the argument, then truncates the result toward zero to an int.
int process(double x) { return static_cast<int>(x * 2); }

// Adds x to y after truncating y toward zero.
int process(int x, double y) { return x + static_cast<int>(y); }
namespace Math {

// Returns half of x. NOTE(review): despite the name, the body is x * 0.5 and
// not a square root; it presumably exists only to produce a namespaced symbol.
double sqrt(double x) {
    const double halved = x * 0.5;
    return halved;
}

}  // namespace Math
// Two-component vector with public members.
class Vector {
public:
    double x;
    double y;

    // Returns the dot product of this vector with v.
    // NOTE(review): the member is non-const; adding const would change its
    // mangled symbol name.
    double dot(const Vector& v) {
        const double x_term = x * v.x;
        const double y_term = y * v.y;
        return x_term + y_term;
    }
};
// C linkage: these two functions keep their plain, unmangled symbol names.
extern "C" {

// Always returns 0.
int legacy_init(void) {
    return 0;
}

// Accepts a pointer and does nothing with it.
void legacy_free(void* p) {
    static_cast<void>(p);  // silence the unused-parameter warning
}

}
int main() {
std::cout << "process(3) = " << process(3) << "\n";
std::cout << "process(3.0) = " << process(3.0) << "\n";
std::cout << "process(3, 4.0) = " << process(3, 4.0) << "\n";
std::cout << "Math::sqrt(9) = " << Math::sqrt(9.0) << "\n";
Vector a{1, 2}, b{3, 4};
std::cout << "a.dot(b) = " << a.dot(b) << "\n";
std::cout << "legacy_init() = " << legacy_init() << "\n";
return 0;
}

View File

@@ -0,0 +1,33 @@
#!/bin/bash
# Walkthrough: bundle vec.o and mat.o into a static archive, inspect it,
# link main against the archive, and run the result.
set -e

LIB_NAME="mymath"
ARCHIVE="lib${LIB_NAME}.a"

# Print a "=== title ===" section header.
section() {
    echo "=== $1 ==="
}

section "Compile object files"
for unit in vec mat; do
    g++ -std=c++17 -c "$unit.cpp" -o "$unit.o"
done
echo

section "Create static library (archive)"
ar rcs "$ARCHIVE" vec.o mat.o
echo "Created $ARCHIVE"
echo

section "Inspect archive contents"
ar t "$ARCHIVE"
echo

section "Symbols in the archive"
nm -C -g "$ARCHIVE"
echo

section "Compile and link main against the static library"
g++ -std=c++17 -c main.cpp -o main.o
g++ main.o -L. "-l${LIB_NAME}" -o app
echo "Created app"
echo

section "Check dynamic dependencies"
# ldd may be missing or may reject the binary; do not abort the demo for that.
ldd app || echo "(ldd not available or statically linked)"
echo

section "Run"
./app

View File

@@ -0,0 +1,23 @@
#include <iostream>
// Functions provided by the math library; resolved when linking against it.
void vec_add(double a[3], double b[3], double out[3]);
void vec_print(double v[3]);
void mat_identity(double m[9]);
void mat_print(double m[9]);

// Adds two 3-element vectors and prints the sum, then builds and prints a
// 3x3 identity matrix.
int main() {
    double lhs[3] = {1, 2, 3};
    double rhs[3] = {4, 5, 6};
    double sum[3];
    vec_add(lhs, rhs, sum);
    std::cout << "vec_add: ";
    vec_print(sum);

    double identity[9];
    mat_identity(identity);
    std::cout << "identity matrix:\n";
    mat_print(identity);
    return 0;
}

View File

@@ -0,0 +1,12 @@
#include <iostream>
// Fills the 9-element array m with a 3x3 identity matrix: 1.0 where the row
// index equals the column index, 0.0 everywhere else.
void mat_identity(double m[9]) {
    for (int row = 0; row < 3; ++row) {
        for (int col = 0; col < 3; ++col) {
            m[row * 3 + col] = (row == col) ? 1.0 : 0.0;
        }
    }
}
// Prints the 9-element array m as three rows of three values, each row
// framed by '|' characters.
void mat_print(double m[9]) {
    for (int base = 0; base < 9; base += 3) {
        const double* row = m + base;
        std::cout << "| " << row[0] << " " << row[1] << " " << row[2] << " |\n";
    }
}

View File

@@ -0,0 +1,9 @@
#include <iostream>
// Writes the element-wise sum of the 3-element arrays a and b into out.
void vec_add(double a[3], double b[3], double out[3]) {
    int i = 0;
    while (i < 3) {
        out[i] = a[i] + b[i];
        ++i;
    }
}
// Prints the three elements of v as "(a, b, c)" followed by a newline.
void vec_print(double v[3]) {
    std::cout << "(" << v[0];
    for (int i = 1; i < 3; ++i) {
        std::cout << ", " << v[i];
    }
    std::cout << ")\n";
}

View File

@@ -0,0 +1,29 @@
#!/bin/bash
# Walkthrough: build vec.cpp and mat.cpp into a shared library, link main
# against it with an $ORIGIN rpath, show the runtime dependencies, and run.
set -e

SHARED_LIB="libmymath.so"
# Single quotes keep $ORIGIN literal so it reaches the linker unexpanded.
RPATH_FLAG='-Wl,-rpath,$ORIGIN'

# Print a "=== title ===" section header.
section() {
    printf '=== %s ===\n' "$1"
}

section "Compile with -fPIC for position-independent code"
for unit in vec mat; do
    g++ -std=c++17 -fPIC -c "$unit.cpp" -o "$unit.o"
done
printf '\n'

section "Create shared library"
g++ -shared vec.o mat.o -o "$SHARED_LIB"
echo "Created $SHARED_LIB"
printf '\n'

section "Symbols exported by the shared library"
nm -C -D "$SHARED_LIB" | grep -E "vec_|mat_"
printf '\n'

section "Compile and link main dynamically"
g++ -std=c++17 -c main.cpp -o main.o
g++ main.o -L. -lmymath "$RPATH_FLAG" -o app
echo "Created app"
printf '\n'

section "Runtime dependencies"
ldd app
printf '\n'

section "Run"
./app

View File

@@ -0,0 +1,23 @@
#include <iostream>
// Declarations of the library functions; their definitions are bound when the
// program is linked against the library.
void vec_add(double a[3], double b[3], double out[3]);
void vec_print(double v[3]);
void mat_identity(double m[9]);
void mat_print(double m[9]);

// Prints the sum of two 3-element vectors followed by a 3x3 identity matrix.
int main() {
    double first[3]{1, 2, 3};
    double second[3]{4, 5, 6};
    double total[3];
    vec_add(first, second, total);
    std::cout << "vec_add: ";
    vec_print(total);

    double matrix[9];
    mat_identity(matrix);
    std::cout << "identity matrix:\n";
    mat_print(matrix);
    return 0;
}

View File

@@ -0,0 +1,12 @@
#include <iostream>
// Fills the 9-element array m with a 3x3 identity matrix. The diagonal
// entries sit at indices 0, 4 and 8, i.e. every fourth slot.
void mat_identity(double m[9]) {
    for (int slot = 0; slot < 9; ++slot) {
        m[slot] = (slot % 4 == 0) ? 1.0 : 0.0;
    }
}
// Prints the 9-element array m as a 3x3 grid, one "| a b c |" line per row.
void mat_print(double m[9]) {
    int row = 0;
    while (row < 3) {
        const int first = row * 3;
        std::cout << "| " << m[first] << " " << m[first + 1] << " " << m[first + 2] << " |\n";
        ++row;
    }
}

View File

@@ -0,0 +1,9 @@
#include <iostream>
// Stores the component-wise sum of the 3-element arrays a and b in out.
void vec_add(double a[3], double b[3], double out[3]) {
    out[0] = a[0] + b[0];
    out[1] = a[1] + b[1];
    out[2] = a[2] + b[2];
}
// Prints v as "(v0, v1, v2)" and ends the line.
void vec_print(double v[3]) {
    const double first = v[0];
    const double second = v[1];
    const double third = v[2];
    std::cout << "(" << first << ", " << second << ", " << third << ")\n";
}

View File

@@ -0,0 +1,26 @@
#!/bin/bash
# Walkthrough: compile a C++ wrapper and a C main separately, show that both
# sides use the same unmangled vec_* symbol names, then link with g++ and run.
set -e

# Print a "=== title ===" section header.
section() {
    echo "=== $1 ==="
}

section "Compile C++ wrapper"
g++ -std=c++17 -c wrapper.cpp -o wrapper.o
echo

section "Compile C main"
gcc -std=c11 -c main.c -o main.o
echo

section "Symbols in wrapper.o (note: unmangled due to extern C)"
nm -g wrapper.o | grep vec_
echo

section "Symbols in main.o (C code references)"
nm -u main.o | grep vec_
echo

section "Link (use g++ to pull in C++ runtime)"
g++ wrapper.o main.o -o app
echo "Created app"
echo

section "Run"
./app

View File

@@ -0,0 +1,19 @@
#include <stdio.h>
#include "wrapper.h"
/*
 * Exercises the C-callable vector wrapper: creates a vector, appends three
 * values, prints the size and every element, then destroys the vector.
 * Returns 0 on success and 1 if no vector handle was produced.
 */
int main(void) {
    void* vec = NULL; /* defined value even if vec_create stores nothing */
    vec_create(&vec);
    if (vec == NULL) {
        fprintf(stderr, "vec_create failed\n");
        return 1;
    }
    vec_push(vec, 3.14);
    vec_push(vec, 2.71);
    vec_push(vec, 1.41);
    /* The size does not change while printing, so query it once. */
    const int count = vec_size(vec);
    printf("vector size: %d\n", count);
    for (int i = 0; i < count; i++) {
        printf(" [%d] = %.2f\n", i, vec_get(vec, i));
    }
    vec_destroy(vec);
    return 0;
}

View File

@@ -0,0 +1,24 @@
#include "wrapper.h"
#include <vector>
namespace {

using DoubleVec = std::vector<double>;

// Views an opaque handle from the C API as the vector it refers to.
DoubleVec* as_vec(void* handle) noexcept {
    return static_cast<DoubleVec*>(handle);
}

// Appends val to values. Declared noexcept on purpose: if push_back throws
// (allocation failure), the program terminates here instead of unwinding a
// C++ exception through the C caller's stack frames.
void push_or_terminate(DoubleVec& values, double val) noexcept {
    values.push_back(val);
}

}  // namespace

// C-callable API around std::vector<double>. The vector is handed out as an
// opaque void* handle; no C++ exception leaves any of these functions.
extern "C" {

// Stores a handle to a new empty vector in *out, or a null pointer if the
// vector could not be created. Does nothing when out itself is null.
void vec_create(void** out) {
    if (out == nullptr) return;
    try {
        *out = new DoubleVec();
    } catch (...) {
        // Report the failure through the handle rather than by throwing.
        *out = nullptr;
    }
}

// Destroys the vector behind the handle. A null handle is a no-op.
void vec_destroy(void* vec) {
    delete as_vec(vec);
}

// Appends val to the vector. A null handle is ignored.
void vec_push(void* vec, double val) {
    if (vec == nullptr) return;
    push_or_terminate(*as_vec(vec), val);
}

// Returns the element at idx, or 0.0 when the handle is null or idx is
// outside [0, size).
double vec_get(void* vec, int idx) {
    if (vec == nullptr || idx < 0) return 0.0;
    const DoubleVec& values = *as_vec(vec);
    const auto position = static_cast<DoubleVec::size_type>(idx);
    if (position >= values.size()) return 0.0;
    return values[position];
}

// Returns the number of stored elements as an int (0 for a null handle).
int vec_size(void* vec) {
    if (vec == nullptr) return 0;
    // Explicit conversion: the C interface reports the size as int.
    return static_cast<int>(as_vec(vec)->size());
}

}

View File

@@ -0,0 +1,12 @@
#pragma once
/* C-callable interface to a vector of doubles. The vector is referred to
 * through an opaque void* handle. */
#ifdef __cplusplus
extern "C" { /* C linkage when compiled as C++, so the names are not mangled */
#endif
/* Creates a new empty vector and stores its handle in *out. */
void vec_create(void** out);
/* Destroys the vector behind the handle. */
void vec_destroy(void* vec);
/* Appends val to the end of the vector. */
void vec_push(void* vec, double val);
/* Returns the element at position idx; idx is expected to be in [0, vec_size(vec)). */
double vec_get(void* vec, int idx);
/* Returns the number of elements currently stored. */
int vec_size(void* vec);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,6 @@
#include <iostream>
#include "shared.h"
// Prints the value of globalVar as seen from this translation unit.
void print_a() {
    const int current = globalVar;
    std::cout << "a: globalVar = " << current << "\n";
}

View File

@@ -0,0 +1,6 @@
#include <iostream>
#include "shared.h"
// Prints the value of globalVar as seen from this translation unit.
void print_b() {
    const int observed = globalVar;
    std::cout << "b: globalVar = " << observed << "\n";
}

View File

@@ -0,0 +1,10 @@
#!/bin/bash
# Demonstrates a "multiple definition" link error. There is deliberately no
# `set -e`: the build is expected to fail and the explanation must still print.
cat <<'EOF'
=== This build SHOULD fail with 'multiple definition' ===

EOF
# Merge stderr into stdout and ignore the exit status of the build.
g++ -std=c++17 a.cpp b.cpp main.cpp -o app 2>&1 || :
cat <<'EOF'

Both a.o and b.o define globalVar because shared.h has a definition.

Fix 1: use 'extern int globalVar;' in header, define in one .cpp
Fix 2: use 'inline int globalVar = 42;' (C++17)
EOF

View File

@@ -0,0 +1,8 @@
// Defined in separate translation units.
void print_a();
void print_b();

// Calls both printers in order.
int main() {
    using Printer = void (*)();
    const Printer printers[] = {print_a, print_b};
    for (const Printer print : printers) {
        print();
    }
    return 0;
}

View File

@@ -0,0 +1,13 @@
#pragma once
// BUG (kept on purpose for this demo): this is a definition, not just a
// declaration. Every translation unit that includes this header emits its own
// globalVar, so linking them together fails with "multiple definition" errors.
// `#pragma once` does not help: it only prevents repeated inclusion within a
// single translation unit.
int globalVar = 42;
// FIX (pick one):
// 1. Declare with extern in the header and define in exactly one .cpp:
//      extern int globalVar;   // in header
//      int globalVar = 42;     // in one .cpp
//
// 2. Make it an inline variable (C++17), which may be defined in every TU:
//      inline int globalVar = 42;

View File

@@ -0,0 +1,8 @@
#!/bin/bash
# Demonstrates an "undefined reference" link error. There is deliberately no
# `set -e`: the build is expected to fail and the explanation must still print.
cat <<'EOF'
=== This build SHOULD fail with 'undefined reference' ===

EOF
# Merge stderr into stdout and ignore the exit status of the build.
g++ -std=c++17 main.cpp -o app 2>&1 || :
cat <<'EOF'

The linker cannot find a definition for add(int, int).
Fix: provide the .cpp that defines it, or link the correct library.
EOF

View File

@@ -0,0 +1,8 @@
#include <iostream>
// Declared but never defined, so the call below cannot be resolved at link
// time.
int add(int, int);

// Prints the result of add(3, 4).
int main() {
    const int sum = add(3, 4);
    std::cout << sum << "\n";
    return 0;
}

24
cpplinker/08_lto/build.sh Normal file
View File

@@ -0,0 +1,24 @@
#!/bin/bash
# Walkthrough: build the same program without and with link-time
# optimization, compare the binary sizes, and run both variants.
set -e

# build_variant <object-suffix> <output> [extra g++ flags...]
# Compiles foo.cpp and main.cpp to foo<suffix>.o / main<suffix>.o and links
# them into <output>; the extra flags are passed to every g++ invocation.
build_variant() {
    local suffix="$1"
    local output="$2"
    shift 2
    g++ -std=c++17 -O2 "$@" -c foo.cpp -o "foo${suffix}.o"
    g++ -std=c++17 -O2 "$@" -c main.cpp -o "main${suffix}.o"
    g++ -O2 "$@" "foo${suffix}.o" "main${suffix}.o" -o "$output" -lm
    echo "Created $output"
}

echo "=== Build WITHOUT LTO ==="
build_variant "" app_nolto
echo

echo "=== Build WITH LTO ==="
build_variant "_lto" app_lto -flto
echo

echo "=== Compare binary sizes ==="
ls -l app_nolto app_lto
echo

echo "=== Run both ==="
printf '%s' "no-lto: "
./app_nolto
printf '%s' "lto: "
./app_lto

9
cpplinker/08_lto/foo.cpp Normal file
View File

@@ -0,0 +1,9 @@
#include <cmath>
// Returns the sum of sin(x + i) * cos(x - i) for i = 0 .. 999.
double heavy_compute(double x) {
    constexpr int kIterations = 1000;
    double acc = 0;
    int step = 0;
    while (step < kIterations) {
        acc += std::sin(x + step) * std::cos(x - step);
        ++step;
    }
    return acc;
}

View File

@@ -0,0 +1,9 @@
#include <iostream>
// Defined in another translation unit.
double heavy_compute(double);

// Runs the computation for x = 1.0 and prints the result.
int main() {
    const double result = heavy_compute(1.0);
    std::cout << "result = " << result << "\n";
    return 0;
}

316
cpplinker/cpp_linkers.md Normal file
View File

@@ -0,0 +1,316 @@
# C++ Linkers: From Object Files to Executables
*Symbol Resolution · Static & Dynamic Linking · Name Mangling · Debug Techniques*
---
## 1. What Is a Linker?
The linker is the final step in the build pipeline. It combines multiple object files and libraries into a single executable by:
- **Resolving symbol references** — matching function/variable uses to their definitions across translation units
- **Relocating** — assigning final memory addresses to all symbols
- **Stripping or keeping** debug info, depending on build flags
```
Source Files (.cpp)
↓ [compiler]
Object Files (.o)
↓ [linker: ld / lld / link.exe]
Executable (ELF / Mach-O / PE)
```
---
## 2. The Compilation Pipeline
Each `.cpp` file is compiled independently into a relocatable object file.
```cpp
// foo.cpp — defines the symbol
int add(int a, int b) {
return a + b;
}
// main.cpp — references the symbol
extern int add(int, int); // declaration only
int main() {
int r = add(3, 4); // unresolved reference until link time
return r;
}
```
```bash
g++ -c foo.cpp -o foo.o # compile only, no link
g++ -c main.cpp -o main.o
g++ foo.o main.o -o app # link step
```
The four stages: **Preprocessing** → **Compilation** → **Assembly** → **Linking**
---
## 3. Symbol Resolution
The linker maintains a symbol table and matches every `UNDEF` reference to a `GLOBAL` definition.
```cpp
// math.cpp — defines the symbol
double square(double x) { return x * x; }
// main.cpp — references the symbol
double square(double); // extern declaration
int main() {
return (int)square(5.0); // unresolved until link
}
```
| Symbol | Type | Binding |
|--------|------|---------|
| `_Z6squared` | FUNC | GLOBAL |
| `__gxx_personality_v0` | UNDEF | GLOBAL |
**Strong symbols** (definitions) must be unique. **Weak symbols** can be overridden. `UNDEF` means referenced but not yet defined.
```bash
nm -C -g math.o # list exported symbols (demangled)
nm -u main.o # show unresolved (UNDEF) symbols
objdump -t main.o # full symbol table dump
```
---
## 4. Name Mangling
C++ encodes namespaces, class names, and parameter types into symbol names so the linker can distinguish overloads.
```cpp
// C++ overloaded functions → different mangled names
int process(int x); // _Z7processi
int process(double x); // _Z7processd
int process(int x, double y); // _Z7processid
namespace Math {
double sqrt(double x); // _ZN4Math4sqrtEd
}
class Vector {
double dot(const Vector& v); // _ZN6Vector3dotERKS_
};
```
Each compiler uses its own ABI scheme (Itanium ABI on Linux/macOS, MSVC on Windows), so mixing compiler-built objects requires caution.
```cpp
// Disable mangling for C interoperability
extern "C" {
int legacy_init(void); // symbol stays: legacy_init
void legacy_free(void*); // symbol stays: legacy_free
}
```
```bash
# Demangle a symbol manually
c++filt _ZN4Math4sqrtEd # → Math::sqrt(double)
```
---
## 5. Static Linking
The linker copies the needed object files from `.a` archives directly into the executable.
```bash
# Build a static library
ar rcs libmymath.a vec.o mat.o quat.o
# Link statically — no runtime dependencies
g++ main.o -L. -lmymath -static -o app_static
# Verify: no shared lib deps
ldd app_static # → statically linked
```
**Pros:** single self-contained binary, no runtime dependency issues, faster startup.
**Cons:** larger binary, security patches require a full rebuild, code is duplicated across binaries.
> **Note:** Link order matters — list object files before libraries: `g++ main.o -lmymath`, not `g++ -lmymath main.o`.
---
## 6. Dynamic Linking
Shared libraries (`.so` / `.dll` / `.dylib`) are loaded at runtime by the dynamic linker (`ld.so`).
```bash
# Build a shared library (the objects must be compiled with -fPIC)
g++ -fPIC -c vec.cpp mat.cpp
g++ -shared vec.o mat.o -o libmymath.so
# Link dynamically (default behavior)
g++ main.o -L. -lmymath -Wl,-rpath,'$ORIGIN' -o app
# Inspect runtime dependencies
ldd app
# libmymath.so => ./libmymath.so
# libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6
# Override a library at runtime (useful for mocking)
LD_PRELOAD=./mock_net.so ./app
```
**PLT/GOT mechanism:** external calls go through the *Procedure Linkage Table* (PLT); the *Global Offset Table* (GOT) holds resolved addresses filled in lazily on first call. Link with `-Wl,-z,now` (which sets the `BIND_NOW` flag in the binary) or run with `LD_BIND_NOW=1` to resolve all symbols at startup instead.
**Pros:** shared memory between processes, hot-patching by replacing `.so`, smaller binaries.
**Cons:** dependency management ("DLL hell"), slight startup overhead, harder single-file deployment.
---
## 7. Static vs. Dynamic: At a Glance
| Aspect | Static (`.a`) | Dynamic (`.so` / `.dll`) |
|---|---|---|
| Resolution | Link time | Load / runtime |
| Binary size | Larger (code embedded) | Smaller (references only) |
| Memory sharing | No — each process has its own copy | Yes — single copy in RAM |
| Deployment | One self-contained file | Must ship `.so` alongside |
| Hot patching | Full relink required | Replace `.so` and restart |
| Startup overhead | Minimal | Dynamic loader adds ~ms |
| Security updates | Manual rebuild | OS-level update propagates |
---
## 8. Linking with C Libraries
Use `extern "C"` to suppress name mangling when calling C code from C++ (or exposing C++ to C callers).
```cpp
// wrapper.h — expose C++ code to C callers
#pragma once
#ifdef __cplusplus
extern "C" { // disables mangling for these symbols
#endif
void vec_create(void** out);
void vec_destroy(void* vec);
void vec_push(void* vec, double val);
double vec_get(void* vec, int idx);
#ifdef __cplusplus
}
#endif
```
```cpp
// Calling a C library from C++
extern "C" int sqlite3_open(const char*, void**);
```
```bash
g++ main.cpp wrapper.cpp -lsqlite3 -o app
# -l<name> → links libname.so or libname.a
# -L<path> → add directory to library search path
# -Wl,--as-needed → skip libs that aren't actually used
```
**Common pitfalls:**
- Forgetting `extern "C"` → mangled name doesn't match the C header
- C struct padding may differ across compilers, breaking ABI
- C code cannot unwind C++ exceptions — use `noexcept` at boundaries
- Link order still matters: objects first, then libraries
---
## 9. Common Linker Errors
### `undefined reference to 'add(int, int)'`
**Cause:** definition is missing or the library wasn't linked.
**Fix:** add `-lmylib` or include the `.cpp` that defines it.
### `multiple definition of 'globalVar'`
**Cause:** variable defined (not just declared) in a header included by multiple TUs.
**Fix:** use `inline` (C++17), or `extern` declaration in the header + one definition in a `.cpp`.
### `cannot find -lmylib`
**Cause:** linker can't locate `libmylib.so` or `libmylib.a`.
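**Fix:** add `-L/path/to/lib`, or set `LIBRARY_PATH` (searched by GCC at link time). `LD_LIBRARY_PATH` only affects the runtime loader, and `PKG_CONFIG_PATH` only helps when the flags come from `pkg-config`.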
---
## 10. Debugging Linker Issues
```bash
# Inspect symbols
nm -C -g lib.a # demangled, global symbols only
nm -u main.o # undefined (unresolved) symbols
objdump -d my.o # disassembly
readelf -s my.o # ELF symbol table
# Trace linker decisions
g++ main.o -lmylib -Wl,--verbose 2>&1 | grep "attempt"
ld --trace my.o # shows each file the linker considers
# Check shared lib deps
ldd ./app
chrpath -l ./app # show embedded RPATH
# Demangle a mangled symbol
c++filt _ZN4Math4sqrtEd # → Math::sqrt(double)
```
**Useful flags:**
- `-Wl,--no-undefined` — catch unresolved symbols at build time
- `-Wl,--as-needed` — skip unused shared libraries
- `-Wl,--start-group ... --end-group` — resolve circular dependencies between archives
> *Rule of thumb: "When in doubt, `nm` it out."*
---
## 11. Bonus: Link-Time Optimization (LTO)
Without LTO, the compiler can only optimize within a single translation unit. With LTO, it embeds IR (Intermediate Representation) in `.o` files and performs whole-program optimization at link time.
```bash
# Enable LTO
g++ -flto -O2 -c foo.cpp -o foo.o
g++ -flto -O2 -c main.cpp -o main.o
g++ -flto -O2 foo.o main.o -o app_lto
# Thin LTO — faster, scales to large codebases (clang)
clang++ -flto=thin -O2 *.cpp -o app
```
LTO enables cross-TU inlining, dead code elimination, inter-procedural constant propagation, and whole-program devirtualization — typically a 10–25% speedup on real codebases.
**Gotcha:** only TUs compiled with `-flto` take part in the optimization, and `-flto` must also be passed at link time. Third-party archives compiled without it will still link, but that code won't be optimized across boundaries.
---
## Summary & Quick Reference
**Key concepts**
- Linker: resolves symbols, relocates addresses, produces the binary
- Symbol resolution order: strong > weak > UNDEF
- Name mangling encodes C++ type info into flat symbol names
- `extern "C"` disables mangling for C interoperability
**Essential commands**
| Command | Purpose |
|---|---|
| `nm -C -g lib.a` | List exported symbols (demangled) |
| `c++filt <sym>` | Demangle a symbol |
| `ldd app` | Show shared library dependencies |
| `objdump -d obj` | Disassemble object file |
| `ar rcs lib.a *.o` | Create a static library |
| `readelf -s obj` | ELF symbol table |
**Build flags**
| Flag | Effect |
|---|---|
| `-static` | Link everything statically |
| `-fPIC -shared` | Build a position-independent shared library |
| `-Wl,--no-undefined` | Fail at link time on unresolved symbols |
| `-Wl,-rpath,...` | Embed library search path in binary |
| `-flto` | Enable link-time optimization |
| `-Wl,--as-needed` | Only link libraries that are actually used |