From 6f3d98f388e260bee585987cb09a11f5e69fbb3d Mon Sep 17 00:00:00 2001 From: mnerv <24420859+mnerv@users.noreply.github.com> Date: Tue, 24 Feb 2026 10:21:15 +0100 Subject: [PATCH] C++ linker presentation --- cpplinker/.gitignore | 9 + cpplinker/01_compilation_pipeline/build.sh | 24 ++ cpplinker/01_compilation_pipeline/foo.cpp | 3 + cpplinker/01_compilation_pipeline/main.cpp | 9 + cpplinker/02_symbol_resolution/build.sh | 23 ++ cpplinker/02_symbol_resolution/main.cpp | 9 + cpplinker/02_symbol_resolution/math.cpp | 3 + cpplinker/03_name_mangling/build.sh | 27 ++ cpplinker/03_name_mangling/mangling.cpp | 33 ++ cpplinker/04_static_linking/build.sh | 33 ++ cpplinker/04_static_linking/main.cpp | 23 ++ cpplinker/04_static_linking/mat.cpp | 12 + cpplinker/04_static_linking/vec.cpp | 9 + cpplinker/05_dynamic_linking/build.sh | 29 ++ cpplinker/05_dynamic_linking/main.cpp | 23 ++ cpplinker/05_dynamic_linking/mat.cpp | 12 + cpplinker/05_dynamic_linking/vec.cpp | 9 + cpplinker/06_c_interop/build.sh | 26 ++ cpplinker/06_c_interop/main.c | 19 ++ cpplinker/06_c_interop/wrapper.cpp | 24 ++ cpplinker/06_c_interop/wrapper.h | 12 + cpplinker/07_linker_errors/multiple_def/a.cpp | 6 + cpplinker/07_linker_errors/multiple_def/b.cpp | 6 + .../07_linker_errors/multiple_def/build.sh | 10 + .../07_linker_errors/multiple_def/main.cpp | 8 + .../07_linker_errors/multiple_def/shared.h | 13 + .../07_linker_errors/undefined_ref/build.sh | 8 + .../07_linker_errors/undefined_ref/main.cpp | 8 + cpplinker/08_lto/build.sh | 24 ++ cpplinker/08_lto/foo.cpp | 9 + cpplinker/08_lto/main.cpp | 9 + cpplinker/cpp_linkers.md | 316 ++++++++++++++++++ 32 files changed, 788 insertions(+) create mode 100644 cpplinker/.gitignore create mode 100644 cpplinker/01_compilation_pipeline/build.sh create mode 100644 cpplinker/01_compilation_pipeline/foo.cpp create mode 100644 cpplinker/01_compilation_pipeline/main.cpp create mode 100644 cpplinker/02_symbol_resolution/build.sh create mode 100644 
cpplinker/02_symbol_resolution/main.cpp create mode 100644 cpplinker/02_symbol_resolution/math.cpp create mode 100644 cpplinker/03_name_mangling/build.sh create mode 100644 cpplinker/03_name_mangling/mangling.cpp create mode 100644 cpplinker/04_static_linking/build.sh create mode 100644 cpplinker/04_static_linking/main.cpp create mode 100644 cpplinker/04_static_linking/mat.cpp create mode 100644 cpplinker/04_static_linking/vec.cpp create mode 100644 cpplinker/05_dynamic_linking/build.sh create mode 100644 cpplinker/05_dynamic_linking/main.cpp create mode 100644 cpplinker/05_dynamic_linking/mat.cpp create mode 100644 cpplinker/05_dynamic_linking/vec.cpp create mode 100644 cpplinker/06_c_interop/build.sh create mode 100644 cpplinker/06_c_interop/main.c create mode 100644 cpplinker/06_c_interop/wrapper.cpp create mode 100644 cpplinker/06_c_interop/wrapper.h create mode 100644 cpplinker/07_linker_errors/multiple_def/a.cpp create mode 100644 cpplinker/07_linker_errors/multiple_def/b.cpp create mode 100644 cpplinker/07_linker_errors/multiple_def/build.sh create mode 100644 cpplinker/07_linker_errors/multiple_def/main.cpp create mode 100644 cpplinker/07_linker_errors/multiple_def/shared.h create mode 100644 cpplinker/07_linker_errors/undefined_ref/build.sh create mode 100644 cpplinker/07_linker_errors/undefined_ref/main.cpp create mode 100644 cpplinker/08_lto/build.sh create mode 100644 cpplinker/08_lto/foo.cpp create mode 100644 cpplinker/08_lto/main.cpp create mode 100644 cpplinker/cpp_linkers.md diff --git a/cpplinker/.gitignore b/cpplinker/.gitignore new file mode 100644 index 0000000..03b610f --- /dev/null +++ b/cpplinker/.gitignore @@ -0,0 +1,9 @@ +*.o +*.a +*.so +*.dll +*.lib +*.exp +*.dylib +app +app_* diff --git a/cpplinker/01_compilation_pipeline/build.sh b/cpplinker/01_compilation_pipeline/build.sh new file mode 100644 index 0000000..f4a8e43 --- /dev/null +++ b/cpplinker/01_compilation_pipeline/build.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -e + +echo "=== Step 1: 
Compile each .cpp to object files ===" +g++ -std=c++17 -c foo.cpp -o foo.o +g++ -std=c++17 -c main.cpp -o main.o +echo "Created foo.o and main.o" + +echo "" +echo "=== Step 2: Inspect object files with nm ===" +echo "--- foo.o symbols (defines add) ---" +nm -C foo.o +echo "" +echo "--- main.o symbols (references add) ---" +nm -C main.o + +echo "" +echo "=== Step 3: Link object files into executable ===" +g++ foo.o main.o -o app +echo "Created app" + +echo "" +echo "=== Step 4: Run ===" +./app diff --git a/cpplinker/01_compilation_pipeline/foo.cpp b/cpplinker/01_compilation_pipeline/foo.cpp new file mode 100644 index 0000000..59aab49 --- /dev/null +++ b/cpplinker/01_compilation_pipeline/foo.cpp @@ -0,0 +1,3 @@ +int add(int a, int b) { + return a + b; +} diff --git a/cpplinker/01_compilation_pipeline/main.cpp b/cpplinker/01_compilation_pipeline/main.cpp new file mode 100644 index 0000000..c641ae9 --- /dev/null +++ b/cpplinker/01_compilation_pipeline/main.cpp @@ -0,0 +1,9 @@ +#include + +extern int add(int, int); + +int main() { + int r = add(3, 4); + std::cout << "add(3, 4) = " << r << "\n"; + return 0; +} diff --git a/cpplinker/02_symbol_resolution/build.sh b/cpplinker/02_symbol_resolution/build.sh new file mode 100644 index 0000000..e185747 --- /dev/null +++ b/cpplinker/02_symbol_resolution/build.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -e + +echo "=== Compile to object files ===" +g++ -std=c++17 -c math.cpp -o math.o +g++ -std=c++17 -c main.cpp -o main.o + +echo "" +echo "=== math.o: exported symbols (demangled) ===" +nm -C -g math.o + +echo "" +echo "=== main.o: unresolved (UNDEF) symbols ===" +nm -C -u main.o + +echo "" +echo "=== main.o: full symbol table ===" +objdump -t main.o + +echo "" +echo "=== Link and run ===" +g++ math.o main.o -o app +./app diff --git a/cpplinker/02_symbol_resolution/main.cpp b/cpplinker/02_symbol_resolution/main.cpp new file mode 100644 index 0000000..738d3df --- /dev/null +++ b/cpplinker/02_symbol_resolution/main.cpp @@ -0,0 +1,9 @@ 
+#include + +double square(double); + +int main() { + double r = square(5.0); + std::cout << "square(5.0) = " << r << "\n"; + return 0; +} diff --git a/cpplinker/02_symbol_resolution/math.cpp b/cpplinker/02_symbol_resolution/math.cpp new file mode 100644 index 0000000..471b647 --- /dev/null +++ b/cpplinker/02_symbol_resolution/math.cpp @@ -0,0 +1,3 @@ +#include + +double square(double x) { return x * x; } diff --git a/cpplinker/03_name_mangling/build.sh b/cpplinker/03_name_mangling/build.sh new file mode 100644 index 0000000..3606dbe --- /dev/null +++ b/cpplinker/03_name_mangling/build.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -e + +echo "=== Compile to object file ===" +g++ -std=c++17 -c mangling.cpp -o mangling.o + +echo "" +echo "=== Raw mangled symbols ===" +nm -g mangling.o | grep -E "process|sqrt|dot|legacy" + +echo "" +echo "=== Demangled symbols ===" +nm -C -g mangling.o | grep -E "process|sqrt|dot|legacy" + +echo "" +echo "=== Demangle individual symbols ===" +echo "_Z7processi" | c++filt +echo "_Z7processd" | c++filt +echo "_Z7processid" | c++filt +echo "_ZN4Math4sqrtEd" | c++filt +echo "_ZN6Vector3dotERKS_" | c++filt +echo "legacy_init" | c++filt + +echo "" +echo "=== Build and run ===" +g++ -std=c++17 mangling.o -o app +./app diff --git a/cpplinker/03_name_mangling/mangling.cpp b/cpplinker/03_name_mangling/mangling.cpp new file mode 100644 index 0000000..2791e10 --- /dev/null +++ b/cpplinker/03_name_mangling/mangling.cpp @@ -0,0 +1,33 @@ +#include + +int process(int x) { return x * 2; } +int process(double x) { return (int)(x * 2); } +int process(int x, double y) { return x + (int)y; } + +namespace Math { + double sqrt(double x) { return x * 0.5; } +} + +class Vector { +public: + double x, y; + double dot(const Vector& v) { return x * v.x + y * v.y; } +}; + +extern "C" { + int legacy_init(void) { return 0; } + void legacy_free(void* p) { (void)p; } +} + +int main() { + std::cout << "process(3) = " << process(3) << "\n"; + std::cout << "process(3.0) = " << 
process(3.0) << "\n"; + std::cout << "process(3, 4.0) = " << process(3, 4.0) << "\n"; + std::cout << "Math::sqrt(9) = " << Math::sqrt(9.0) << "\n"; + + Vector a{1, 2}, b{3, 4}; + std::cout << "a.dot(b) = " << a.dot(b) << "\n"; + + std::cout << "legacy_init() = " << legacy_init() << "\n"; + return 0; +} diff --git a/cpplinker/04_static_linking/build.sh b/cpplinker/04_static_linking/build.sh new file mode 100644 index 0000000..e06f6c3 --- /dev/null +++ b/cpplinker/04_static_linking/build.sh @@ -0,0 +1,33 @@ +#!/bin/bash +set -e + +echo "=== Compile object files ===" +g++ -std=c++17 -c vec.cpp -o vec.o +g++ -std=c++17 -c mat.cpp -o mat.o + +echo "" +echo "=== Create static library (archive) ===" +ar rcs libmymath.a vec.o mat.o +echo "Created libmymath.a" + +echo "" +echo "=== Inspect archive contents ===" +ar t libmymath.a + +echo "" +echo "=== Symbols in the archive ===" +nm -C -g libmymath.a + +echo "" +echo "=== Compile and link main against the static library ===" +g++ -std=c++17 -c main.cpp -o main.o +g++ main.o -L. 
-lmymath -o app +echo "Created app" + +echo "" +echo "=== Check dynamic dependencies ===" +ldd app || echo "(ldd not available or statically linked)" + +echo "" +echo "=== Run ===" +./app diff --git a/cpplinker/04_static_linking/main.cpp b/cpplinker/04_static_linking/main.cpp new file mode 100644 index 0000000..b826340 --- /dev/null +++ b/cpplinker/04_static_linking/main.cpp @@ -0,0 +1,23 @@ +#include + +void vec_add(double[3], double[3], double[3]); +void vec_print(double[3]); +void mat_identity(double[9]); +void mat_print(double[9]); + +int main() { + double a[3] = {1, 2, 3}; + double b[3] = {4, 5, 6}; + double c[3]; + + vec_add(a, b, c); + std::cout << "vec_add: "; + vec_print(c); + + double m[9]; + mat_identity(m); + std::cout << "identity matrix:\n"; + mat_print(m); + + return 0; +} diff --git a/cpplinker/04_static_linking/mat.cpp b/cpplinker/04_static_linking/mat.cpp new file mode 100644 index 0000000..a327616 --- /dev/null +++ b/cpplinker/04_static_linking/mat.cpp @@ -0,0 +1,12 @@ +#include + +void mat_identity(double m[9]) { + for (int i = 0; i < 9; i++) m[i] = 0.0; + m[0] = m[4] = m[8] = 1.0; +} + +void mat_print(double m[9]) { + for (int r = 0; r < 3; r++) { + std::cout << "| " << m[r*3] << " " << m[r*3+1] << " " << m[r*3+2] << " |\n"; + } +} diff --git a/cpplinker/04_static_linking/vec.cpp b/cpplinker/04_static_linking/vec.cpp new file mode 100644 index 0000000..addea6c --- /dev/null +++ b/cpplinker/04_static_linking/vec.cpp @@ -0,0 +1,9 @@ +#include + +void vec_add(double a[3], double b[3], double out[3]) { + for (int i = 0; i < 3; i++) out[i] = a[i] + b[i]; +} + +void vec_print(double v[3]) { + std::cout << "(" << v[0] << ", " << v[1] << ", " << v[2] << ")\n"; +} diff --git a/cpplinker/05_dynamic_linking/build.sh b/cpplinker/05_dynamic_linking/build.sh new file mode 100644 index 0000000..d54f0fa --- /dev/null +++ b/cpplinker/05_dynamic_linking/build.sh @@ -0,0 +1,29 @@ +#!/bin/bash +set -e + +echo "=== Compile with -fPIC for position-independent code 
===" +g++ -std=c++17 -fPIC -c vec.cpp -o vec.o +g++ -std=c++17 -fPIC -c mat.cpp -o mat.o + +echo "" +echo "=== Create shared library ===" +g++ -shared vec.o mat.o -o libmymath.so +echo "Created libmymath.so" + +echo "" +echo "=== Symbols exported by the shared library ===" +nm -C -D libmymath.so | grep -E "vec_|mat_" + +echo "" +echo "=== Compile and link main dynamically ===" +g++ -std=c++17 -c main.cpp -o main.o +g++ main.o -L. -lmymath -Wl,-rpath,'$ORIGIN' -o app +echo "Created app" + +echo "" +echo "=== Runtime dependencies ===" +ldd app + +echo "" +echo "=== Run ===" +./app diff --git a/cpplinker/05_dynamic_linking/main.cpp b/cpplinker/05_dynamic_linking/main.cpp new file mode 100644 index 0000000..b826340 --- /dev/null +++ b/cpplinker/05_dynamic_linking/main.cpp @@ -0,0 +1,23 @@ +#include + +void vec_add(double[3], double[3], double[3]); +void vec_print(double[3]); +void mat_identity(double[9]); +void mat_print(double[9]); + +int main() { + double a[3] = {1, 2, 3}; + double b[3] = {4, 5, 6}; + double c[3]; + + vec_add(a, b, c); + std::cout << "vec_add: "; + vec_print(c); + + double m[9]; + mat_identity(m); + std::cout << "identity matrix:\n"; + mat_print(m); + + return 0; +} diff --git a/cpplinker/05_dynamic_linking/mat.cpp b/cpplinker/05_dynamic_linking/mat.cpp new file mode 100644 index 0000000..a327616 --- /dev/null +++ b/cpplinker/05_dynamic_linking/mat.cpp @@ -0,0 +1,12 @@ +#include + +void mat_identity(double m[9]) { + for (int i = 0; i < 9; i++) m[i] = 0.0; + m[0] = m[4] = m[8] = 1.0; +} + +void mat_print(double m[9]) { + for (int r = 0; r < 3; r++) { + std::cout << "| " << m[r*3] << " " << m[r*3+1] << " " << m[r*3+2] << " |\n"; + } +} diff --git a/cpplinker/05_dynamic_linking/vec.cpp b/cpplinker/05_dynamic_linking/vec.cpp new file mode 100644 index 0000000..addea6c --- /dev/null +++ b/cpplinker/05_dynamic_linking/vec.cpp @@ -0,0 +1,9 @@ +#include + +void vec_add(double a[3], double b[3], double out[3]) { + for (int i = 0; i < 3; i++) out[i] = a[i] + 
b[i]; +} + +void vec_print(double v[3]) { + std::cout << "(" << v[0] << ", " << v[1] << ", " << v[2] << ")\n"; +} diff --git a/cpplinker/06_c_interop/build.sh b/cpplinker/06_c_interop/build.sh new file mode 100644 index 0000000..8f1c0a3 --- /dev/null +++ b/cpplinker/06_c_interop/build.sh @@ -0,0 +1,26 @@ +#!/bin/bash +set -e + +echo "=== Compile C++ wrapper ===" +g++ -std=c++17 -c wrapper.cpp -o wrapper.o + +echo "" +echo "=== Compile C main ===" +gcc -std=c11 -c main.c -o main.o + +echo "" +echo "=== Symbols in wrapper.o (note: unmangled due to extern C) ===" +nm -g wrapper.o | grep vec_ + +echo "" +echo "=== Symbols in main.o (C code references) ===" +nm -u main.o | grep vec_ + +echo "" +echo "=== Link (use g++ to pull in C++ runtime) ===" +g++ wrapper.o main.o -o app +echo "Created app" + +echo "" +echo "=== Run ===" +./app diff --git a/cpplinker/06_c_interop/main.c b/cpplinker/06_c_interop/main.c new file mode 100644 index 0000000..b017f2c --- /dev/null +++ b/cpplinker/06_c_interop/main.c @@ -0,0 +1,19 @@ +#include +#include "wrapper.h" + +int main(void) { + void* vec; + vec_create(&vec); + + vec_push(vec, 3.14); + vec_push(vec, 2.71); + vec_push(vec, 1.41); + + printf("vector size: %d\n", vec_size(vec)); + for (int i = 0; i < vec_size(vec); i++) { + printf(" [%d] = %.2f\n", i, vec_get(vec, i)); + } + + vec_destroy(vec); + return 0; +} diff --git a/cpplinker/06_c_interop/wrapper.cpp b/cpplinker/06_c_interop/wrapper.cpp new file mode 100644 index 0000000..a621e29 --- /dev/null +++ b/cpplinker/06_c_interop/wrapper.cpp @@ -0,0 +1,24 @@ +#include "wrapper.h" +#include + +extern "C" { +void vec_create(void** out) { + *out = new std::vector(); +} + +void vec_destroy(void* vec) { + delete static_cast*>(vec); +} + +void vec_push(void* vec, double val) { + static_cast*>(vec)->push_back(val); +} + +double vec_get(void* vec, int idx) { + return (*static_cast*>(vec))[idx]; +} + +int vec_size(void* vec) { + return static_cast*>(vec)->size(); +} +} diff --git 
a/cpplinker/06_c_interop/wrapper.h b/cpplinker/06_c_interop/wrapper.h new file mode 100644 index 0000000..6c39d1a --- /dev/null +++ b/cpplinker/06_c_interop/wrapper.h @@ -0,0 +1,12 @@ +#pragma once +#ifdef __cplusplus +extern "C" { +#endif + void vec_create(void** out); + void vec_destroy(void* vec); + void vec_push(void* vec, double val); + double vec_get(void* vec, int idx); + int vec_size(void* vec); +#ifdef __cplusplus +} +#endif diff --git a/cpplinker/07_linker_errors/multiple_def/a.cpp b/cpplinker/07_linker_errors/multiple_def/a.cpp new file mode 100644 index 0000000..7b2edfc --- /dev/null +++ b/cpplinker/07_linker_errors/multiple_def/a.cpp @@ -0,0 +1,6 @@ +#include +#include "shared.h" + +void print_a() { + std::cout << "a: globalVar = " << globalVar << "\n"; +} diff --git a/cpplinker/07_linker_errors/multiple_def/b.cpp b/cpplinker/07_linker_errors/multiple_def/b.cpp new file mode 100644 index 0000000..082fe1e --- /dev/null +++ b/cpplinker/07_linker_errors/multiple_def/b.cpp @@ -0,0 +1,6 @@ +#include +#include "shared.h" + +void print_b() { + std::cout << "b: globalVar = " << globalVar << "\n"; +} diff --git a/cpplinker/07_linker_errors/multiple_def/build.sh b/cpplinker/07_linker_errors/multiple_def/build.sh new file mode 100644 index 0000000..288f582 --- /dev/null +++ b/cpplinker/07_linker_errors/multiple_def/build.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +echo "=== This build SHOULD fail with 'multiple definition' ===" +echo "" +g++ -std=c++17 a.cpp b.cpp main.cpp -o app 2>&1 || true +echo "" +echo "Both a.o and b.o define globalVar because shared.h has a definition." 
+echo "" +echo "Fix 1: use 'extern int globalVar;' in header, define in one .cpp" +echo "Fix 2: use 'inline int globalVar = 42;' (C++17)" diff --git a/cpplinker/07_linker_errors/multiple_def/main.cpp b/cpplinker/07_linker_errors/multiple_def/main.cpp new file mode 100644 index 0000000..b34d64a --- /dev/null +++ b/cpplinker/07_linker_errors/multiple_def/main.cpp @@ -0,0 +1,8 @@ +void print_a(); +void print_b(); + +int main() { + print_a(); + print_b(); + return 0; +} diff --git a/cpplinker/07_linker_errors/multiple_def/shared.h b/cpplinker/07_linker_errors/multiple_def/shared.h new file mode 100644 index 0000000..0376f58 --- /dev/null +++ b/cpplinker/07_linker_errors/multiple_def/shared.h @@ -0,0 +1,13 @@ +#pragma once + +// BUG: this is a definition, not just a declaration. +// Including this header in multiple TUs causes "multiple definition" errors. +int globalVar = 42; + +// FIX (pick one): +// 1. Use extern in header + define in one .cpp: +// extern int globalVar; // in header +// int globalVar = 42; // in one .cpp +// +// 2. Use inline (C++17): +// inline int globalVar = 42; diff --git a/cpplinker/07_linker_errors/undefined_ref/build.sh b/cpplinker/07_linker_errors/undefined_ref/build.sh new file mode 100644 index 0000000..2119d29 --- /dev/null +++ b/cpplinker/07_linker_errors/undefined_ref/build.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +echo "=== This build SHOULD fail with 'undefined reference' ===" +echo "" +g++ -std=c++17 main.cpp -o app 2>&1 || true +echo "" +echo "The linker cannot find a definition for add(int, int)." +echo "Fix: provide the .cpp that defines it, or link the correct library." 
diff --git a/cpplinker/07_linker_errors/undefined_ref/main.cpp b/cpplinker/07_linker_errors/undefined_ref/main.cpp new file mode 100644 index 0000000..0a8223e --- /dev/null +++ b/cpplinker/07_linker_errors/undefined_ref/main.cpp @@ -0,0 +1,8 @@ +#include + +int add(int, int); // declared but never defined + +int main() { + std::cout << add(3, 4) << "\n"; + return 0; +} diff --git a/cpplinker/08_lto/build.sh b/cpplinker/08_lto/build.sh new file mode 100644 index 0000000..c623c7f --- /dev/null +++ b/cpplinker/08_lto/build.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -e + +echo "=== Build WITHOUT LTO ===" +g++ -std=c++17 -O2 -c foo.cpp -o foo.o +g++ -std=c++17 -O2 -c main.cpp -o main.o +g++ -O2 foo.o main.o -o app_nolto -lm +echo "Created app_nolto" + +echo "" +echo "=== Build WITH LTO ===" +g++ -std=c++17 -O2 -flto -c foo.cpp -o foo_lto.o +g++ -std=c++17 -O2 -flto -c main.cpp -o main_lto.o +g++ -O2 -flto foo_lto.o main_lto.o -o app_lto -lm +echo "Created app_lto" + +echo "" +echo "=== Compare binary sizes ===" +ls -l app_nolto app_lto + +echo "" +echo "=== Run both ===" +echo -n "no-lto: "; ./app_nolto +echo -n "lto: "; ./app_lto diff --git a/cpplinker/08_lto/foo.cpp b/cpplinker/08_lto/foo.cpp new file mode 100644 index 0000000..fb6d479 --- /dev/null +++ b/cpplinker/08_lto/foo.cpp @@ -0,0 +1,9 @@ +#include + +double heavy_compute(double x) { + double result = 0; + for (int i = 0; i < 1000; i++) { + result += std::sin(x + i) * std::cos(x - i); + } + return result; +} diff --git a/cpplinker/08_lto/main.cpp b/cpplinker/08_lto/main.cpp new file mode 100644 index 0000000..f004c33 --- /dev/null +++ b/cpplinker/08_lto/main.cpp @@ -0,0 +1,9 @@ +#include + +double heavy_compute(double); + +int main() { + double r = heavy_compute(1.0); + std::cout << "result = " << r << "\n"; + return 0; +} diff --git a/cpplinker/cpp_linkers.md b/cpplinker/cpp_linkers.md new file mode 100644 index 0000000..bdd5edf --- /dev/null +++ b/cpplinker/cpp_linkers.md @@ -0,0 +1,316 @@ +# C++ Linkers: From 
Object Files to Executables + +*Symbol Resolution · Static & Dynamic Linking · Name Mangling · Debug Techniques* + +--- + +## 1. What Is a Linker? + +The linker is the final step in the build pipeline. It combines multiple object files and libraries into a single executable by: + +- **Resolving symbol references** — matching function/variable uses to their definitions across translation units +- **Relocating** — assigning final memory addresses to all symbols +- **Stripping or keeping** debug info, depending on build flags + +``` +Source Files (.cpp) + ↓ [compiler] +Object Files (.o) + ↓ [linker: ld / lld / link.exe] +Executable (ELF / Mach-O / PE) +``` + +--- + +## 2. The Compilation Pipeline + +Each `.cpp` file is compiled independently into a relocatable object file. + +```cpp +// foo.cpp — defines the symbol +int add(int a, int b) { + return a + b; +} + +// main.cpp — references the symbol +extern int add(int, int); // declaration only +int main() { + int r = add(3, 4); // unresolved reference until link time + return r; +} +``` + +```bash +g++ -c foo.cpp -o foo.o # compile only, no link +g++ -c main.cpp -o main.o +g++ foo.o main.o -o app # link step +``` + +The four stages: **Preprocessing** → **Compilation** → **Assembly** → **Linking** + +--- + +## 3. Symbol Resolution + +The linker maintains a symbol table and matches every `UNDEF` reference to a `GLOBAL` definition. + +```cpp +// math.cpp — defines the symbol +double square(double x) { return x * x; } + +// main.cpp — references the symbol +double square(double); // extern declaration +int main() { + return (int)square(5.0); // unresolved until link +} +``` + +| Symbol | Type | Binding | +|--------|------|---------| +| `_Z6squared` | FUNC | GLOBAL | +| `__gxx_personality_v0` | UNDEF | GLOBAL | + +**Strong symbols** (definitions) must be unique. **Weak symbols** can be overridden. `UNDEF` means referenced but not yet defined.
+ +```bash +nm -C -g math.o # list exported symbols (demangled) +nm -u main.o # show unresolved (UNDEF) symbols +objdump -t main.o # full symbol table dump +``` + +--- + +## 4. Name Mangling + +C++ encodes namespaces, class names, and parameter types into symbol names so the linker can distinguish overloads. + +```cpp +// C++ overloaded functions → different mangled names +int process(int x); // _Z7processi +int process(double x); // _Z7processd +int process(int x, double y); // _Z7processid + +namespace Math { + double sqrt(double x); // _ZN4Math4sqrtEd +} + +class Vector { + double dot(const Vector& v); // _ZN6Vector3dotERKS_ +}; +``` + +Each compiler uses its own ABI scheme (Itanium ABI on Linux/macOS, MSVC on Windows), so mixing compiler-built objects requires caution. + +```cpp +// Disable mangling for C interoperability +extern "C" { + int legacy_init(void); // symbol stays: legacy_init + void legacy_free(void*); // symbol stays: legacy_free +} +``` + +```bash +# Demangle a symbol manually +c++filt _ZN4Math4sqrtEd # → Math::sqrt(double) +``` + +--- + +## 5. Static Linking + +The linker copies the needed object files from `.a` archives directly into the executable. + +```bash +# Build a static library +ar rcs libmymath.a vec.o mat.o quat.o + +# Link statically — no runtime dependencies +g++ main.o -L. -lmymath -static -o app_static + +# Verify: no shared lib deps +ldd app_static # → statically linked +``` + +**Pros:** single self-contained binary, no runtime dependency issues, faster startup. +**Cons:** larger binary, security patches require a full rebuild, code is duplicated across binaries. + +> **Note:** Link order matters — list object files before libraries: `g++ main.o -lmymath`, not `g++ -lmymath main.o`. + +--- + +## 6. Dynamic Linking + +Shared libraries (`.so` / `.dll` / `.dylib`) are loaded at runtime by the dynamic linker (`ld.so`).
+ +```bash +# Build a shared library (-fPIC is required) +g++ -fPIC -shared vec.o mat.o -o libmymath.so + +# Link dynamically (default behavior) +g++ main.o -L. -lmymath -Wl,-rpath,'$ORIGIN' -o app + +# Inspect runtime dependencies +ldd app +# libmymath.so => ./libmymath.so +# libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 + +# Override a library at runtime (useful for mocking) +LD_PRELOAD=./mock_net.so ./app +``` + +**PLT/GOT mechanism:** external calls go through the *Procedure Linkage Table* (PLT); the *Global Offset Table* (GOT) holds resolved addresses filled in lazily on first call. Link with `-Wl,-z,now` or run with `LD_BIND_NOW=1` to resolve all symbols at startup instead. + +**Pros:** shared memory between processes, hot-patching by replacing `.so`, smaller binaries. +**Cons:** dependency management ("DLL hell"), slight startup overhead, harder single-file deployment. + +--- + +## 7. Static vs. Dynamic: At a Glance + +| Aspect | Static (`.a`) | Dynamic (`.so` / `.dll`) | +|---|---|---| +| Resolution | Link time | Load / runtime | +| Binary size | Larger (code embedded) | Smaller (references only) | +| Memory sharing | No — each process has its own copy | Yes — single copy in RAM | +| Deployment | One self-contained file | Must ship `.so` alongside | +| Hot patching | Full relink required | Replace `.so` and restart | +| Startup overhead | Minimal | Dynamic loader adds ~ms | +| Security updates | Manual rebuild | OS-level update propagates | + +--- + +## 8. Linking with C Libraries + +Use `extern "C"` to suppress name mangling when calling C code from C++ (or exposing C++ to C callers).
+ +```cpp +// wrapper.h — expose C++ code to C callers +#pragma once +#ifdef __cplusplus +extern "C" { // disables mangling for these symbols +#endif + void vec_create(void** out); + void vec_destroy(void* vec); + void vec_push(void* vec, double val); + double vec_get(void* vec, int idx); +#ifdef __cplusplus +} +#endif +``` + +```cpp +// Calling a C library from C++ +extern "C" int sqlite3_open(const char*, void**); +``` + +```bash +g++ main.cpp wrapper.cpp -lsqlite3 -o app +# -l → links libname.so or libname.a +# -L → add directory to library search path +# -Wl,--as-needed → skip libs that aren't actually used +``` + +**Common pitfalls:** +- Forgetting `extern "C"` → mangled name doesn't match the C header +- C struct padding may differ across compilers, breaking ABI +- C code cannot unwind C++ exceptions — use `noexcept` at boundaries +- Link order still matters: objects first, then libraries + +--- + +## 9. Common Linker Errors + +### `undefined reference to 'add(int, int)'` + +**Cause:** definition is missing or the library wasn't linked. +**Fix:** add `-lmylib` or include the `.cpp` that defines it. + +### `multiple definition of 'globalVar'` + +**Cause:** variable defined (not just declared) in a header included by multiple TUs. +**Fix:** use `inline` (C++17), or `extern` declaration in the header + one definition in a `.cpp`. + +### `cannot find -lmylib` + +**Cause:** linker can't locate `libmylib.so` or `libmylib.a`. +**Fix:** add `-L/path/to/lib`, or set `LIBRARY_PATH` (or `PKG_CONFIG_PATH` when the flags come from `pkg-config`). Note that `LD_LIBRARY_PATH` only affects the runtime loader, not the link-time `-l` search. + +--- + +## 10.
Debugging Linker Issues + +```bash +# Inspect symbols +nm -C -g lib.a # demangled, global symbols only +nm -u main.o # undefined (unresolved) symbols +objdump -d my.o # disassembly +readelf -s my.o # ELF symbol table + +# Trace linker decisions +g++ main.o -lmylib -Wl,--verbose 2>&1 | grep "attempt" +ld --trace my.o # shows each file the linker considers + +# Check shared lib deps +ldd ./app +chrpath -l ./app # show embedded RPATH + +# Demangle a mangled symbol +c++filt _ZN4Math4sqrtEd # → Math::sqrt(double) +``` + +**Useful flags:** +- `-Wl,--no-undefined` — catch unresolved symbols at build time +- `-Wl,--as-needed` — skip unused shared libraries +- `-Wl,--start-group ... --end-group` — resolve circular dependencies between archives + +> *Rule of thumb: "When in doubt, `nm` it out."* + +--- + +## 11. Bonus: Link-Time Optimization (LTO) + +Without LTO, the compiler can only optimize within a single translation unit. With LTO, it embeds IR (Intermediate Representation) in `.o` files and performs whole-program optimization at link time. + +```bash +# Enable LTO +g++ -flto -O2 -c foo.cpp -o foo.o +g++ -flto -O2 -c main.cpp -o main.o +g++ -flto -O2 foo.o main.o -o app_lto + +# Thin LTO — faster, scales to large codebases (clang) +clang++ -flto=thin -O2 *.cpp -o app +``` + +LTO enables cross-TU inlining, dead code elimination, inter-procedural constant propagation, and whole-program devirtualization — typically 10–25% speedup on real codebases. + +**Gotcha:** all TUs must be compiled with `-flto`. Third-party archives compiled without it will still link, but that code won't be optimized across boundaries. 
+ +--- + +## Summary & Quick Reference + +**Key concepts** +- Linker: resolves symbols, relocates addresses, produces the binary +- Symbol resolution order: strong > weak > UNDEF +- Name mangling encodes C++ type info into flat symbol names +- `extern "C"` disables mangling for C interoperability + +**Essential commands** + +| Command | Purpose | +|---|---| +| `nm -C -g lib.a` | List exported symbols (demangled) | +| `c++filt <symbol>` | Demangle a symbol | +| `ldd app` | Show shared library dependencies | +| `objdump -d obj` | Disassemble object file | +| `ar rcs lib.a *.o` | Create a static library | +| `readelf -s obj` | ELF symbol table | + +**Build flags** + +| Flag | Effect | +|---|---| +| `-static` | Link everything statically | +| `-fPIC -shared` | Build a position-independent shared library | +| `-Wl,--no-undefined` | Fail at link time on unresolved symbols | +| `-Wl,-rpath,...` | Embed library search path in binary | +| `-flto` | Enable link-time optimization | +| `-Wl,--as-needed` | Only link libraries that are actually used |