Add esrgan

This commit is contained in:
2026-04-05 18:05:54 +02:00
parent b622786943
commit 5f46ed6d6e
4 changed files with 367 additions and 1 deletions
+3
View File
@@ -13,6 +13,9 @@ wheels/
.DS_Store .DS_Store
Thumbs.db Thumbs.db
# Tools (large binaries)
.tools/
# Video output # Video output
*.mp4 *.mp4
*.mkv *.mkv
+59
View File
@@ -53,3 +53,62 @@ hdzero VID_0013.TS output.mp4 --gpu
# Limit CPU usage # Limit CPU usage
hdzero VID_0013.TS output.mp4 --threads 4 hdzero VID_0013.TS output.mp4 --threads 4
``` ```
---
### `realesrgan`
AI upscaler using [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN) (NCNN portable executable). Upscales video by extracting frames, running Real-ESRGAN on each frame, then reassembling with original audio.
Place the NCNN executable in `.tools/realesrgan-ncnn-vulkan-*/`.
```sh
realesrgan <input> <output> [options]
```
**Options**
| Flag | Default | Description |
| ---------------- | ------- | --------------------------- |
| `-ss <time>` | — | Start time |
| `-to <time>` | — | End time |
| `--model <name>` | `x4` | Model (see below) |
| `--gpu <id>` | `0` | GPU device ID, `-1` for CPU |
| `--crf <n>` | `18` | Output CRF quality |
**Models**
| Flag | Model | Scale | Best for |
| ----------------- | ----------------------- | ----- | ------------------------ |
| `x4` | realesrgan-x4plus | 4x | Real-world footage (FPV) |
| `x4-anime` | realesrgan-x4plus-anime | 4x | Animation |
| `x2` / `video-x2` | realesr-animevideov3-x2 | 2x | Animation video |
| `x3` / `video-x3` | realesr-animevideov3-x3 | 3x | Animation video |
| `video-x4` | realesr-animevideov3-x4 | 4x | Animation video |
**Examples**
```bash
# Upscale a clip (recommended model for FPV)
realesrgan VID_0013.TS output.mp4 --model x4
# Test on a short segment first
realesrgan VID_0013.TS test.mp4 --model x4 -ss 00:01:00 -to 00:01:10
# CPU only
realesrgan VID_0013.TS output.mp4 --gpu -1
```
---
## To investigate
### SeedVR
[ByteDance-Seed/SeedVR](https://github.com/ByteDance-Seed/SeedVR) — video
super-resolution via diffusion model. Potentially much higher quality than
Real-ESRGAN but significantly heavier:
- Requires Python 3.10, PyTorch, `flash_attn`, `apex` (Linux/CUDA only)
- Minimum 1× H100-80G for 720p; 4× H100 for 1080p+
- Not natively runnable on Windows — needs WSL2 or a Linux machine
+303
View File
@@ -0,0 +1,303 @@
import argparse
import os
import queue
import subprocess
import sys
import tempfile
import threading
import time
import urllib.request
import zipfile
from pathlib import Path
from rich.console import Console
from rich.progress import (
BarColumn,
MofNCompleteColumn,
Progress,
TaskProgressColumn,
TextColumn,
TimeElapsedColumn,
TimeRemainingColumn,
)
console = Console(stderr=True)
MODELS = {
"x4": ("realesrgan-x4plus", 4),
"x4-anime": ("realesrgan-x4plus-anime", 4),
"x2": ("realesr-animevideov3-x2", 2),
"x3": ("realesr-animevideov3-x3", 3),
"video-x2": ("realesr-animevideov3-x2", 2),
"video-x3": ("realesr-animevideov3-x3", 3),
"video-x4": ("realesr-animevideov3-x4", 4),
}
_RELEASE = "realesrgan-ncnn-vulkan-20220424"
_DOWNLOAD_URLS = {
"win32": f"https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/{_RELEASE}-windows.zip",
"linux": f"https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/{_RELEASE}-ubuntu.zip",
"darwin": f"https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/{_RELEASE}-macos.zip",
}
_EXE_NAME = "realesrgan-ncnn-vulkan.exe" if sys.platform == "win32" else "realesrgan-ncnn-vulkan"
def _download_ncnn(tools: Path):
platform = sys.platform
url = _DOWNLOAD_URLS.get(platform)
if not url:
console.print(f"[red]no download available for platform: {platform}[/red]")
sys.exit(1)
tools.mkdir(parents=True, exist_ok=True)
zip_path = tools / f"{_RELEASE}.zip"
console.print(f"[dim]downloading {url}[/dim]")
with Progress(
TextColumn("{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeElapsedColumn(),
TimeRemainingColumn(),
console=console,
) as progress:
task = progress.add_task("Downloading ", total=None)
def _reporthook(block: int, block_size: int, total: int):
if total > 0:
progress.update(task, total=total, completed=min(block * block_size, total))
urllib.request.urlretrieve(url, zip_path, reporthook=_reporthook)
console.print("[dim]extracting...[/dim]")
with zipfile.ZipFile(zip_path, "r") as zf:
zf.extractall(tools)
zip_path.unlink()
if sys.platform != "win32":
exe = tools / _RELEASE / _EXE_NAME
exe.chmod(exe.stat().st_mode | 0o111)
def find_executable() -> Path:
tools = Path(__file__).parent / ".tools"
candidates = sorted(tools.glob(f"{_RELEASE}-*"))
if not candidates:
_download_ncnn(tools)
candidates = sorted(tools.glob(f"{_RELEASE}-*"))
exe = candidates[-1] / _EXE_NAME
if not exe.exists():
console.print(f"[red]executable not found: {exe}[/red]")
sys.exit(1)
return exe
def _parse_time(t: str) -> float:
parts = t.split(":")
return sum(float(p) * 60 ** i for i, p in enumerate(reversed(parts)))
def n_frames_to_seconds(ss: str | None, to: str | None, total: float) -> float:
start = _parse_time(ss) if ss else 0.0
end = _parse_time(to) if to else total
return max(0.0, end - start)
def probe_video(path: str) -> dict:
result = subprocess.run(
[
"ffprobe", "-v", "error", "-select_streams", "v:0",
"-show_entries", "stream=r_frame_rate,width,height:format=duration",
"-of", "default=noprint_wrappers=1",
path,
],
capture_output=True, text=True,
)
info: dict[str, str] = {}
for line in result.stdout.splitlines():
k, _, v = line.partition("=")
info[k.strip()] = v.strip()
num, _, den = info.get("r_frame_rate", "30/1").partition("/")
fps = float(num) / float(den or 1)
return {
"fps": fps,
"duration": float(info.get("duration", 0)),
"width": int(info.get("width", 0)),
"height": int(info.get("height", 0)),
}
def _pipe_reader(stream, q: queue.Queue):
for line in iter(stream.readline, b""):
q.put(line.decode(errors="replace").strip())
q.put(None)
def ffmpeg_with_progress(cmd: list[str], label: str, total: float | None) -> int:
t = total or 100.0
with Progress(
TextColumn("{task.description}"),
BarColumn(),
TaskProgressColumn(),
TimeElapsedColumn(),
TimeRemainingColumn(),
console=console,
) as progress:
task = progress.add_task(label, total=t)
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
q: queue.Queue = queue.Queue()
threading.Thread(target=_pipe_reader, args=(proc.stdout, q), daemon=True).start()
threading.Thread(target=_pipe_reader, args=(proc.stderr, queue.Queue()), daemon=True).start()
try:
while True:
line = q.get()
if line is None:
break
key, _, value = line.partition("=")
if key == "out_time" and total:
try:
h, m, s = value.split(":")
elapsed_s = int(h) * 3600 + int(m) * 60 + float(s)
progress.update(task, completed=min(elapsed_s, t))
except ValueError:
pass
elif key == "progress" and value == "end":
progress.update(task, completed=t)
except KeyboardInterrupt:
proc.terminate()
proc.wait()
console.print("\n[yellow]cancelled[/yellow]")
sys.exit(130)
proc.wait()
return proc.returncode
def extract_frames(input_file: str, frames_dir: Path, ss: str | None, to: str | None, duration: float | None) -> int:
cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
if ss:
cmd += ["-ss", ss]
if to:
cmd += ["-to", to]
cmd += ["-i", input_file, str(frames_dir / "%08d.png")]
cmd += ["-progress", "pipe:1", "-stats_period", "0.5"]
rc = ffmpeg_with_progress(cmd, "Extracting ", duration)
if rc != 0:
console.print("[red]frame extraction failed[/red]")
sys.exit(rc)
return len(sorted(frames_dir.glob("*.png")))
def upscale_frames(exe: Path, frames_dir: Path, out_dir: Path, model: str, scale: int, gpu_id: int, n_frames: int):
cmd = [
str(exe),
"-i", str(frames_dir),
"-o", str(out_dir),
"-n", model,
"-s", str(scale),
"-g", str(gpu_id),
"-f", "png",
]
with Progress(
TextColumn("{task.description}"),
BarColumn(),
MofNCompleteColumn(),
TimeElapsedColumn(),
TimeRemainingColumn(),
console=console,
) as progress:
task = progress.add_task("Upscaling ", total=n_frames)
proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
stop = threading.Event()
def _watcher():
while not stop.is_set():
count = len(list(out_dir.glob("*.png")))
progress.update(task, completed=count)
time.sleep(0.5)
threading.Thread(target=_watcher, daemon=True).start()
try:
proc.wait()
except KeyboardInterrupt:
proc.terminate()
proc.wait()
stop.set()
console.print("\n[yellow]cancelled[/yellow]")
sys.exit(130)
stop.set()
if proc.returncode != 0:
console.print("[red]upscaling failed[/red]")
sys.exit(proc.returncode)
def assemble_video(frames_dir: Path, original: str, output: str, fps: float, clip_duration: float | None, crf: int, ss: str | None, to: str | None):
audio_input: list[str] = []
if ss:
audio_input += ["-ss", ss]
if to:
audio_input += ["-to", to]
audio_input += ["-i", original]
cmd = [
"ffmpeg", "-hide_banner", "-loglevel", "error", "-y",
"-framerate", str(fps),
"-i", str(frames_dir / "%08d.png"),
*audio_input,
"-map", "0:v", "-map", "1:a?",
"-c:v", "libx264", "-crf", str(crf), "-preset", "slow",
"-c:a", "copy",
"-progress", "pipe:1", "-stats_period", "0.5",
output,
]
rc = ffmpeg_with_progress(cmd, "Assembling ", clip_duration)
if rc != 0:
console.print("[red]assembly failed[/red]")
sys.exit(rc)
def main():
parser = argparse.ArgumentParser(description="Real-ESRGAN video upscaler (NCNN)")
parser.add_argument("input", help="Input video file")
parser.add_argument("output", help="Output video file")
parser.add_argument("-ss", help="Start time (e.g. 00:01:30 or 90)")
parser.add_argument("-to", help="End time (e.g. 00:02:00 or 120)")
parser.add_argument("--model", default="x4", choices=list(MODELS), help="Model (default: x4)")
parser.add_argument("--gpu", type=int, default=0, help="GPU device ID, -1 for CPU (default: 0)")
parser.add_argument("--crf", type=int, default=18, help="Output CRF quality (default: 18)")
args = parser.parse_args()
exe = find_executable()
model_name, scale = MODELS[args.model]
info = probe_video(args.input)
console.print(
f"[dim]{Path(args.input).name}[/dim] "
f"[cyan]{info['width']}x{info['height']}[/cyan] "
f"[cyan]{info['fps']:.2f} fps[/cyan] "
f"[cyan]{info['duration']:.1f}s[/cyan] "
f"[dim]→ {model_name} ({scale}x)[/dim]"
)
with tempfile.TemporaryDirectory(prefix="esrgan_in_") as frames_tmp, \
tempfile.TemporaryDirectory(prefix="esrgan_out_") as out_tmp:
frames_dir = Path(frames_tmp)
out_dir = Path(out_tmp)
clip_duration = n_frames_to_seconds(args.ss, args.to, info["duration"])
n = extract_frames(args.input, frames_dir, args.ss, args.to, clip_duration)
console.print(f"[dim]{n} frames[/dim]")
upscale_frames(exe, frames_dir, out_dir, model_name, scale, args.gpu, n)
assemble_video(out_dir, args.input, args.output, info["fps"], clip_duration, args.crf, args.ss, args.to)
console.print(f"[green]done[/green] {args.output}")
if __name__ == "__main__":
main()
+2 -1
View File
@@ -11,7 +11,8 @@ requires = ["setuptools"]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
[tool.setuptools] [tool.setuptools]
py-modules = ["hdzero"] py-modules = ["hdzero", "esrgan"]
[project.scripts] [project.scripts]
hdzero = "hdzero:main" hdzero = "hdzero:main"
realesrgan = "esrgan:main"