From ae7bc367a23c3ac5e2a6e71987abc29a5cad2a92 Mon Sep 17 00:00:00 2001
From: mnerv <me@mononerv.dev>
Date: Sun, 10 Sep 2023 18:15:54 +0200
Subject: [PATCH] Refactor: Media class with tracks

---
 ffmini.cpp | 335 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 210 insertions(+), 125 deletions(-)
diff --git a/ffmini.cpp b/ffmini.cpp
index fe8e1ad..572ef4a 100644
--- a/ffmini.cpp
+++ b/ffmini.cpp
@@ -1,5 +1,8 @@
 #include <iostream>
 #include <filesystem>
+#include <memory>
+#include <vector>
+#include <ranges>
 
 #include "fmt/format.h"
 #include "miniaudio.h"
@@ -10,173 +13,255 @@ extern "C" {
 #include "libavcodec/avcodec.h"
 }
 
-struct ffmini {
-    AVFormatContext* format_context;
-    AVCodecContext* codec_context;
-    std::int64_t stream_index = -1;
-    char const* path = "C:/Users/miku/Downloads/Porter Robinson - Trying to Feel Alive (Official Audio).webm";
-    std::string filename{};
-    AVStream* stream = nullptr;
-    AVPacket *pkt = nullptr;
-    AVFrame* frame = nullptr;
+enum class track_type {  // kind of stream a track wraps; stored in track::m_type
+    unknown,
+    video,  // not used by any class in this patch (the video case in media::load is commented out)
+    audio,  // passed to the track constructor by audio_track
+};
 
-    std::size_t byte_read = 0;
+class track {  // base for one decodable stream; owns its codec context and a reusable frame
+public:
+    /// Frees the frame and the codec context allocated by the constructor;
+    /// both FFmpeg free functions accept a pointer that is already null.
+    virtual ~track() { av_frame_free(&m_frame); avcodec_free_context(&m_codec); }
 
-    ffmini() {
-        format_context = avformat_alloc_context();
-        if (!format_context)
-            throw std::runtime_error("Failed to create AVFormatContext");
+    virtual auto str() const -> std::string = 0;  // short description, implemented by each track kind
+    auto stream_index() const -> std::size_t { return m_stream_index; }  // stream->index captured at construction
+    auto type() const -> track_type { return m_type; }
+    auto frame() -> AVFrame* { return m_frame; }  // decode target; remains owned by the track
+    auto codec() -> AVCodecContext* { return m_codec; }  // opened decoder; remains owned by the track
 
-        if (avformat_open_input(&format_context, path, nullptr, nullptr) != 0)
-            throw std::runtime_error(fmt::format("Failed to open file {}\n", filename));
+protected:
+    track(AVStream* stream, track_type const& type)  // opens a decoder for stream; throws std::runtime_error on failure
+        : m_stream_index(stream->index), m_stream(stream), m_type(type) {
+        auto* params = stream->codecpar;
+        auto const* codec = avcodec_find_decoder(params->codec_id);
 
-        if (avformat_find_stream_info(format_context, nullptr) < 0)
-            throw std::runtime_error("Error loading stream info");
+        m_codec = avcodec_alloc_context3(codec);
+        if (!m_codec) throw std::runtime_error("Couldn't create AVCodecContext");
 
-        AVCodec const* codec = nullptr;
-        for (std::uint32_t i = 0; i < format_context->nb_streams; ++i) {
-            auto* params = format_context->streams[i]->codecpar;
-            codec = avcodec_find_decoder(params->codec_id);
-            if (!codec) continue;
-            if (params->codec_type == AVMEDIA_TYPE_AUDIO) {
-                stream_index = static_cast<std::int64_t>(i);
-            }
-        }
-
-        if (stream_index == -1)
-            throw std::runtime_error(fmt::format("Couldn't find valid audio track inside file: {}", filename));
-
-        codec_context = avcodec_alloc_context3(codec);
-        if (!codec_context)
-            throw std::runtime_error("Couldn't create AVCodecContext");
-
-        auto* codec_params = format_context->streams[stream_index]->codecpar;
-        if (avcodec_parameters_to_context(codec_context, codec_params) < 0)
+        if (avcodec_parameters_to_context(m_codec, params) < 0)  // NOTE(review): a throw from here on skips ~track(), so m_codec is not released
             throw std::runtime_error("Couldn't initialize AVCodecContext");
 
-        if (avcodec_open2(codec_context, codec, nullptr) < 0)
+        if (avcodec_open2(m_codec, codec, nullptr) < 0)
             throw std::runtime_error("Couln't open codec");
-        stream = format_context->streams[stream_index];
 
-        frame = av_frame_alloc();
-        if (!frame)
-            throw std::runtime_error("Couldn't allocate AVFrame");
-
-        pkt = av_packet_alloc();
-        if (!pkt)
-            throw std::runtime_error("Couldn't allocate AVPacket");
-
-        std::filesystem::path file{path};
-        filename = file.filename().string();
-
-        // char buffer[256]{};
-        // av_get_channel_layout_string(buffer, 256, codec_params->channels, codec_params->channel_layout);
-
-        fmt::print("File: {}\n", filename);
-        fmt::print("Sample rate: {}\n", codec_params->sample_rate);
-        fmt::print("Channels: {}\n", codec_params->channels);
-    }
-    ~ffmini() {
-        avcodec_free_context(&codec_context);
-        avformat_close_input(&format_context);
-        avformat_free_context(format_context);
-        av_frame_free(&frame);
-        av_packet_free(&pkt);
+        m_frame = av_frame_alloc();
+        if (!m_frame) throw std::runtime_error("Couldn't allocate AVFrame");
     }
 
-    auto decode() -> void {
-        if (byte_read != 0) return;
+protected:
+    std::size_t     m_stream_index;     // copy of stream->index
+    AVStream*       m_stream;           // never freed by this class
+    track_type      m_type;
+    AVCodecContext* m_codec = nullptr;  // freed in ~track()
+    AVFrame*        m_frame = nullptr;  // freed in ~track()
+};
+using track_ref_t = std::shared_ptr<track>;
 
-        static auto make_error = [](int err) {
-            static char str[AV_ERROR_MAX_STRING_SIZE]{};
-            return av_make_error_string(str, AV_ERROR_MAX_STRING_SIZE, err);
-        };
+class audio_track : public track {  // audio stream whose current decoded frame is drained as interleaved PCM
+public:
+    explicit audio_track(AVStream* stream)  // opens the decoder through track; throws std::runtime_error on failure
+        : track(stream, track_type::audio) { }
 
-        int ret = 0;
-        while (av_read_frame(format_context, pkt) >= 0) {
-            if (pkt->stream_index != stream_index) {
-                av_packet_unref(pkt);
+    auto str() const -> std::string override {
+        return fmt::format("[{}] audio_track", m_stream_index);
+    }
+    auto channels() const -> std::size_t { return m_stream->codecpar->ch_layout.nb_channels; }
+    auto sample_rate() const -> std::size_t { return m_stream->codecpar->sample_rate; }
+    auto reset_frame() -> void { m_read_frames = 0; }  // restart draining from the first sample of frame()
+    auto empty() const -> bool {  // true once every sample of the current frame has been handed out
+        return m_read_frames >= std::size_t(m_frame->nb_samples);
+    }
+    /// Copies up to frame_count interleaved PCM frames of the current decoded frame into output; returns the count copied.
+    auto read_pcm_frame(void* output, std::size_t frame_count) -> std::size_t {
+        auto const data_size = av_get_bytes_per_sample(m_codec->sample_fmt);
+        auto const channels  = m_codec->ch_layout.nb_channels;  // same layout field channels() reads
+        auto const planar    = av_sample_fmt_is_planar(m_codec->sample_fmt) != 0;
+        auto* ptr = static_cast<std::uint8_t*>(output);
+        std::size_t frame_read = 0;
+        for (auto i = m_read_frames; frame_read < frame_count && i < std::size_t(m_frame->nb_samples); ++i, ++frame_read) {
+            for (int ch = 0; ch < channels; ++ch) {
+                auto const* src = planar ? m_frame->extended_data[ch] + data_size * i : m_frame->extended_data[0] + data_size * (i * channels + ch);  // packed formats keep every channel in plane 0
+                std::memcpy(ptr + (frame_read * channels + ch) * data_size, src, data_size);
+            }
+        }
+        m_read_frames += frame_read;
+        return frame_read;
+    }
+private:
+    std::size_t m_read_frames{0};  // samples per channel of the current frame already copied out
+};
+using audio_track_ref_t = std::shared_ptr<audio_track>;
+
+auto ffmpeg_error_str(int err) {  // text for an FFmpeg error code; the static buffer is overwritten by every call
+    static char buffer[AV_ERROR_MAX_STRING_SIZE]{};
+    return av_make_error_string(buffer, sizeof buffer, err);
+}
+
+class media {  // demuxes one input file and decodes frames for the tracks it found
+public:
+    media() {  // allocates the format context and the reusable packet; throws std::runtime_error on failure
+        m_format = avformat_alloc_context();
+        if (!m_format) throw std::runtime_error("Failed to create AVFormatContext");
+        m_packet = av_packet_alloc();
+        if (!m_packet) throw std::runtime_error("Couldn't allocate AVPacket");
+    }
+    ~media() {
+        cleanup();  // avformat_close_input() frees m_format and resets it to null
+        av_packet_free(&m_packet);  // the packet from the constructor was never released before
+    }
+    /// Closes any previous input, opens path and adds an audio_track per audio stream that has a decoder; throws std::runtime_error on failure.
+    auto load(std::string const& path) -> void {
+        cleanup();
+        m_path = path;
+        m_filename = m_path.filename().string();
+
+        if (avformat_open_input(&m_format, m_path.string().c_str(), nullptr, nullptr) != 0)
+            throw std::runtime_error(fmt::format("Failed to open file {}\n", m_filename));
+
+        if (avformat_find_stream_info(m_format, nullptr) < 0)
+            throw std::runtime_error("Error loading stream info");
+
+        for (std::uint32_t i = 0; i < m_format->nb_streams; ++i) {
+            auto* stream = m_format->streams[i];
+            auto* params = stream->codecpar;
+            auto const* codec = avcodec_find_decoder(params->codec_id);
+            if (!codec) continue;  // skip streams that have no decoder
+
+            switch (params->codec_type) {
+                case AVMEDIA_TYPE_AUDIO:
+                    m_tracks.push_back(std::make_shared<audio_track>(stream));
+                    ++m_audio_track_size;
+                    break;
+                // case AVMEDIA_TYPE_VIDEO:
+                //     m_tracks.push_back(std::make_shared<video_track>(std::size_t(i), stream));
+                //     break;
+                default: continue;
+            }
+        }
+    }
+
+    auto filename() const -> std::string const& { return m_filename; }  // file-name component of the loaded path
+    auto tracks() const -> std::vector<track_ref_t> const& { return m_tracks; }
+    auto str() const -> std::string {  // multi-line summary: file name, then one line per track
+        using namespace std::string_literals;
+        std::string str{"media: "};
+        str += m_filename + "\n";
+        str += "  tracks:\n";
+        for (std::size_t i = 0; i < m_tracks.size(); ++i) {
+            str += "    "s + m_tracks[i]->str();
+            if (i < m_tracks.size() - 1) str += "\n";
+        }
+        return str;
+    }
+    /// Fills output with up to frame_count interleaved PCM frames from the first track, decoding more frames as needed.
+    auto read_pcm_frame(void* output, std::size_t frame_count) -> void {
+        if (m_tracks.empty()) return;
+        auto track = std::static_pointer_cast<audio_track>(m_tracks[0]);
+        auto const frame_bytes = track->channels() * av_get_bytes_per_sample(track->codec()->sample_fmt);
+        for (std::size_t frame_read = 0; frame_read < frame_count;) {
+            if (track->empty()) { read(track); track->reset_frame(); }
+            if (track->empty()) break;  // nothing decodable: stop instead of spinning inside the audio callback
+            // Append after the frames already written and request only the remainder.
+            frame_read += track->read_pcm_frame(static_cast<std::uint8_t*>(output) + frame_read * frame_bytes, frame_count - frame_read);
+        }
+    }
+    /// Reads packets until one frame of track has been decoded into track->frame(), or reading/decoding fails.
+    auto read(track_ref_t const& track) -> void {
+        while(av_read_frame(m_format, m_packet) >= 0) {
+            if (m_packet->stream_index != int(track->stream_index())) {
+                av_packet_unref(m_packet);
                 continue;
             }
-            ret = avcodec_send_packet(codec_context, pkt);
+            auto ret = avcodec_send_packet(track->codec(), m_packet);
+            av_packet_unref(m_packet);  // the decoder keeps its own reference, so release ours on every path
             if (ret < 0) {
                 fmt::print(stderr, "Error submitting the packet to the decoder!\n");
-                return;
-            }
-
-            ret = avcodec_receive_frame(codec_context, frame);
-            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
-                av_packet_unref(pkt);
-                fmt::print("error\n");
-                continue;
-            } else if (ret < 0) {
-                fmt::print(stderr, "Failed to decode packet: %s\n", make_error(ret));
                 break;
             }
 
-            av_packet_unref(pkt);
+            ret = avcodec_receive_frame(track->codec(), track->frame());
+            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
+                // No frame available yet: feed the decoder the next packet.
+                continue;
+            } else if (ret < 0) {
+                fmt::print(stderr, "Failed to decode packet: {}\n", ffmpeg_error_str(ret));
+                break;
+            }
+
+            // A frame was decoded into track->frame(); stop reading.
             break;
         }
 
-        auto const data_size = av_get_bytes_per_sample(codec_context->sample_fmt);
-        if (data_size < 0) {
-            fmt::print(stderr, "Failed to calculate data size\n");
-        }
     }
+    /// Seeks track's stream to timestamp, drops the decoder's buffered state, then decodes the next frame.
+    auto seek(track_ref_t const& track, std::int64_t timestamp) -> void {
+        if (av_seek_frame(m_format, int(track->stream_index()), timestamp, AVSEEK_FLAG_BACKWARD) >= 0)
+            avcodec_flush_buffers(track->codec());  // discard frames buffered from before the seek
+        read(track);
+    }
+private:
+    auto cleanup() -> void {  // drops every track and closes the current input, if any
+        m_audio_track_size = 0;
+        m_tracks.clear();
+        avformat_close_input(&m_format);
+    }
+
+private:
+    std::filesystem::path    m_path{};
+    std::string              m_filename{};
+    std::vector<track_ref_t> m_tracks{};
+
+    std::size_t m_audio_track_size{0};  // number of audio_track entries added by load()
+
+private:
+    AVFormatContext* m_format = nullptr;  // released by cleanup()
+    AVPacket*        m_packet = nullptr;  // released in ~media()
 };
 
 auto callback([[maybe_unused]]ma_device* device, [[maybe_unused]]void* output, [[maybe_unused]]void const* input, [[maybe_unused]]ma_uint32 frame_count) -> void {
-    auto* ptr = static_cast<ffmini*>(device->pUserData);
-    ptr->decode();
-    auto& start = ptr->byte_read;
-    auto* frame = ptr->frame;
-    auto* codec = ptr->codec_context;
-
-    std::size_t read = 0;
-    auto const data_size = av_get_bytes_per_sample(codec->sample_fmt);
-    if (data_size < 0) {
-        fmt::print(stderr, "Failed to calculate data size\n");
-    }
-
-    auto* dst = static_cast<float*>(output);
-    for (int i = start; i < frame->nb_samples; i++) {
-        for (int ch = 0; ch < codec->channels; ch++) {
-            std::memcpy(dst++, frame->data[ch] + data_size * i, data_size);
-        }
-        ++read;
-        if (read >= frame_count) break;
-    }
-
-    start = start + read;
-    if (int(start) >= frame->nb_samples) start = 0;
+    auto* ptr = static_cast<media*>(device->pUserData);  // entry() stores the address of its media object in pUserData
+    ptr->read_pcm_frame(output, frame_count);  // writes decoded PCM frames into the playback buffer
 }
 
-auto main([[maybe_unused]]int argc, [[maybe_unused]]char const* argv[]) -> int {
-    ffmini decoder{};
+auto entry() -> void {  // loads the hard-coded file and plays its first audio track until Enter is pressed; throws on failure
+    media decoder{};
+    decoder.load("C:/Users/miku/Downloads/Porter Robinson - Trying to Feel Alive (Official Audio).webm");
+    fmt::print("{}\n", decoder.str());
+    if (decoder.tracks().empty()) throw std::runtime_error("Couldn't find valid audio track inside file: " + decoder.filename());
+    auto track = std::static_pointer_cast<audio_track>(decoder.tracks()[0]);  // load() only ever adds audio_track objects
 
     ma_device device;
     auto device_config = ma_device_config_init(ma_device_type_playback);
     device_config.playback.format   = ma_format_f32;
-    device_config.playback.channels = 2;
-    device_config.sampleRate        = decoder.stream->codecpar->sample_rate;
+    device_config.playback.channels = static_cast<ma_uint32>(track->channels());
+    device_config.sampleRate        = static_cast<ma_uint32>(track->sample_rate());
     device_config.dataCallback      = callback;
     device_config.pUserData         = &decoder;
 
-    if (ma_device_init(nullptr, &device_config, &device) != MA_SUCCESS) {
-        fmt::print(stderr, "Failed to open playback device\n");
-        return 1;
-    }
+    fmt::print("Channels: {}\n", track->channels());
+    fmt::print("Sample rate: {}\n", track->sample_rate());
 
-    if (ma_device_start(&device) != MA_SUCCESS) {
-        fmt::print(stderr, "Failed to start playback device.\n");
-        return 1;
-    }
+    if (ma_device_init(nullptr, &device_config, &device) != MA_SUCCESS)
+        throw std::runtime_error("Failed to open playback device\n");
 
-    fmt::print("Now playing: {}\n", decoder.filename);
+    if (ma_device_start(&device) != MA_SUCCESS)
+        throw std::runtime_error("Failed to start playback device.\n");
+
+    fmt::print("Now playing: {}\n", decoder.filename());
     fmt::print("Press enter to quit...");
     std::cin.get();
 
     ma_device_uninit(&device);
+}
+
+auto main([[maybe_unused]]int argc, [[maybe_unused]]char const* argv[]) -> int {  // runs entry(); prints any std::exception it throws
+    try { entry(); }
+    catch (std::exception const& e) {
+        fmt::print(stderr, "{}\n", e.what());
+        return 1;  // non-zero exit on failure, as the removed code returned for device errors
+    }
     return 0;
 }