// aoc/sol/24/03/entry.cpp

#include <cstddef>
#include <cstdint>
#include <expected>
#include <filesystem>
#include <fstream>
#include <optional>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "aoc/aoc.hpp"
#include "fmt/format.h"
#include "ctre.hpp"

// X-macro table of every token kind known to the lexer.
// Each entry is ENUMERATOR_AOC_TOKEN(<token_type enumerator>, <token_category enumerator>).
// Users define ENUMERATOR_AOC_TOKEN(type, category) before expanding this list
// and #undef it afterwards. The order of the entries determines the numeric
// values of the generated token_type enumerators.
#define ENUMERATOR_AOC_TOKENS                               \
        ENUMERATOR_AOC_TOKEN(mul            , operator_  )  \
        ENUMERATOR_AOC_TOKEN(invalid        , invalid    )  \
        ENUMERATOR_AOC_TOKEN(numeric_literal, number     )  \
        ENUMERATOR_AOC_TOKEN(newline        , punctuation)  \
        ENUMERATOR_AOC_TOKEN(paren_open     , punctuation)  \
        ENUMERATOR_AOC_TOKEN(paren_close    , punctuation)  \
        ENUMERATOR_AOC_TOKEN(comma          , punctuation)  \
        ENUMERATOR_AOC_TOKEN(identifier     , identifier )

/// Kind of a lexed token.
/// The enumerators are generated from ENUMERATOR_AOC_TOKENS in table order;
/// only the first column of each table entry is used here.
enum class token_type : std::uint32_t {
#define ENUMERATOR_AOC_TOKEN(name, unused_category) name,
    ENUMERATOR_AOC_TOKENS
#undef ENUMERATOR_AOC_TOKEN
    _count  // trails the generated list, so it equals the number of token kinds
};

/// Coarse grouping of token kinds; every token_type maps to exactly one
/// category through the second column of ENUMERATOR_AOC_TOKENS.
enum class token_category : std::uint32_t {
    operator_,    // trailing underscore: `operator` is a C++ keyword
    invalid,
    number,
    punctuation,
    identifier,
    _count        // number of categories above
};

/// Returns the enumerator name of `type` as a string literal
/// (e.g. token_type::comma -> "comma").
/// Any value that is not listed in ENUMERATOR_AOC_TOKENS (including
/// token_type::_count) yields "invalid".
auto token_type_str(token_type type) -> char const* {
    // One comparison per table entry; #name stringifies the enumerator.
#define ENUMERATOR_AOC_TOKEN(name, unused_category) \
    if (type == token_type::name) { return #name; }
    ENUMERATOR_AOC_TOKENS
#undef ENUMERATOR_AOC_TOKEN
    return "invalid";
}

/// Maps a token kind to the category paired with it in ENUMERATOR_AOC_TOKENS.
/// Values not present in the table (including token_type::_count) map to
/// token_category::invalid.
auto token_type_category(token_type type) -> token_category {
#define ENUMERATOR_AOC_TOKEN(name, group) \
        case token_type::name: return token_category::group;
    switch (type) {
        ENUMERATOR_AOC_TOKENS
        default: break;
    }
#undef ENUMERATOR_AOC_TOKEN
    return token_category::invalid;
}

/// A single lexeme together with its classification and source position.
/// The row/column values are stored exactly as supplied by the caller.
class token {
public:
    /// @param str      text of the lexeme (copied into the token)
    /// @param type     concrete token kind
    /// @param category category stored alongside `type`
    /// @param row      line on which the lexeme starts
    /// @param col      column at which the lexeme starts
    token(std::string const& str, token_type type, token_category category, std::size_t row, std::size_t col)
        : m_type(type),
          m_category(category),
          m_value(str),
          m_row(row),
          m_column(col) {}

    auto type() const -> token_type { return m_type; }
    auto category() const -> token_category { return m_category; }
    auto value() const -> std::string const& { return m_value; }

    auto row() const -> std::size_t { return m_row; }
    auto col() const -> std::size_t { return m_column; }

    /// Renders the token for debugging as
    /// `token { type: <name>, value: "<text>", row: <r>, col: <c> }`.
    auto str() const -> std::string {
        std::string text{"token {"};
        text += " type: ";
        text += token_type_str(m_type);
        text += ",";
        text += " value: \"";
        text += m_value;
        text += "\",";
        text += " row: ";
        text += std::to_string(m_row);
        text += ",";
        text += " col: ";
        text += std::to_string(m_column);
        text += " }";
        return text;
    }

    /// True when `str` consists solely of one or more lowercase ASCII letters.
    static auto is_identifier(std::string_view const& str) -> bool {
        return ctre::match<"^[a-z]+$">(str);
    }

private:
    token_type     m_type;
    token_category m_category;
    std::string    m_value;
    std::size_t    m_row;
    std::size_t    m_column;
};

/// Error conditions a lexer can report.
enum class lexer_error {
    eof,      // end of input
    unknown   // any other failure
};

/// Character-level lexer that reads a source file and splits it into tokens.
///
/// Recognised lexemes:
///   - a run of lowercase letters starting with 'm'  -> identifier
///   - '(' , ')' , ','                               -> punctuation
///   - any other single character                    -> invalid
/// Line feeds ('\n') are consumed without producing a token; they only advance
/// the line counter. Lines and columns are both 1-based.
class lexer {
public:
    /// Opens `source` for binary reading. If the file cannot be opened the
    /// stream is left in a failed state and tokenize() returns no tokens.
    lexer(std::filesystem::path const& source)
        : m_strm(source, std::ios::in | std::ios::binary)
        , m_line(1), m_col(1) {
    }

    /// Lexes the remainder of the stream.
    /// @return all tokens in source order (empty if nothing could be read).
    auto tokenize() -> std::vector<token> {
        std::vector<token> tokens{};
        while (auto tk = next_token()) {
            tokens.emplace_back(std::move(*tk));
        }
        return tokens;
    }

private:
    /// Produces the next token, or an empty optional once the input is exhausted.
    auto next_token() -> std::optional<token> {
        // Skip *every* consecutive line feed, not just the first one, so blank
        // lines keep the line counter correct and never leak out as tokens.
        while (has_next() && peek() == '\n') {
            peek_consume();
            ++m_line;
            m_col = 1;  // columns are 1-based on every line, matching line 1
        }
        if (!has_next()) return std::nullopt;

        // Position of the first character of the token being built.
        auto const col = m_col;
        auto const make = [&](std::string const& text, token_type type) {
            return token(text, type, token_type_category(type), m_line, col);
        };

        char const first = peek();
        if (first == 'm') {
            std::string word{};
            while (has_next() && is_identifier_char(peek())) word += peek_consume();
            auto const type = token::is_identifier(word) ? token_type::identifier : token_type::invalid;
            return make(word, type);
        }

        // Everything else is a single-character token.
        std::string const text(1, peek_consume());
        switch (first) {
            case '(': return make(text, token_type::paren_open);
            case ')': return make(text, token_type::paren_close);
            case ',': return make(text, token_type::comma);
            default:  return make(text, token_type::invalid);
        }
    }

    /// Characters allowed inside an identifier: lowercase ASCII letters.
    static auto is_identifier_char(char c) -> bool {
        return c >= 'a' && c <= 'z';
    }

    /// Returns the next character without consuming it.
    /// Only meaningful while has_next() is true.
    auto peek() -> char {
        return static_cast<char>(m_strm.peek());
    }

    /// Consumes and returns the next character, advancing the column counter.
    auto peek_consume() -> char {
        ++m_col;
        return static_cast<char>(m_strm.get());
    }

    /// True when at least one more character can be read.
    /// eof() alone is not sufficient: the eof flag is only raised after a read
    /// has already run into the end of the file, and it is never raised for a
    /// stream that failed to open. Checking good() and comparing the peeked
    /// value against traits::eof() handles both situations.
    auto has_next() -> bool {
        using traits = std::fstream::traits_type;
        return m_strm.good() && !traits::eq_int_type(m_strm.peek(), traits::eof());
    }

private:
    std::fstream m_strm;
    std::size_t  m_line;
    std::size_t  m_col;
};

auto aoc::entry([[maybe_unused]]std::vector<std::string_view> const& args) -> void {
    lexer lexer{"./dat/24/ex/03.txt"};

    auto const tokens = lexer.tokenize();

    for (auto const& tk : tokens) {
        fmt::print("{}\n", tk.str());
    }
}