#include #include #include #include "aoc/aoc.hpp" #include "fmt/format.h" #include "ctre.hpp" #define ENUMERATOR_AOC_TOKENS \ ENUMERATOR_AOC_TOKEN(mul , operator_ ) \ ENUMERATOR_AOC_TOKEN(invalid , invalid ) \ ENUMERATOR_AOC_TOKEN(numeric_literal, number ) \ ENUMERATOR_AOC_TOKEN(newline , punctuation) \ ENUMERATOR_AOC_TOKEN(paren_open , punctuation) \ ENUMERATOR_AOC_TOKEN(paren_close , punctuation) \ ENUMERATOR_AOC_TOKEN(comma , punctuation) \ ENUMERATOR_AOC_TOKEN(identifier , identifier ) enum class token_type : std::uint32_t { #define ENUMERATOR_AOC_TOKEN(type, category) type, ENUMERATOR_AOC_TOKENS #undef ENUMERATOR_AOC_TOKEN _count }; enum class token_category : std::uint32_t { operator_, invalid, number, punctuation, identifier, _count }; auto token_type_str(token_type type) -> char const* { switch (type) { using enum token_type; #define ENUMERATOR_AOC_TOKEN(type, category) case type: return #type; ENUMERATOR_AOC_TOKENS #undef ENUMERATOR_AOC_TOKEN default: return "invalid"; } } auto token_type_category(token_type type) -> token_category { switch (type) { using enum token_category; #define ENUMERATOR_AOC_TOKEN(type, category) case token_type::type: return category; ENUMERATOR_AOC_TOKENS #undef ENUMERATOR_AOC_TOKEN default: return token_category::invalid; } } class token { public: token(std::string const& str, token_type type, token_category category, std::size_t row, std::size_t col) : m_type(type) , m_category(category) , m_value(str) , m_row(row) , m_column(col) { } auto type() const -> token_type { return m_type; } auto category() const -> token_category { return m_category; } auto value() const -> std::string const& { return m_value; } auto row() const -> std::size_t { return m_row; } auto col() const -> std::size_t { return m_column; } auto str() const -> std::string { using namespace std::string_literals; std::string str{"token {"}; str += " type: "s + token_type_str(m_type) + ","s; str += " value: \""s + m_value + "\","s; str += " row: "s + std::to_string(m_row) + ","s; str += " col: "s + std::to_string(m_column); str += " }"; return str; } public: inline static auto is_identifier(std::string_view const& str) -> bool { return ctre::match<"^[a-z]+$">(str); } private: token_type m_type; token_category m_category; std::string m_value; std::size_t m_row; std::size_t m_column; }; enum class lexer_error { eof, unknown }; class lexer { public: lexer(std::filesystem::path const& source) : m_strm(source, std::ios::in | std::ios::binary) , m_line(1), m_col(1) { } auto tokenize() -> std::vector { std::vector tokens{}; auto tk = next_token(); while (tk) { tokens.emplace_back(std::move(tk.value())); tk = next_token(); } return tokens; } private: auto next_token() -> std::optional { if (!has_next()) return {}; if (peek() == '\n') { peek_consume(); m_line = m_line + 1; m_col = 0; } std::string str{}; if (peek() == 'm') { auto const col = m_col; auto const is_valid_identifier_char = [](auto const c) { return c >= 'a' && c <= 'z'; }; while (is_valid_identifier_char(peek())) str += peek_consume(); auto const& type = token::is_identifier(str) ? token_type::identifier : token_type::invalid; return token(str, type, token_type_category(type), m_line, col); } if (peek() == '(') { auto const col = m_col; str += peek_consume(); return token(str, token_type::paren_open, token_type_category(token_type::paren_open), m_line, col); } if (peek() == ')') { auto const col = m_col; str += peek_consume(); return token(str, token_type::paren_close, token_type_category(token_type::paren_close), m_line, col); } if (peek() == ',') { auto const col = m_col; str += peek_consume(); return token(str, token_type::comma, token_type_category(token_type::comma), m_line, col); } auto const col = m_col; str += peek_consume(); return token(str, token_type::invalid, token_type_category(token_type::invalid), m_line, col); } auto peek() -> char { return static_cast(m_strm.peek()); } auto peek_consume() -> char { ++m_col; return static_cast(m_strm.get()); } auto has_next() const -> bool { return !m_strm.eof(); } private: std::fstream m_strm; std::size_t m_line; std::size_t m_col; }; auto aoc::entry([[maybe_unused]]std::vector const& args) -> void { lexer lexer{"./dat/24/ex/03.txt"}; auto const tokens = lexer.tokenize(); for (auto const& tk : tokens) { fmt::print("{}\n", tk.str()); } }