187 lines
5.3 KiB
C++
187 lines
5.3 KiB
C++
#include <cstddef>
#include <cstdint>
#include <expected>
#include <filesystem>
#include <fstream>
#include <optional>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "aoc/aoc.hpp"
#include "fmt/format.h"
#include "ctre.hpp"
|
|
|
|
// X-macro table of every token kind, one entry per line:
//     ENUMERATOR_AOC_TOKEN(<token_type enumerator>, <token_category enumerator>)
// Usage: #define ENUMERATOR_AOC_TOKEN(type, category) to the desired expansion,
// expand ENUMERATOR_AOC_TOKENS, then #undef ENUMERATOR_AOC_TOKEN again.
// The entry order is significant: token_type's enumerators are generated in
// this order, so reordering entries changes their numeric values.
#define ENUMERATOR_AOC_TOKENS                             \
    ENUMERATOR_AOC_TOKEN(mul            , operator_  )    \
    ENUMERATOR_AOC_TOKEN(invalid        , invalid    )    \
    ENUMERATOR_AOC_TOKEN(numeric_literal, number     )    \
    ENUMERATOR_AOC_TOKEN(newline        , punctuation)    \
    ENUMERATOR_AOC_TOKEN(paren_open     , punctuation)    \
    ENUMERATOR_AOC_TOKEN(paren_close    , punctuation)    \
    ENUMERATOR_AOC_TOKEN(comma          , punctuation)    \
    ENUMERATOR_AOC_TOKEN(identifier     , identifier )
|
|
|
|
enum class token_type : std::uint32_t {
|
|
#define ENUMERATOR_AOC_TOKEN(type, category) type,
|
|
ENUMERATOR_AOC_TOKENS
|
|
#undef ENUMERATOR_AOC_TOKEN
|
|
_count
|
|
};
|
|
|
|
/// Coarse grouping of token kinds; each entry of ENUMERATOR_AOC_TOKENS names
/// one of these as its second argument.
///
/// `_count` comes last and therefore equals the number of categories; it is
/// not a category itself.
enum class token_category : std::uint32_t {
    operator_,    // trailing underscore because `operator` is a keyword
    invalid,
    number,
    punctuation,
    identifier,
    _count
};
|
|
|
|
auto token_type_str(token_type type) -> char const* {
|
|
switch (type) {
|
|
using enum token_type;
|
|
#define ENUMERATOR_AOC_TOKEN(type, category) case type: return #type;
|
|
ENUMERATOR_AOC_TOKENS
|
|
#undef ENUMERATOR_AOC_TOKEN
|
|
default: return "invalid";
|
|
}
|
|
}
|
|
|
|
auto token_type_category(token_type type) -> token_category {
|
|
switch (type) {
|
|
using enum token_category;
|
|
#define ENUMERATOR_AOC_TOKEN(type, category) case token_type::type: return category;
|
|
ENUMERATOR_AOC_TOKENS
|
|
#undef ENUMERATOR_AOC_TOKEN
|
|
default: return token_category::invalid;
|
|
}
|
|
}
|
|
|
|
class token {
|
|
public:
|
|
token(std::string const& str, token_type type, token_category category, std::size_t row, std::size_t col)
|
|
: m_type(type)
|
|
, m_category(category)
|
|
, m_value(str)
|
|
, m_row(row)
|
|
, m_column(col) { }
|
|
|
|
auto type() const -> token_type { return m_type; }
|
|
auto category() const -> token_category { return m_category; }
|
|
auto value() const -> std::string const& { return m_value; }
|
|
|
|
auto row() const -> std::size_t { return m_row; }
|
|
auto col() const -> std::size_t { return m_column; }
|
|
|
|
auto str() const -> std::string {
|
|
using namespace std::string_literals;
|
|
std::string str{"token {"};
|
|
str += " type: "s + token_type_str(m_type) + ","s;
|
|
str += " value: \""s + m_value + "\","s;
|
|
str += " row: "s + std::to_string(m_row) + ","s;
|
|
str += " col: "s + std::to_string(m_column);
|
|
str += " }";
|
|
return str;
|
|
}
|
|
|
|
public:
|
|
inline static auto is_identifier(std::string_view const& str) -> bool {
|
|
return ctre::match<"^[a-z]+$">(str);
|
|
}
|
|
|
|
private:
|
|
token_type m_type;
|
|
token_category m_category;
|
|
std::string m_value;
|
|
std::size_t m_row;
|
|
std::size_t m_column;
|
|
};
|
|
|
|
/// Error codes for lexing failures.
///
/// NOTE(review): nothing in the visible code refers to this enum yet —
/// presumably intended as the error type of a std::expected-returning lexer
/// API; confirm before relying on it.
enum class lexer_error {
    eof,
    unknown
};
|
|
|
|
class lexer {
|
|
public:
|
|
lexer(std::filesystem::path const& source)
|
|
: m_strm(source, std::ios::in | std::ios::binary)
|
|
, m_line(1), m_col(1) {
|
|
}
|
|
|
|
auto tokenize() -> std::vector<token> {
|
|
std::vector<token> tokens{};
|
|
auto tk = next_token();
|
|
while (tk) {
|
|
tokens.emplace_back(std::move(tk.value()));
|
|
tk = next_token();
|
|
}
|
|
return tokens;
|
|
}
|
|
|
|
private:
|
|
auto next_token() -> std::optional<token> {
|
|
if (!has_next()) return {};
|
|
if (peek() == '\n') {
|
|
peek_consume();
|
|
m_line = m_line + 1;
|
|
m_col = 0;
|
|
}
|
|
|
|
std::string str{};
|
|
if (peek() == 'm') {
|
|
auto const col = m_col;
|
|
auto const is_valid_identifier_char = [](auto const c) {
|
|
return c >= 'a' && c <= 'z';
|
|
};
|
|
while (is_valid_identifier_char(peek())) str += peek_consume();
|
|
auto const& type = token::is_identifier(str) ? token_type::identifier : token_type::invalid;
|
|
return token(str, type, token_type_category(type), m_line, col);
|
|
}
|
|
|
|
if (peek() == '(') {
|
|
auto const col = m_col;
|
|
str += peek_consume();
|
|
return token(str, token_type::paren_open, token_type_category(token_type::paren_open), m_line, col);
|
|
}
|
|
|
|
if (peek() == ')') {
|
|
auto const col = m_col;
|
|
str += peek_consume();
|
|
return token(str, token_type::paren_close, token_type_category(token_type::paren_close), m_line, col);
|
|
}
|
|
|
|
if (peek() == ',') {
|
|
auto const col = m_col;
|
|
str += peek_consume();
|
|
return token(str, token_type::comma, token_type_category(token_type::comma), m_line, col);
|
|
}
|
|
|
|
auto const col = m_col;
|
|
str += peek_consume();
|
|
return token(str, token_type::invalid, token_type_category(token_type::invalid), m_line, col);
|
|
}
|
|
|
|
auto peek() -> char {
|
|
return static_cast<char>(m_strm.peek());
|
|
}
|
|
auto peek_consume() -> char {
|
|
++m_col;
|
|
return static_cast<char>(m_strm.get());
|
|
}
|
|
auto has_next() const -> bool {
|
|
return !m_strm.eof();
|
|
}
|
|
|
|
private:
|
|
std::fstream m_strm;
|
|
std::size_t m_line;
|
|
std::size_t m_col;
|
|
};
|
|
|
|
auto aoc::entry([[maybe_unused]]std::vector<std::string_view> const& args) -> void {
|
|
lexer lexer{"./dat/24/ex/03.txt"};
|
|
|
|
auto const tokens = lexer.tokenize();
|
|
|
|
for (auto const& tk : tokens) {
|
|
fmt::print("{}\n", tk.str());
|
|
}
|
|
}
|
|
|