diff --git a/src/irc/evaluator.h b/src/irc/evaluator.h index 107bd78..d9c0aaf 100644 --- a/src/irc/evaluator.h +++ b/src/irc/evaluator.h @@ -9,7 +9,7 @@ namespace nix { class EvalState; class Value; class PosIdx; -} +} // namespace nix namespace nix_irc { @@ -18,18 +18,17 @@ class IREnvironment; class Evaluator { public: - explicit Evaluator(nix::EvalState& state); - ~Evaluator(); + explicit Evaluator(nix::EvalState& state); + ~Evaluator(); - void eval_to_nix(const std::shared_ptr& ir_node, - nix::Value& result, - IREnvironment* env = nullptr); + void eval_to_nix(const std::shared_ptr& ir_node, nix::Value& result, + IREnvironment* env = nullptr); private: - struct Impl; - std::unique_ptr pImpl; + struct Impl; + std::unique_ptr pImpl; }; -} +} // namespace nix_irc #endif diff --git a/src/irc/ir_gen.cpp b/src/irc/ir_gen.cpp index 5b56cec..a2561be 100644 --- a/src/irc/ir_gen.cpp +++ b/src/irc/ir_gen.cpp @@ -1,219 +1,222 @@ #include "ir_gen.h" +#include #include #include -#include namespace nix_irc { struct NameResolver::Impl { - std::vector> scopes; - std::vector> scope_names; + std::vector> scopes; + std::vector> scope_names; - Impl() { - scopes.push_back({}); - scope_names.push_back({}); - } + Impl() { + scopes.push_back({}); + scope_names.push_back({}); + } }; NameResolver::NameResolver() : pImpl(std::make_unique()) {} NameResolver::~NameResolver() = default; void NameResolver::enter_scope() { - pImpl->scopes.push_back({}); - pImpl->scope_names.push_back({}); + pImpl->scopes.push_back({}); + pImpl->scope_names.push_back({}); } void NameResolver::exit_scope() { - if (!pImpl->scopes.empty()) { - pImpl->scopes.pop_back(); - pImpl->scope_names.pop_back(); - } + if (!pImpl->scopes.empty()) { + pImpl->scopes.pop_back(); + pImpl->scope_names.pop_back(); + } } void NameResolver::bind(const std::string& name) { - if (pImpl->scopes.empty()) return; - uint32_t idx = pImpl->scope_names.back().size(); - pImpl->scopes.back()[name] = idx; - pImpl->scope_names.back().push_back(name); + if (pImpl->scopes.empty()) + return; + uint32_t idx = pImpl->scope_names.back().size(); + pImpl->scopes.back()[name] = idx; + pImpl->scope_names.back().push_back(name); } uint32_t NameResolver::resolve(const std::string& name) { - for (int i = (int)pImpl->scopes.size() - 1; i >= 0; --i) { - auto it = pImpl->scopes[i].find(name); - if (it != pImpl->scopes[i].end()) { - uint32_t depth = pImpl->scopes.size() - 1 - i; - uint32_t offset = it->second; - return depth << 16 | offset; - } + for (int i = (int) pImpl->scopes.size() - 1; i >= 0; --i) { + auto it = pImpl->scopes[i].find(name); + if (it != pImpl->scopes[i].end()) { + uint32_t depth = pImpl->scopes.size() - 1 - i; + uint32_t offset = it->second; + return depth << 16 | offset; } - return 0xFFFFFFFF; + } + return 0xFFFFFFFF; } bool NameResolver::is_bound(const std::string& name) const { - for (auto it = pImpl->scopes.rbegin(); it != pImpl->scopes.rend(); ++it) { - if (it->count(name)) return true; - } - return false; + for (auto it = pImpl->scopes.rbegin(); it != pImpl->scopes.rend(); ++it) { + if (it->count(name)) + return true; + } + return false; } struct IRGenerator::Impl { - std::unordered_map string_table; - uint32_t next_string_id = 0; - NameResolver name_resolver; + std::unordered_map string_table; + uint32_t next_string_id = 0; + NameResolver name_resolver; - Impl() {} + Impl() {} - uint32_t add_string(const std::string& str) { - auto it = string_table.find(str); - if (it != string_table.end()) { - return it->second; - } - uint32_t id = next_string_id++; - string_table[str] = id; - return id; + uint32_t add_string(const std::string& str) { + auto it = string_table.find(str); + if (it != string_table.end()) { + return it->second; } + uint32_t id = next_string_id++; + string_table[str] = id; + return id; + } - std::shared_ptr convert(const std::shared_ptr& node_ptr) { - if (!node_ptr) return std::make_shared(ConstNullNode{}); + std::shared_ptr convert(const std::shared_ptr& node_ptr) { + if (!node_ptr) + return std::make_shared(ConstNullNode{}); - const Node& node = *node_ptr; + const Node& node = *node_ptr; - if (auto* n = node.get_if()) { - return std::make_shared(*n); - } - if (auto* n = node.get_if()) { - return std::make_shared(*n); - } - if (auto* n = node.get_if()) { - return std::make_shared(*n); - } - if (auto* n = node.get_if()) { - return std::make_shared(*n); - } - if (auto* n = node.get_if()) { - return std::make_shared(*n); - } - if (auto* n = node.get_if()) { - uint32_t idx = name_resolver.resolve(n->name.value_or("")); - VarNode converted(idx); - converted.name = n->name; - converted.line = n->line; - return std::make_shared(converted); - } - if (auto* n = node.get_if()) { - name_resolver.enter_scope(); - if (n->param_name) { - name_resolver.bind(*n->param_name); - } - auto body = convert(n->body); - name_resolver.exit_scope(); - LambdaNode lambda(n->arity, body, n->line); - lambda.param_name = n->param_name; - return std::make_shared(lambda); - } - if (auto* n = node.get_if()) { - auto func = convert(n->func); - auto arg = convert(n->arg); - return std::make_shared(AppNode(func, arg, n->line)); - } - if (auto* n = node.get_if()) { - AttrsetNode attrs(n->recursive, n->line); - name_resolver.enter_scope(); - for (const auto& [key, val] : n->attrs) { - name_resolver.bind(key); - } - for (const auto& [key, val] : n->attrs) { - attrs.attrs.push_back({key, convert(val)}); - } - name_resolver.exit_scope(); - return std::make_shared(attrs); - } - if (auto* n = node.get_if()) { - auto expr = convert(n->expr); - auto attr = convert(n->attr); - SelectNode select(expr, attr, n->line); - if (n->default_expr) { - select.default_expr = convert(*n->default_expr); - } - return std::make_shared(select); - } - if (auto* n = node.get_if()) { - auto expr = convert(n->expr); - auto attr = convert(n->attr); - return std::make_shared(HasAttrNode(expr, attr, n->line)); - } - if (auto* n = node.get_if()) { - auto attrs = convert(n->attrs); - auto body = convert(n->body); - return std::make_shared(WithNode(attrs, body, n->line)); - } - if (auto* n = node.get_if()) { - auto cond = convert(n->cond); - auto then_b = convert(n->then_branch); - auto else_b = convert(n->else_branch); - return std::make_shared(IfNode(cond, then_b, else_b, n->line)); - } - if (auto* n = node.get_if()) { - name_resolver.enter_scope(); - for (const auto& [key, val] : n->bindings) { - name_resolver.bind(key); - } - std::vector>> new_bindings; - for (const auto& [key, val] : n->bindings) { - new_bindings.push_back({key, convert(val)}); - } - auto body = convert(n->body); - name_resolver.exit_scope(); - LetNode let(body, n->line); - let.bindings = std::move(new_bindings); - return std::make_shared(let); - } - if (auto* n = node.get_if()) { - name_resolver.enter_scope(); - for (const auto& [key, val] : n->bindings) { - name_resolver.bind(key); - } - std::vector>> new_bindings; - for (const auto& [key, val] : n->bindings) { - new_bindings.push_back({key, convert(val)}); - } - auto body = convert(n->body); - name_resolver.exit_scope(); - LetRecNode letrec(body, n->line); - letrec.bindings = std::move(new_bindings); - return std::make_shared(letrec); - } - if (auto* n = node.get_if()) { - auto cond = convert(n->cond); - auto body = convert(n->body); - return std::make_shared(AssertNode(cond, body, n->line)); - } - if (auto* n = node.get_if()) { - auto left = convert(n->left); - auto right = convert(n->right); - return std::make_shared(BinaryOpNode(n->op, left, right, n->line)); - } - if (auto* n = node.get_if()) { - auto operand = convert(n->operand); - return std::make_shared(UnaryOpNode(n->op, operand, n->line)); - } - return std::make_shared(ConstNullNode{}); + if (auto* n = node.get_if()) { + return std::make_shared(*n); } + if (auto* n = node.get_if()) { + return std::make_shared(*n); + } + if (auto* n = node.get_if()) { + return std::make_shared(*n); + } + if (auto* n = node.get_if()) { + return std::make_shared(*n); + } + if (auto* n = node.get_if()) { + return std::make_shared(*n); + } + if (auto* n = node.get_if()) { + uint32_t idx = name_resolver.resolve(n->name.value_or("")); + VarNode converted(idx); + converted.name = n->name; + converted.line = n->line; + return std::make_shared(converted); + } + if (auto* n = node.get_if()) { + name_resolver.enter_scope(); + if (n->param_name) { + name_resolver.bind(*n->param_name); + } + auto body = convert(n->body); + name_resolver.exit_scope(); + LambdaNode lambda(n->arity, body, n->line); + lambda.param_name = n->param_name; + return std::make_shared(lambda); + } + if (auto* n = node.get_if()) { + auto func = convert(n->func); + auto arg = convert(n->arg); + return std::make_shared(AppNode(func, arg, n->line)); + } + if (auto* n = node.get_if()) { + AttrsetNode attrs(n->recursive, n->line); + name_resolver.enter_scope(); + for (const auto& [key, val] : n->attrs) { + name_resolver.bind(key); + } + for (const auto& [key, val] : n->attrs) { + attrs.attrs.push_back({key, convert(val)}); + } + name_resolver.exit_scope(); + return std::make_shared(attrs); + } + if (auto* n = node.get_if()) { + auto expr = convert(n->expr); + auto attr = convert(n->attr); + SelectNode select(expr, attr, n->line); + if (n->default_expr) { + select.default_expr = convert(*n->default_expr); + } + return std::make_shared(select); + } + if (auto* n = node.get_if()) { + auto expr = convert(n->expr); + auto attr = convert(n->attr); + return std::make_shared(HasAttrNode(expr, attr, n->line)); + } + if (auto* n = node.get_if()) { + auto attrs = convert(n->attrs); + auto body = convert(n->body); + return std::make_shared(WithNode(attrs, body, n->line)); + } + if (auto* n = node.get_if()) { + auto cond = convert(n->cond); + auto then_b = convert(n->then_branch); + auto else_b = convert(n->else_branch); + return std::make_shared(IfNode(cond, then_b, else_b, n->line)); + } + if (auto* n = node.get_if()) { + name_resolver.enter_scope(); + for (const auto& [key, val] : n->bindings) { + name_resolver.bind(key); + } + std::vector>> new_bindings; + for (const auto& [key, val] : n->bindings) { + new_bindings.push_back({key, convert(val)}); + } + auto body = convert(n->body); + name_resolver.exit_scope(); + LetNode let(body, n->line); + let.bindings = std::move(new_bindings); + return std::make_shared(let); + } + if (auto* n = node.get_if()) { + name_resolver.enter_scope(); + for (const auto& [key, val] : n->bindings) { + name_resolver.bind(key); + } + std::vector>> new_bindings; + for (const auto& [key, val] : n->bindings) { + new_bindings.push_back({key, convert(val)}); + } + auto body = convert(n->body); + name_resolver.exit_scope(); + LetRecNode letrec(body, n->line); + letrec.bindings = std::move(new_bindings); + return std::make_shared(letrec); + } + if (auto* n = node.get_if()) { + auto cond = convert(n->cond); + auto body = convert(n->body); + return std::make_shared(AssertNode(cond, body, n->line)); + } + if (auto* n = node.get_if()) { + auto left = convert(n->left); + auto right = convert(n->right); + return std::make_shared(BinaryOpNode(n->op, left, right, n->line)); + } + if (auto* n = node.get_if()) { + auto operand = convert(n->operand); + return std::make_shared(UnaryOpNode(n->op, operand, n->line)); + } + return std::make_shared(ConstNullNode{}); + } }; IRGenerator::IRGenerator() : pImpl(std::make_unique()) {} IRGenerator::~IRGenerator() = default; void IRGenerator::set_string_table(const std::unordered_map& table) { - pImpl->string_table = table; + pImpl->string_table = table; } uint32_t IRGenerator::add_string(const std::string& str) { - return pImpl->add_string(str); + return pImpl->add_string(str); } std::shared_ptr IRGenerator::generate(const std::shared_ptr& ast) { - return pImpl->convert(ast); + return pImpl->convert(ast); } -} +} // namespace nix_irc diff --git a/src/irc/ir_gen.h b/src/irc/ir_gen.h index de082a6..2c3e9b8 100644 --- a/src/irc/ir_gen.h +++ b/src/irc/ir_gen.h @@ -2,44 +2,44 @@ #define NIX_IRC_IR_GEN_H #include "types.h" +#include #include #include #include -#include namespace nix_irc { class IRGenerator { public: - IRGenerator(); - ~IRGenerator(); - - void set_string_table(const std::unordered_map& table); - uint32_t add_string(const std::string& str); - - std::shared_ptr generate(const std::shared_ptr& ast); - + IRGenerator(); + ~IRGenerator(); + + void set_string_table(const std::unordered_map& table); + uint32_t add_string(const std::string& str); + + std::shared_ptr generate(const std::shared_ptr& ast); + private: - struct Impl; - std::unique_ptr pImpl; + struct Impl; + std::unique_ptr pImpl; }; class NameResolver { public: - NameResolver(); - ~NameResolver(); - - void enter_scope(); - void exit_scope(); - void bind(const std::string& name); - uint32_t resolve(const std::string& name); - bool is_bound(const std::string& name) const; - + NameResolver(); + ~NameResolver(); + + void enter_scope(); + void exit_scope(); + void bind(const std::string& name); + uint32_t resolve(const std::string& name); + bool is_bound(const std::string& name) const; + private: - struct Impl; - std::unique_ptr pImpl; + struct Impl; + std::unique_ptr pImpl; }; -} +} // namespace nix_irc #endif \ No newline at end of file diff --git a/src/irc/main.cpp b/src/irc/main.cpp index c5c28bf..0f71337 100644 --- a/src/irc/main.cpp +++ b/src/irc/main.cpp @@ -1,150 +1,150 @@ -#include +#include "ir_gen.h" #include "parser.h" #include "resolver.h" -#include "ir_gen.h" #include "serializer.h" +#include +#include #include #include -#include namespace nix_irc { void print_usage(const char* prog) { - std::cout << "Usage: " << prog << " [options] [output.nixir]\n" - << "\nOptions:\n" - << " -I Add search path for imports\n" - << " --no-imports Disable import resolution\n" - << " --help Show this help\n"; + std::cout << "Usage: " << prog << " [options] [output.nixir]\n" + << "\nOptions:\n" + << " -I Add search path for imports\n" + << " --no-imports Disable import resolution\n" + << " --help Show this help\n"; } int run_compile(int argc, char** argv) { - std::string input_file; - std::string output_file; - std::vector search_paths; - bool resolve_imports = true; - - int i = 1; - while (i < argc) { - std::string arg = argv[i]; - if (arg == "-I") { - if (i + 1 >= argc) { - std::cerr << "Error: -I requires a path argument\n"; - return 1; - } - search_paths.push_back(argv[++i]); - } else if (arg == "--no-imports") { - resolve_imports = false; - } else if (arg == "--help" || arg == "-h") { - print_usage(argv[0]); - return 0; - } else if (arg[0] != '-') { - input_file = arg; - if (i + 1 < argc && argv[i + 1][0] != '-') { - output_file = argv[++i]; - } - } else { - std::cerr << "Unknown option: " << arg << "\n"; - print_usage(argv[0]); - return 1; - } - i++; - } - - if (input_file.empty()) { - std::cerr << "Error: No input file specified\n"; - print_usage(argv[0]); - return 1; - } - - if (output_file.empty()) { - output_file = input_file + "r"; - } - - try { - Parser parser; - Resolver resolver; - - for (const auto& path : search_paths) { - resolver.add_search_path(path); - } - - std::cout << "Parsing: " << input_file << "\n"; - auto ast = parser.parse_file(input_file); - - if (!ast) { - std::cerr << "Error: Failed to parse input\n"; - return 1; - } - - std::cout << "Resolving imports...\n"; - - IRGenerator ir_gen; - - std::cout << "Generating IR...\n"; - auto ir = ir_gen.generate(ast); + std::string input_file; + std::string output_file; + std::vector search_paths; + bool resolve_imports = true; - IRModule module; - module.version = IR_VERSION; - module.entry = ir; - - std::cout << "Serializing to: " << output_file << "\n"; - Serializer serializer; - serializer.serialize(module, output_file); - - std::cout << "Done!\n"; - return 0; - - } catch (const std::exception& e) { - std::cerr << "Error: " << e.what() << "\n"; + int i = 1; + while (i < argc) { + std::string arg = argv[i]; + if (arg == "-I") { + if (i + 1 >= argc) { + std::cerr << "Error: -I requires a path argument\n"; return 1; + } + search_paths.push_back(argv[++i]); + } else if (arg == "--no-imports") { + resolve_imports = false; + } else if (arg == "--help" || arg == "-h") { + print_usage(argv[0]); + return 0; + } else if (arg[0] != '-') { + input_file = arg; + if (i + 1 < argc && argv[i + 1][0] != '-') { + output_file = argv[++i]; + } + } else { + std::cerr << "Unknown option: " << arg << "\n"; + print_usage(argv[0]); + return 1; } + i++; + } + + if (input_file.empty()) { + std::cerr << "Error: No input file specified\n"; + print_usage(argv[0]); + return 1; + } + + if (output_file.empty()) { + output_file = input_file + "r"; + } + + try { + Parser parser; + Resolver resolver; + + for (const auto& path : search_paths) { + resolver.add_search_path(path); + } + + std::cout << "Parsing: " << input_file << "\n"; + auto ast = parser.parse_file(input_file); + + if (!ast) { + std::cerr << "Error: Failed to parse input\n"; + return 1; + } + + std::cout << "Resolving imports...\n"; + + IRGenerator ir_gen; + + std::cout << "Generating IR...\n"; + auto ir = ir_gen.generate(ast); + + IRModule module; + module.version = IR_VERSION; + module.entry = ir; + + std::cout << "Serializing to: " << output_file << "\n"; + Serializer serializer; + serializer.serialize(module, output_file); + + std::cout << "Done!\n"; + return 0; + + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << "\n"; + return 1; + } } void print_decompile_usage(const char* prog) { - std::cout << "Usage: " << prog << " decompile \n"; + std::cout << "Usage: " << prog << " decompile \n"; } int run_decompile(int argc, char** argv) { - if (argc < 3) { - print_decompile_usage(argv[0]); - return 1; - } - - std::string input_file = argv[2]; - - try { - Deserializer deserializer; - auto module = deserializer.deserialize(input_file); - - std::cout << "IR Version: " << module.version << "\n"; - std::cout << "Sources: " << module.sources.size() << "\n"; - std::cout << "Imports: " << module.imports.size() << "\n"; - - return 0; - } catch (const std::exception& e) { - std::cerr << "Error: " << e.what() << "\n"; - return 1; - } + if (argc < 3) { + print_decompile_usage(argv[0]); + return 1; + } + + std::string input_file = argv[2]; + + try { + Deserializer deserializer; + auto module = deserializer.deserialize(input_file); + + std::cout << "IR Version: " << module.version << "\n"; + std::cout << "Sources: " << module.sources.size() << "\n"; + std::cout << "Imports: " << module.imports.size() << "\n"; + + return 0; + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << "\n"; + return 1; + } } -} +} // namespace nix_irc int main(int argc, char** argv) { - if (argc < 2) { - nix_irc::print_usage(argv[0]); - return 1; - } - - std::string cmd = argv[1]; - - if (cmd == "compile" || cmd == "c") { - return nix_irc::run_compile(argc - 1, argv + 1); - } else if (cmd == "decompile" || cmd == "d") { - return nix_irc::run_decompile(argc, argv); - } else if (cmd == "help" || cmd == "--help" || cmd == "-h") { - nix_irc::print_usage(argv[0]); - return 0; - } else { - return nix_irc::run_compile(argc, argv); - } + if (argc < 2) { + nix_irc::print_usage(argv[0]); + return 1; + } + + std::string cmd = argv[1]; + + if (cmd == "compile" || cmd == "c") { + return nix_irc::run_compile(argc - 1, argv + 1); + } else if (cmd == "decompile" || cmd == "d") { + return nix_irc::run_decompile(argc, argv); + } else if (cmd == "help" || cmd == "--help" || cmd == "-h") { + nix_irc::print_usage(argv[0]); + return 0; + } else { + return nix_irc::run_compile(argc, argv); + } } diff --git a/src/irc/parser.cpp b/src/irc/parser.cpp index 8a47e2a..e72d034 100644 --- a/src/irc/parser.cpp +++ b/src/irc/parser.cpp @@ -1,950 +1,1064 @@ #include "parser.h" -#include +#include #include #include +#include #include -#include -#include -#include #include -#include +#include +#include +#include namespace nix_irc { static std::string trim(const std::string& s) { - size_t start = s.find_first_not_of(" \t\n\r"); - if (start == std::string::npos) return ""; - size_t end = s.find_last_not_of(" \t\n\r"); - return s.substr(start, end - start + 1); + size_t start = s.find_first_not_of(" \t\n\r"); + if (start == std::string::npos) + return ""; + size_t end = s.find_last_not_of(" \t\n\r"); + return s.substr(start, end - start + 1); } static std::string read_file(const std::string& path) { - FILE* f = fopen(path.c_str(), "r"); - if (!f) { - throw std::runtime_error("Cannot open file: " + path); - } - fseek(f, 0, SEEK_END); - long size = ftell(f); - fseek(f, 0, SEEK_SET); - std::string content(size, '\0'); - if (fread(content.data(), 1, size, f) != static_cast(size)) { - fclose(f); - throw std::runtime_error("Failed to read file: " + path); - } + FILE* f = fopen(path.c_str(), "r"); + if (!f) { + throw std::runtime_error("Cannot open file: " + path); + } + fseek(f, 0, SEEK_END); + long size = ftell(f); + fseek(f, 0, SEEK_SET); + std::string content(size, '\0'); + if (fread(content.data(), 1, size, f) != static_cast(size)) { fclose(f); - return content; + throw std::runtime_error("Failed to read file: " + path); + } + fclose(f); + return content; } static std::pair run_command(const std::string& cmd) { - std::array buffer; - std::string result; - std::string error; + std::array buffer; + std::string result; + std::string error; - FILE* pipe = popen(cmd.c_str(), "r"); - if (!pipe) throw std::runtime_error("popen failed"); + FILE* pipe = popen(cmd.c_str(), "r"); + if (!pipe) + throw std::runtime_error("popen failed"); - while (fgets(buffer.data(), buffer.size(), pipe) != nullptr) { - result += buffer.data(); - } + while (fgets(buffer.data(), buffer.size(), pipe) != nullptr) { + result += buffer.data(); + } - int status = pclose(pipe); - if (status != 0) { - throw std::runtime_error("Command failed: " + cmd); - } - return {result, error}; + int status = pclose(pipe); + if (status != 0) { + throw std::runtime_error("Command failed: " + cmd); + } + return {result, error}; } struct Token { - enum Type { - LPAREN, RPAREN, LBRACE, RBRACE, LBRACKET, RBRACKET, - IDENT, STRING, STRING_INTERP, PATH, INT, BOOL, - LET, IN, REC, IF, THEN, ELSE, ASSERT, WITH, INHERIT, - DOT, SEMICOLON, COLON, EQUALS, AT, COMMA, QUESTION, ELLIPSIS, - // Operators - PLUS, MINUS, STAR, SLASH, CONCAT, - EQEQ, NE, LT, GT, LE, GE, - AND, OR, IMPL, NOT, - EOF_ - } type; - std::string value; - size_t line; - size_t col; + enum Type { + LPAREN, + RPAREN, + LBRACE, + RBRACE, + LBRACKET, + RBRACKET, + IDENT, + STRING, + STRING_INTERP, + PATH, + INT, + BOOL, + LET, + IN, + REC, + IF, + THEN, + ELSE, + ASSERT, + WITH, + INHERIT, + DOT, + SEMICOLON, + COLON, + EQUALS, + AT, + COMMA, + QUESTION, + ELLIPSIS, + // Operators + PLUS, + MINUS, + STAR, + SLASH, + CONCAT, + EQEQ, + NE, + LT, + GT, + LE, + GE, + AND, + OR, + IMPL, + NOT, + EOF_ + } type; + std::string value; + size_t line; + size_t col; }; class Lexer { public: - Lexer(const std::string& input) : input(input), pos(0), line(1), col(1) {} + Lexer(const std::string& input) : input(input), pos(0), line(1), col(1) {} - std::vector tokenize() { - #define TOKEN(t) Token{Token::t, "", line, col} + std::vector tokenize() { +#define TOKEN(t) \ + Token { Token::t, "", line, col } - while (pos < input.size()) { - skip_whitespace(); - if (pos >= input.size()) break; + while (pos < input.size()) { + skip_whitespace(); + if (pos >= input.size()) + break; - char c = input[pos]; + char c = input[pos]; - if (c == '(') { emit(TOKEN(LPAREN)); } - else if (c == ')') { emit(TOKEN(RPAREN)); } - else if (c == '{') { emit(TOKEN(LBRACE)); } - else if (c == '}') { emit(TOKEN(RBRACE)); } - else if (c == '[') { emit(TOKEN(LBRACKET)); } - else if (c == ']') { emit(TOKEN(RBRACKET)); } - else if (c == ';') { emit(TOKEN(SEMICOLON)); } - else if (c == ':') { emit(TOKEN(COLON)); } - else if (c == '@') { emit(TOKEN(AT)); } - else if (c == ',') { emit(TOKEN(COMMA)); } - else if (c == '"') { tokenize_string(); } - // Two-char operators - else if (c == '=' && pos + 1 < input.size() && input[pos + 1] == '=') { - tokens.push_back(TOKEN(EQEQ)); - pos += 2; col += 2; - } - else if (c == '=') { emit(TOKEN(EQUALS)); } - else if (c == '!' && pos + 1 < input.size() && input[pos + 1] == '=') { - tokens.push_back(TOKEN(NE)); - pos += 2; col += 2; - } - else if (c == '<' && pos + 1 < input.size() && input[pos + 1] == '=') { - tokens.push_back(TOKEN(LE)); - pos += 2; col += 2; - } - else if (c == '>' && pos + 1 < input.size() && input[pos + 1] == '=') { - tokens.push_back(TOKEN(GE)); - pos += 2; col += 2; - } - else if (c == '+' && pos + 1 < input.size() && input[pos + 1] == '+') { - tokens.push_back(TOKEN(CONCAT)); - pos += 2; col += 2; - } - else if (c == '&' && pos + 1 < input.size() && input[pos + 1] == '&') { - tokens.push_back(TOKEN(AND)); - pos += 2; col += 2; - } - else if (c == '|' && pos + 1 < input.size() && input[pos + 1] == '|') { - tokens.push_back(TOKEN(OR)); - pos += 2; col += 2; - } - else if (c == '-' && pos + 1 < input.size() && input[pos + 1] == '>') { - tokens.push_back(TOKEN(IMPL)); - pos += 2; col += 2; - } - // Single-char operators - else if (c == '+') { emit(TOKEN(PLUS)); } - else if (c == '*') { emit(TOKEN(STAR)); } - else if (c == '/') { - // Check if it's a path or division - if (pos + 1 < input.size() && (isalnum(input[pos + 1]) || input[pos + 1] == '.')) { - tokenize_path(); - } else { - emit(TOKEN(SLASH)); - } - } - else if (c == '<') { emit(TOKEN(LT)); } - else if (c == '>') { emit(TOKEN(GT)); } - else if (c == '!') { emit(TOKEN(NOT)); } - else if (c == '.') { - // Check for ellipsis (...) - if (pos + 2 < input.size() && input[pos + 1] == '.' && input[pos + 2] == '.') { - tokens.push_back(TOKEN(ELLIPSIS)); - pos += 3; col += 3; - } else { - emit(TOKEN(DOT)); - } - } - else if (c == '?') { emit(TOKEN(QUESTION)); } - else if (c == '-') { - // Check if it's a negative number or minus operator - if (pos + 1 < input.size() && isdigit(input[pos + 1])) { - tokenize_int(); - } else { - emit(TOKEN(MINUS)); - } - } - else if (isdigit(c)) { tokenize_int(); } - else if (isalpha(c) || c == '_') { tokenize_ident(); } - else { pos++; col++; } + if (c == '(') { + emit(TOKEN(LPAREN)); + } else if (c == ')') { + emit(TOKEN(RPAREN)); + } else if (c == '{') { + emit(TOKEN(LBRACE)); + } else if (c == '}') { + emit(TOKEN(RBRACE)); + } else if (c == '[') { + emit(TOKEN(LBRACKET)); + } else if (c == ']') { + emit(TOKEN(RBRACKET)); + } else if (c == ';') { + emit(TOKEN(SEMICOLON)); + } else if (c == ':') { + emit(TOKEN(COLON)); + } else if (c == '@') { + emit(TOKEN(AT)); + } else if (c == ',') { + emit(TOKEN(COMMA)); + } else if (c == '"') { + tokenize_string(); + } + // Two-char operators + else if (c == '=' && pos + 1 < input.size() && input[pos + 1] == '=') { + tokens.push_back(TOKEN(EQEQ)); + pos += 2; + col += 2; + } else if (c == '=') { + emit(TOKEN(EQUALS)); + } else if (c == '!' && pos + 1 < input.size() && input[pos + 1] == '=') { + tokens.push_back(TOKEN(NE)); + pos += 2; + col += 2; + } else if (c == '<' && pos + 1 < input.size() && input[pos + 1] == '=') { + tokens.push_back(TOKEN(LE)); + pos += 2; + col += 2; + } else if (c == '>' && pos + 1 < input.size() && input[pos + 1] == '=') { + tokens.push_back(TOKEN(GE)); + pos += 2; + col += 2; + } else if (c == '+' && pos + 1 < input.size() && input[pos + 1] == '+') { + tokens.push_back(TOKEN(CONCAT)); + pos += 2; + col += 2; + } else if (c == '&' && pos + 1 < input.size() && input[pos + 1] == '&') { + tokens.push_back(TOKEN(AND)); + pos += 2; + col += 2; + } else if (c == '|' && pos + 1 < input.size() && input[pos + 1] == '|') { + tokens.push_back(TOKEN(OR)); + pos += 2; + col += 2; + } else if (c == '-' && pos + 1 < input.size() && input[pos + 1] == '>') { + tokens.push_back(TOKEN(IMPL)); + pos += 2; + col += 2; + } + // Single-char operators + else if (c == '+') { + emit(TOKEN(PLUS)); + } else if (c == '*') { + emit(TOKEN(STAR)); + } else if (c == '/') { + // Check if it's a path or division + if (pos + 1 < input.size() && (isalnum(input[pos + 1]) || input[pos + 1] == '.')) { + tokenize_path(); + } else { + emit(TOKEN(SLASH)); } - tokens.push_back({Token::EOF_, "", line, col}); - - #undef TOKEN - return tokens; - } - -private: - std::vector tokens; - const std::string& input; - size_t pos; - size_t line; - size_t col; - - void emit(Token t) { - tokens.push_back(t); + } else if (c == '<') { + emit(TOKEN(LT)); + } else if (c == '>') { + emit(TOKEN(GT)); + } else if (c == '!') { + emit(TOKEN(NOT)); + } else if (c == '.') { + // Check for ellipsis (...) + if (pos + 2 < input.size() && input[pos + 1] == '.' && input[pos + 2] == '.') { + tokens.push_back(TOKEN(ELLIPSIS)); + pos += 3; + col += 3; + } else { + emit(TOKEN(DOT)); + } + } else if (c == '?') { + emit(TOKEN(QUESTION)); + } else if (c == '-') { + // Check if it's a negative number or minus operator + if (pos + 1 < input.size() && isdigit(input[pos + 1])) { + tokenize_int(); + } else { + emit(TOKEN(MINUS)); + } + } else if (isdigit(c)) { + tokenize_int(); + } else if (isalpha(c) || c == '_') { + tokenize_ident(); + } else { pos++; col++; + } } + tokens.push_back({Token::EOF_, "", line, col}); - void skip_whitespace() { - while (pos < input.size()) { - char c = input[pos]; - if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { - if (c == '\n') { line++; col = 1; } - else { col++; } - pos++; - } else if (c == '#') { - while (pos < input.size() && input[pos] != '\n') pos++; - } else { - break; - } - } - } +#undef TOKEN + return tokens; + } - void tokenize_string() { - pos++; - std::string s; - bool has_interp = false; +private: + std::vector tokens; + const std::string& input; + size_t pos; + size_t line; + size_t col; - while (pos < input.size() && input[pos] != '"') { - if (input[pos] == '\\' && pos + 1 < input.size()) { - pos++; - switch (input[pos]) { - case 'n': s += '\n'; break; - case 't': s += '\t'; break; - case 'r': s += '\r'; break; - case '"': s += '"'; break; - case '\\': s += '\\'; break; - case '$': s += '$'; break; // Escaped $ - default: s += input[pos]; break; - } - pos++; - } else if (input[pos] == '$' && pos + 1 < input.size() && input[pos + 1] == '{') { - // Found interpolation marker - has_interp = true; - s += input[pos]; // Keep $ in raw string - pos++; - } else { - s += input[pos]; - pos++; - } + void emit(Token t) { + tokens.push_back(t); + pos++; + col++; + } + + void skip_whitespace() { + while (pos < input.size()) { + char c = input[pos]; + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { + if (c == '\n') { + line++; + col = 1; + } else { + col++; } pos++; - - Token::Type type = has_interp ? Token::STRING_INTERP : Token::STRING; - tokens.push_back({type, s, line, col}); - col += s.size() + 2; + } else if (c == '#') { + while (pos < input.size() && input[pos] != '\n') + pos++; + } else { + break; + } } + } - void tokenize_path() { - size_t start = pos; - while (pos < input.size() && !isspace(input[pos]) && - input[pos] != '(' && input[pos] != ')' && - input[pos] != '{' && input[pos] != '}' && - input[pos] != '[' && input[pos] != ']') { - pos++; + void tokenize_string() { + pos++; + std::string s; + bool has_interp = false; + + while (pos < input.size() && input[pos] != '"') { + if (input[pos] == '\\' && pos + 1 < input.size()) { + pos++; + switch (input[pos]) { + case 'n': + s += '\n'; + break; + case 't': + s += '\t'; + break; + case 'r': + s += '\r'; + break; + case '"': + s += '"'; + break; + case '\\': + s += '\\'; + break; + case '$': + s += '$'; + break; // Escaped $ + default: + s += input[pos]; + break; } - std::string path = input.substr(start, pos - start); - tokens.push_back({Token::PATH, path, line, col}); - col += path.size(); + pos++; + } else if (input[pos] == '$' && pos + 1 < input.size() && input[pos + 1] == '{') { + // Found interpolation marker + has_interp = true; + s += input[pos]; // Keep $ in raw string + pos++; + } else { + s += input[pos]; + pos++; + } } + pos++; - void tokenize_int() { - size_t start = pos; - if (input[pos] == '-') pos++; - while (pos < input.size() && isdigit(input[pos])) pos++; - std::string num = input.substr(start, pos - start); - tokens.push_back({Token::INT, num, line, col}); - col += num.size(); + Token::Type type = has_interp ? Token::STRING_INTERP : Token::STRING; + tokens.push_back({type, s, line, col}); + col += s.size() + 2; + } + + void tokenize_path() { + size_t start = pos; + while (pos < input.size() && !isspace(input[pos]) && input[pos] != '(' && input[pos] != ')' && + input[pos] != '{' && input[pos] != '}' && input[pos] != '[' && input[pos] != ']') { + pos++; } + std::string path = input.substr(start, pos - start); + tokens.push_back({Token::PATH, path, line, col}); + col += path.size(); + } - void tokenize_ident() { - size_t start = pos; - while (pos < input.size() && (isalnum(input[pos]) || input[pos] == '_' || input[pos] == '-')) pos++; - std::string ident = input.substr(start, pos - start); + void tokenize_int() { + size_t start = pos; + if (input[pos] == '-') + pos++; + while (pos < input.size() && isdigit(input[pos])) + pos++; + std::string num = input.substr(start, pos - start); + tokens.push_back({Token::INT, num, line, col}); + col += num.size(); + } - Token::Type type = Token::IDENT; - if (ident == "let") type = Token::LET; - else if (ident == "in") type = Token::IN; - else if (ident == "rec") type = Token::REC; - else if (ident == "if") type = Token::IF; - else if (ident == "then") type = Token::THEN; - else if (ident == "else") type = Token::ELSE; - else if (ident == "assert") type = Token::ASSERT; - else if (ident == "with") type = Token::WITH; - else if (ident == "inherit") type = Token::INHERIT; - else if (ident == "true") type = Token::BOOL; - else if (ident == "false") type = Token::BOOL; + void tokenize_ident() { + size_t start = pos; + while (pos < input.size() && (isalnum(input[pos]) || input[pos] == '_' || input[pos] == '-')) + pos++; + std::string ident = input.substr(start, pos - start); - tokens.push_back({type, ident, line, col}); - col += ident.size(); - } + Token::Type type = Token::IDENT; + if (ident == "let") + type = Token::LET; + else if (ident == "in") + type = Token::IN; + else if (ident == "rec") + type = Token::REC; + else if (ident == "if") + type = Token::IF; + else if (ident == "then") + type = Token::THEN; + else if (ident == "else") + type = Token::ELSE; + else if (ident == "assert") + type = Token::ASSERT; + else if (ident == "with") + type = Token::WITH; + else if (ident == "inherit") + type = Token::INHERIT; + else if (ident == "true") + type = Token::BOOL; + else if (ident == "false") + type = Token::BOOL; + + tokens.push_back({type, ident, line, col}); + col += ident.size(); + } }; class Parser::Impl { public: - std::vector tokens; - size_t pos = 0; - std::string current_file; + std::vector tokens; + size_t pos = 0; + std::string current_file; - const Token& current() { - if (pos < tokens.size()) return tokens[pos]; - static Token eof{Token::EOF_, "", 0, 0}; - return eof; + const Token& current() { + if (pos < tokens.size()) + return tokens[pos]; + static Token eof{Token::EOF_, "", 0, 0}; + return eof; + } + + void advance() { pos++; } + + bool consume(Token::Type type) { + if (current().type == type) { + advance(); + return true; + } + return false; + } + + bool expect(Token::Type type) { + if (current().type != type) { + std::cerr << "Expected token " << type << " but got " << current().type << " at " + << current().line << ":" << current().col << "\n"; + return false; + } + advance(); + return true; + } + + // Get operator precedence (higher = tighter binding) + int get_precedence(Token::Type type) { + switch (type) { + case Token::OR: + return 1; + case Token::AND: + return 2; + case Token::IMPL: + return 3; + case Token::EQEQ: + case Token::NE: + return 4; + case Token::LT: + case Token::GT: + case Token::LE: + case Token::GE: + return 5; + case Token::CONCAT: + return 6; + case Token::PLUS: + case Token::MINUS: + return 7; + case Token::STAR: + case Token::SLASH: + return 8; + default: + return 0; + } + } + + // Convert token type to binary operator + BinaryOp token_to_binop(Token::Type type) { + switch (type) { + case Token::PLUS: + return BinaryOp::ADD; + case Token::MINUS: + return BinaryOp::SUB; + case Token::STAR: + return BinaryOp::MUL; + case Token::SLASH: + return BinaryOp::DIV; + case Token::CONCAT: + return BinaryOp::CONCAT; + case Token::EQEQ: + return BinaryOp::EQ; + case Token::NE: + return BinaryOp::NE; + case Token::LT: + return BinaryOp::LT; + case Token::GT: + return BinaryOp::GT; + case Token::LE: + return BinaryOp::LE; + case Token::GE: + return BinaryOp::GE; + case Token::AND: + return BinaryOp::AND; + case Token::OR: + return BinaryOp::OR; + case Token::IMPL: + return BinaryOp::IMPL; + default: + throw std::runtime_error("Invalid binary operator"); + } + } + + std::shared_ptr parse_expr() { + // Try to parse lambda + auto lambda = try_parse_lambda(); + if (lambda) + return lambda; + + if (consume(Token::IF)) { + auto cond = parse_expr(); + expect(Token::THEN); + auto then = parse_expr(); + expect(Token::ELSE); + auto else_ = parse_expr(); + return std::make_shared(IfNode(cond, then, else_)); + } + if (consume(Token::LET)) { + bool is_rec = consume(Token::REC); + std::vector>> bindings; + parse_bindings(bindings); + expect(Token::IN); + auto body = parse_expr(); + + if (is_rec) { + auto letrec = LetRecNode(body); + letrec.bindings = std::move(bindings); + return std::make_shared(std::move(letrec)); + } else { + auto let = LetNode(body); + let.bindings = std::move(bindings); + return std::make_shared(std::move(let)); + } + } + if (consume(Token::ASSERT)) { + auto cond = parse_expr(); + expect(Token::SEMICOLON); + auto body = parse_expr(); + return std::make_shared(AssertNode(cond, body)); + } + if (consume(Token::WITH)) { + auto attrs = parse_expr(); + expect(Token::SEMICOLON); + auto body = parse_expr(); + return std::make_shared(WithNode(attrs, body)); } - void advance() { pos++; } + return parse_expr1(); + } - bool consume(Token::Type type) { - if (current().type == type) { - advance(); - return true; - } - return false; + std::shared_ptr parse_expr1() { return parse_binary_op(0); } + + // Precedence climbing for binary operators + std::shared_ptr parse_binary_op(int min_prec) { + auto left = parse_selection(); + + while (true) { + int prec = get_precedence(current().type); + if (prec == 0 || prec < min_prec) + break; + + Token op_token = current(); + advance(); + + auto right = parse_binary_op(prec + 1); + left = std::make_shared(BinaryOpNode(token_to_binop(op_token.type), left, right)); } - bool expect(Token::Type type) { - if (current().type != type) { - std::cerr << "Expected token " << type << " but got " << current().type - << " at " << current().line << ":" << current().col << "\n"; - return false; - } + return left; + } + + std::shared_ptr parse_selection() { + auto left = parse_expr2(); + + while (current().type == Token::DOT) { + advance(); + Token name = current(); + if (name.type == Token::IDENT) { advance(); - return true; - } + auto attr = std::make_shared(ConstStringNode(name.value)); + auto result = std::make_shared(SelectNode(left, attr)); - // Get operator precedence (higher = tighter binding) - int get_precedence(Token::Type type) { - switch (type) { - case Token::OR: return 1; - case Token::AND: return 2; - case Token::IMPL: return 3; - case Token::EQEQ: case Token::NE: return 4; - case Token::LT: case Token::GT: case Token::LE: case Token::GE: return 5; - case Token::CONCAT: return 6; - case Token::PLUS: case Token::MINUS: return 7; - case Token::STAR: case Token::SLASH: return 8; - default: return 0; - } - } - - // Convert token type to binary operator - BinaryOp token_to_binop(Token::Type type) { - switch (type) { - case Token::PLUS: return BinaryOp::ADD; - case Token::MINUS: return BinaryOp::SUB; - case Token::STAR: return BinaryOp::MUL; - case Token::SLASH: return BinaryOp::DIV; - case Token::CONCAT: return BinaryOp::CONCAT; - case Token::EQEQ: return BinaryOp::EQ; - case Token::NE: return BinaryOp::NE; - case Token::LT: return BinaryOp::LT; - case Token::GT: return BinaryOp::GT; - case Token::LE: return BinaryOp::LE; - case Token::GE: return BinaryOp::GE; - case Token::AND: return BinaryOp::AND; - case Token::OR: return BinaryOp::OR; - case Token::IMPL: return BinaryOp::IMPL; - default: throw std::runtime_error("Invalid binary operator"); - } - } - - std::shared_ptr parse_expr() { - // Try to parse lambda - auto lambda = try_parse_lambda(); - if (lambda) return lambda; - - if (consume(Token::IF)) { - auto cond = parse_expr(); - expect(Token::THEN); - auto then = parse_expr(); - expect(Token::ELSE); - auto else_ = parse_expr(); - return std::make_shared(IfNode(cond, then, else_)); - } - if (consume(Token::LET)) { - bool is_rec = consume(Token::REC); - std::vector>> bindings; - parse_bindings(bindings); - expect(Token::IN); - auto body = parse_expr(); - - if (is_rec) { - auto letrec = LetRecNode(body); - letrec.bindings = std::move(bindings); - return std::make_shared(std::move(letrec)); - } else { - auto let = LetNode(body); - let.bindings = std::move(bindings); - return std::make_shared(std::move(let)); + if (consume(Token::DOT)) { + Token name2 = current(); + if (name2.type == Token::IDENT) { + advance(); + auto attr2 = std::make_shared(ConstStringNode(name2.value)); + auto* curr = result->get_if(); + while (curr && consume(Token::DOT)) { + Token n = current(); + expect(Token::IDENT); + auto a = std::make_shared(ConstStringNode(n.value)); + curr->attr = + std::make_shared(AppNode(std::make_shared(AppNode(curr->attr, a)), + std::make_shared(ConstNullNode()))); } + } } - if (consume(Token::ASSERT)) { - auto cond = parse_expr(); - expect(Token::SEMICOLON); - auto body = parse_expr(); - return std::make_shared(AssertNode(cond, body)); - } - if (consume(Token::WITH)) { - auto attrs = parse_expr(); - expect(Token::SEMICOLON); - auto body = parse_expr(); - return std::make_shared(WithNode(attrs, body)); - } - - return parse_expr1(); - } - - std::shared_ptr parse_expr1() { - return parse_binary_op(0); - } - - // Precedence climbing for binary operators - std::shared_ptr parse_binary_op(int min_prec) { - auto left = parse_selection(); - - while (true) { - int prec = get_precedence(current().type); - if (prec == 0 || prec < min_prec) break; - - Token op_token = current(); - advance(); - - auto right = parse_binary_op(prec + 1); - left = std::make_shared(BinaryOpNode( - token_to_binop(op_token.type), - left, - right - )); - } - - return left; - } - - std::shared_ptr parse_selection() { - auto left = parse_expr2(); - - while (current().type == Token::DOT) { - advance(); - Token name = current(); - if (name.type == Token::IDENT) { - advance(); - auto attr = std::make_shared(ConstStringNode(name.value)); - auto result = std::make_shared(SelectNode(left, attr)); - - if (consume(Token::DOT)) { - Token name2 = current(); - if (name2.type == Token::IDENT) { - advance(); - auto attr2 = std::make_shared(ConstStringNode(name2.value)); - auto* curr = result->get_if(); - while (curr && consume(Token::DOT)) { - Token n = current(); - expect(Token::IDENT); - auto a = std::make_shared(ConstStringNode(n.value)); - curr->attr = std::make_shared(AppNode( - std::make_shared(AppNode(curr->attr, a)), - std::make_shared(ConstNullNode()) - )); - } - } - } - return result; - } else if (consume(Token::LBRACE)) { - auto result = std::make_shared(SelectNode(left, std::make_shared(ConstStringNode(name.value)))); - parse_expr_attrs(result); - expect(Token::RBRACE); - return result; - } - return left; - } - - return left; - } - - void parse_expr_attrs(std::shared_ptr&) { - // Extended selection syntax - } - - std::shared_ptr parse_expr2() { - std::shared_ptr left = parse_expr3(); - - while (true) { - if (current().type == Token::LBRACKET) { - advance(); - auto arg = parse_expr(); - expect(Token::RBRACKET); - left = std::make_shared(AppNode(left, arg)); - } else if (current().type == Token::STRING) { - Token s = current(); - advance(); - auto arg = std::make_shared(ConstStringNode(s.value)); - left = std::make_shared(AppNode(left, arg)); - } else { - break; - } - } - - return left; - } - - std::shared_ptr parse_expr3() { - // Handle unary operators - if (consume(Token::MINUS)) { - auto operand = parse_expr3(); - return std::make_shared(UnaryOpNode(UnaryOp::NEG, operand)); - } - - if (consume(Token::NOT)) { - auto operand = parse_expr3(); - return std::make_shared(UnaryOpNode(UnaryOp::NOT, operand)); - } - - if (consume(Token::LPAREN)) { - auto expr = parse_expr(); - expect(Token::RPAREN); - return expr; - } - - if (consume(Token::LBRACE)) { - return parse_attrs(); - } - - if (consume(Token::LBRACKET)) { - return parse_list(); - } - - Token t = current(); - - if (t.type == Token::IDENT) { - advance(); - return std::make_shared(VarNode(0, t.value)); - } - - if (t.type == Token::INT) { - advance(); - return std::make_shared(ConstIntNode(std::stoll(t.value))); - } - - if (t.type == Token::STRING) { - advance(); - return std::make_shared(ConstStringNode(t.value)); - } - - if (t.type == Token::STRING_INTERP) { - Token str_token = current(); - advance(); - return parse_string_interp(str_token.value); - } - - if (t.type == Token::PATH) { - advance(); - return std::make_shared(ConstPathNode(t.value)); - } - - if (t.type == Token::BOOL) { - advance(); - return std::make_shared(ConstBoolNode(t.value == "true")); - } - - std::cerr << "Unknown token: " << t.value << " (type " << t.type << ")\n"; - advance(); - return std::make_shared(ConstNullNode()); - } - - std::shared_ptr parse_attrs() { - auto attrs = AttrsetNode(false); - - while (current().type != Token::RBRACE && current().type != Token::EOF_) { - if (consume(Token::REC)) { - attrs.recursive = true; - continue; - } - - // Handle inherit keyword - if (consume(Token::INHERIT)) { - std::shared_ptr source; - - // Check for (expr) form - if (consume(Token::LPAREN)) { - source = parse_expr(); - expect(Token::RPAREN); - } - - // Parse identifier list - while (current().type == Token::IDENT) { - Token name = current(); - advance(); - - if (source) { - // inherit (expr) x → x = expr.x - auto select = std::make_shared(SelectNode( - source, - std::make_shared(ConstStringNode(name.value)) - )); - attrs.attrs.push_back({name.value, select}); - } else { - // inherit x → x = x - auto var = std::make_shared(VarNode(0, name.value)); - attrs.attrs.push_back({name.value, var}); - } - } - - expect(Token::SEMICOLON); - continue; - } - - if (current().type == Token::IDENT || current().type == Token::STRING) { - Token key = current(); - advance(); - std::string key_str = key.value; - - if (consume(Token::EQUALS)) { - auto value = parse_expr(); - attrs.attrs.push_back({key_str, value}); - } else if (consume(Token::AT)) { - auto pattern = parse_expr(); - auto value = parse_expr(); - attrs.attrs.push_back({key_str, value}); - } - } - - if (consume(Token::COMMA)) continue; - if (consume(Token::SEMICOLON)) continue; - - // If we get here and haven't handled the token, break - if (current().type != Token::RBRACE && current().type != Token::EOF_) { - break; - } - } - + return result; + } else if (consume(Token::LBRACE)) { + auto result = std::make_shared( + SelectNode(left, std::make_shared(ConstStringNode(name.value)))); + parse_expr_attrs(result); expect(Token::RBRACE); - return std::make_shared(std::move(attrs)); + return result; + } + return left; } - std::shared_ptr parse_list() { - std::shared_ptr list = std::make_shared(ConstNullNode()); + return left; + } - if (consume(Token::RBRACKET)) { - return list; - } + void parse_expr_attrs(std::shared_ptr&) { + // Extended selection syntax + } - std::vector> elements; - while (current().type != Token::RBRACKET) { - elements.push_back(parse_expr()); - if (!consume(Token::COMMA)) break; - } + std::shared_ptr parse_expr2() { + std::shared_ptr left = parse_expr3(); + + while (true) { + if (current().type == Token::LBRACKET) { + advance(); + auto arg = parse_expr(); expect(Token::RBRACKET); - - for (auto it = elements.rbegin(); it != elements.rend(); ++it) { - list = std::make_shared(AppNode( - std::make_shared(AppNode( - std::make_shared(VarNode(0, "__list")), - *it - )), - list - )); - } - - return list; + left = std::make_shared(AppNode(left, arg)); + } else if (current().type == Token::STRING) { + Token s = current(); + advance(); + auto arg = std::make_shared(ConstStringNode(s.value)); + left = std::make_shared(AppNode(left, arg)); + } else { + break; + } } - void parse_bindings(std::vector>>& bindings) { - while (current().type == Token::IDENT || current().type == Token::INHERIT) { - // Handle inherit keyword - if (consume(Token::INHERIT)) { - std::shared_ptr source; + return left; + } - // Check for (expr) form - if (consume(Token::LPAREN)) { - source = parse_expr(); - expect(Token::RPAREN); - } - - // Parse identifier list - while (current().type == Token::IDENT) { - Token name = current(); - advance(); - - if (source) { - // inherit (expr) x → x = expr.x - auto select = std::make_shared(SelectNode( - source, - std::make_shared(ConstStringNode(name.value)) - )); - bindings.push_back({name.value, select}); - } else { - // inherit x → x = x - auto var = std::make_shared(VarNode(0, name.value)); - bindings.push_back({name.value, var}); - } - } - - expect(Token::SEMICOLON); - continue; - } - - if (current().type != Token::IDENT) break; - Token key = current(); - advance(); - - if (consume(Token::AT)) { - auto pattern = parse_expr(); - auto value = parse_expr(); - bindings.push_back({key.value, value}); - } else { - expect(Token::EQUALS); - auto value = parse_expr(); - bindings.push_back({key.value, value}); - } - - if (!consume(Token::SEMICOLON)) break; - } + std::shared_ptr parse_expr3() { + // Handle unary operators + if (consume(Token::MINUS)) { + auto operand = parse_expr3(); + return std::make_shared(UnaryOpNode(UnaryOp::NEG, operand)); } - // Try to parse lambda, return nullptr if not a lambda - std::shared_ptr try_parse_lambda() { - size_t saved_pos = pos; + if (consume(Token::NOT)) { + auto operand = parse_expr3(); + return std::make_shared(UnaryOpNode(UnaryOp::NOT, operand)); + } - // Check for named pattern: arg@{ ... }: - std::optional named_arg; - if (current().type == Token::IDENT) { - Token name = current(); - advance(); - if (consume(Token::AT)) { - named_arg = name.value; - } else if (consume(Token::COLON)) { - // Simple lambda: x: body - auto body = parse_expr(); - auto lambda = LambdaNode(1, body); - lambda.param_name = name.value; - return std::make_shared(std::move(lambda)); - } else { - // Not a lambda, restore position - pos = saved_pos; - return nullptr; - } + if (consume(Token::LPAREN)) { + auto expr = parse_expr(); + expect(Token::RPAREN); + return expr; + } + + if (consume(Token::LBRACE)) { + return parse_attrs(); + } + + if (consume(Token::LBRACKET)) { + return parse_list(); + } + + Token t = current(); + + if (t.type == Token::IDENT) { + advance(); + return std::make_shared(VarNode(0, t.value)); + } + + if (t.type == Token::INT) { + advance(); + return std::make_shared(ConstIntNode(std::stoll(t.value))); + } + + if (t.type == Token::STRING) { + advance(); + return std::make_shared(ConstStringNode(t.value)); + } + + if (t.type == Token::STRING_INTERP) { + Token str_token = current(); + advance(); + return parse_string_interp(str_token.value); + } + + if (t.type == Token::PATH) { + advance(); + return std::make_shared(ConstPathNode(t.value)); + } + + if (t.type == Token::BOOL) { + advance(); + return std::make_shared(ConstBoolNode(t.value == "true")); + } + + std::cerr << "Unknown token: " << t.value << " (type " << t.type << ")\n"; + advance(); + return std::make_shared(ConstNullNode()); + } + + std::shared_ptr parse_attrs() { + auto attrs = AttrsetNode(false); + + while (current().type != Token::RBRACE && current().type != Token::EOF_) { + if (consume(Token::REC)) { + attrs.recursive = true; + continue; + } + + // Handle inherit keyword + if (consume(Token::INHERIT)) { + std::shared_ptr source; + + // Check for (expr) form + if (consume(Token::LPAREN)) { + source = parse_expr(); + expect(Token::RPAREN); } - // Check for pattern: { ... }: - if (current().type == Token::LBRACE) { - advance(); + // Parse identifier list + while (current().type == Token::IDENT) { + Token name = current(); + advance(); - // Parse pattern fields - struct Field { - std::string name; - std::optional> default_val; - }; - std::vector fields; - bool has_ellipsis = false; - - while (current().type != Token::RBRACE && current().type != Token::EOF_) { - if (consume(Token::ELLIPSIS)) { - has_ellipsis = true; - if (consume(Token::COMMA)) continue; - break; - } - - if (current().type == Token::IDENT) { - Token field_name = current(); - advance(); - - Field field; - field.name = field_name.value; - - // Check for default value - if (consume(Token::QUESTION)) { - field.default_val = parse_expr(); - } - - fields.push_back(field); - - if (consume(Token::COMMA)) continue; - break; - } else { - break; - } - } - - if (!consume(Token::RBRACE)) { - // Not a lambda pattern, restore - pos = saved_pos; - return nullptr; - } - - if (!consume(Token::COLON)) { - // Not a lambda, restore - pos = saved_pos; - return nullptr; - } - - // Parse body - auto body = parse_expr(); - - // Desugar pattern to lambda with let bindings - // { a, b ? x }: body → arg: let a = arg.a; b = if arg ? a then arg.a else x; in body - - std::string arg_name = named_arg.value_or("_arg"); - auto arg_var = std::make_shared(VarNode(0, arg_name)); - - std::vector>> bindings; - - for (const auto& field : fields) { - // Create arg.field selection - auto select = std::make_shared(SelectNode( - arg_var, - std::make_shared(ConstStringNode(field.name)) - )); - - if (field.default_val) { - // if arg ? field then arg.field else default - auto has_attr = std::make_shared(HasAttrNode( - arg_var, - std::make_shared(ConstStringNode(field.name)) - )); - auto if_node = std::make_shared(IfNode( - has_attr, - select, - *field.default_val - )); - bindings.push_back({field.name, if_node}); - } else { - bindings.push_back({field.name, select}); - } - } - - // If named pattern, also bind the argument name - if (named_arg) { - bindings.push_back({*named_arg, arg_var}); - } - - // Create let expression - auto let = LetNode(body); - let.bindings = std::move(bindings); - auto let_node = std::make_shared(std::move(let)); - - // Create lambda - auto lambda = LambdaNode(1, let_node); - lambda.param_name = arg_name; - lambda.strict_pattern = !has_ellipsis; - return std::make_shared(std::move(lambda)); + if (source) { + // inherit (expr) x → x = expr.x + auto select = std::make_shared( + SelectNode(source, std::make_shared(ConstStringNode(name.value)))); + attrs.attrs.push_back({name.value, select}); + } else { + // inherit x → x = x + auto var = std::make_shared(VarNode(0, name.value)); + attrs.attrs.push_back({name.value, var}); + } } - // Not a lambda + expect(Token::SEMICOLON); + continue; + } + + if (current().type == Token::IDENT || current().type == Token::STRING) { + Token key = current(); + advance(); + std::string key_str = key.value; + + if (consume(Token::EQUALS)) { + auto value = parse_expr(); + attrs.attrs.push_back({key_str, value}); + } else if (consume(Token::AT)) { + auto pattern = parse_expr(); + auto value = parse_expr(); + attrs.attrs.push_back({key_str, value}); + } + } + + if (consume(Token::COMMA)) + continue; + if (consume(Token::SEMICOLON)) + continue; + + // If we get here and haven't handled the token, break + if (current().type != Token::RBRACE && current().type != Token::EOF_) { + break; + } + } + + expect(Token::RBRACE); + return std::make_shared(std::move(attrs)); + } + + std::shared_ptr parse_list() { + std::shared_ptr list = std::make_shared(ConstNullNode()); + + if (consume(Token::RBRACKET)) { + return list; + } + + std::vector> elements; + while (current().type != Token::RBRACKET) { + elements.push_back(parse_expr()); + if (!consume(Token::COMMA)) + break; + } + expect(Token::RBRACKET); + + for (auto it = elements.rbegin(); it != elements.rend(); ++it) { + list = std::make_shared(AppNode( + std::make_shared(AppNode(std::make_shared(VarNode(0, "__list")), *it)), + list)); + } + + return list; + } + + void parse_bindings(std::vector>>& bindings) { + while (current().type == Token::IDENT || current().type == Token::INHERIT) { + // Handle inherit keyword + if (consume(Token::INHERIT)) { + std::shared_ptr source; + + // Check for (expr) form + if (consume(Token::LPAREN)) { + source = parse_expr(); + expect(Token::RPAREN); + } + + // Parse identifier list + while (current().type == Token::IDENT) { + Token name = current(); + advance(); + + if (source) { + // inherit (expr) x → x = expr.x + auto select = std::make_shared( + SelectNode(source, std::make_shared(ConstStringNode(name.value)))); + bindings.push_back({name.value, select}); + } else { + // inherit x → x = x + auto var = std::make_shared(VarNode(0, name.value)); + bindings.push_back({name.value, var}); + } + } + + expect(Token::SEMICOLON); + continue; + } + + if (current().type != Token::IDENT) + break; + Token key = current(); + advance(); + + if (consume(Token::AT)) { + auto pattern = parse_expr(); + auto value = parse_expr(); + bindings.push_back({key.value, value}); + } else { + expect(Token::EQUALS); + auto value = parse_expr(); + bindings.push_back({key.value, value}); + } + + if (!consume(Token::SEMICOLON)) + break; + } + } + + // Try to parse lambda, return nullptr if not a lambda + std::shared_ptr try_parse_lambda() { + size_t saved_pos = pos; + + // Check for named pattern: arg@{ ... }: + std::optional named_arg; + if (current().type == Token::IDENT) { + Token name = current(); + advance(); + if (consume(Token::AT)) { + named_arg = name.value; + } else if (consume(Token::COLON)) { + // Simple lambda: x: body + auto body = parse_expr(); + auto lambda = LambdaNode(1, body); + lambda.param_name = name.value; + return std::make_shared(std::move(lambda)); + } else { + // Not a lambda, restore position pos = saved_pos; return nullptr; + } } - std::shared_ptr parse_string_interp(const std::string& raw) { - std::vector> parts; - size_t i = 0; - std::string current_str; + // Check for pattern: { ... }: + if (current().type == Token::LBRACE) { + advance(); - while (i < raw.size()) { - if (raw[i] == '$' && i + 1 < raw.size() && raw[i + 1] == '{') { - // Save current string part if any - if (!current_str.empty()) { - parts.push_back(std::make_shared(ConstStringNode(current_str))); - current_str.clear(); - } + // Parse pattern fields + struct Field { + std::string name; + std::optional> default_val; + }; + std::vector fields; + bool has_ellipsis = false; - // Find matching } - i += 2; // Skip ${ - int depth = 1; - size_t expr_start = i; - bool in_string = false; - char string_quote = 0; - - while (i < raw.size() && depth > 0) { - if (!in_string) { - if (raw[i] == '"' || raw[i] == '\'') { - in_string = true; - string_quote = raw[i]; - } else if (raw[i] == '{') { - depth++; - } else if (raw[i] == '}') { - depth--; - } - } else { - if (raw[i] == string_quote && (i == 0 || raw[i-1] != '\\')) { - in_string = false; - } else if (raw[i] == '\\') { - i++; - } - } - if (depth > 0) i++; - } - - if (depth > 0) { - throw std::runtime_error("unterminated ${ in string interpolation"); - } - - // Parse the expression - std::string expr_str = raw.substr(expr_start, i - expr_start); - - // Tokenize and parse the expression - Lexer lexer(expr_str); - auto expr_tokens = lexer.tokenize(); - - // Save current state - auto saved_tokens = tokens; - auto saved_pos = pos; - - // Parse expression - tokens = expr_tokens; - pos = 0; - auto expr = parse_expr(); - - // Restore state - tokens = saved_tokens; - pos = saved_pos; - - // Convert to string using toString builtin - auto to_string = std::make_shared(VarNode(0, "toString")); - auto str_expr = std::make_shared(AppNode(to_string, expr)); - parts.push_back(str_expr); - - i++; // Skip } - } else { - current_str += raw[i]; - i++; - } + while (current().type != Token::RBRACE && current().type != Token::EOF_) { + if (consume(Token::ELLIPSIS)) { + has_ellipsis = true; + if (consume(Token::COMMA)) + continue; + break; } - // Add remaining string part + if (current().type == Token::IDENT) { + Token field_name = current(); + advance(); + + Field field; + field.name = field_name.value; + + // Check for default value + if (consume(Token::QUESTION)) { + field.default_val = parse_expr(); + } + + fields.push_back(field); + + if (consume(Token::COMMA)) + continue; + break; + } else { + break; + } + } + + if (!consume(Token::RBRACE)) { + // Not a lambda pattern, restore + pos = saved_pos; + return nullptr; + } + + if (!consume(Token::COLON)) { + // Not a lambda, restore + pos = saved_pos; + return nullptr; + } + + // Parse body + auto body = parse_expr(); + + // Desugar pattern to lambda with let bindings + // { a, b ? x }: body → arg: let a = arg.a; b = if arg ? a then arg.a else x; in body + + std::string arg_name = named_arg.value_or("_arg"); + auto arg_var = std::make_shared(VarNode(0, arg_name)); + + std::vector>> bindings; + + for (const auto& field : fields) { + // Create arg.field selection + auto select = std::make_shared( + SelectNode(arg_var, std::make_shared(ConstStringNode(field.name)))); + + if (field.default_val) { + // if arg ? field then arg.field else default + auto has_attr = std::make_shared( + HasAttrNode(arg_var, std::make_shared(ConstStringNode(field.name)))); + auto if_node = std::make_shared(IfNode(has_attr, select, *field.default_val)); + bindings.push_back({field.name, if_node}); + } else { + bindings.push_back({field.name, select}); + } + } + + // If named pattern, also bind the argument name + if (named_arg) { + bindings.push_back({*named_arg, arg_var}); + } + + // Create let expression + auto let = LetNode(body); + let.bindings = std::move(bindings); + auto let_node = std::make_shared(std::move(let)); + + // Create lambda + auto lambda = LambdaNode(1, let_node); + lambda.param_name = arg_name; + lambda.strict_pattern = !has_ellipsis; + return std::make_shared(std::move(lambda)); + } + + // Not a lambda + pos = saved_pos; + return nullptr; + } + + std::shared_ptr parse_string_interp(const std::string& raw) { + std::vector> parts; + size_t i = 0; + std::string current_str; + + while (i < raw.size()) { + if (raw[i] == '$' && i + 1 < raw.size() && raw[i + 1] == '{') { + // Save current string part if any if (!current_str.empty()) { - parts.push_back(std::make_shared(ConstStringNode(current_str))); + parts.push_back(std::make_shared(ConstStringNode(current_str))); + current_str.clear(); } - // Build concatenation tree - if (parts.empty()) { - return std::make_shared(ConstStringNode("")); + // Find matching } + i += 2; // Skip ${ + int depth = 1; + size_t expr_start = i; + bool in_string = false; + char string_quote = 0; + + while (i < raw.size() && depth > 0) { + if (!in_string) { + if (raw[i] == '"' || raw[i] == '\'') { + in_string = true; + string_quote = raw[i]; + } else if (raw[i] == '{') { + depth++; + } else if (raw[i] == '}') { + depth--; + } + } else { + if (raw[i] == string_quote && (i == 0 || raw[i - 1] != '\\')) { + in_string = false; + } else if (raw[i] == '\\') { + i++; + } + } + if (depth > 0) + i++; } - auto result = parts[0]; - for (size_t j = 1; j < parts.size(); j++) { - // Use ADD (+) for string concatenation; CONCAT (++) is Nix list concatenation - result = std::make_shared(BinaryOpNode(BinaryOp::ADD, result, parts[j])); + if (depth > 0) { + throw std::runtime_error("unterminated ${ in string interpolation"); } - return result; + // Parse the expression + std::string expr_str = raw.substr(expr_start, i - expr_start); + + // Tokenize and parse the expression + Lexer lexer(expr_str); + auto expr_tokens = lexer.tokenize(); + + // Save current state + auto saved_tokens = tokens; + auto saved_pos = pos; + + // Parse expression + tokens = expr_tokens; + pos = 0; + auto expr = parse_expr(); + + // Restore state + tokens = saved_tokens; + pos = saved_pos; + + // Convert to string using toString builtin + auto to_string = std::make_shared(VarNode(0, "toString")); + auto str_expr = std::make_shared(AppNode(to_string, expr)); + parts.push_back(str_expr); + + i++; // Skip } + } else { + current_str += raw[i]; + i++; + } } + + // Add remaining string part + if (!current_str.empty()) { + parts.push_back(std::make_shared(ConstStringNode(current_str))); + } + + // Build concatenation tree + if (parts.empty()) { + return std::make_shared(ConstStringNode("")); + } + + auto result = parts[0]; + for (size_t j = 1; j < parts.size(); j++) { + // Use ADD (+) for string concatenation; CONCAT (++) is Nix list concatenation + result = std::make_shared(BinaryOpNode(BinaryOp::ADD, result, parts[j])); + } + + return result; + } }; Parser::Parser() : pImpl(std::make_unique()) {} Parser::~Parser() = default; std::shared_ptr Parser::parse(const std::string& source, const std::string& path) { - pImpl->current_file = path; + pImpl->current_file = path; - Lexer lexer(source); - pImpl->tokens = lexer.tokenize(); - pImpl->pos = 0; + Lexer lexer(source); + pImpl->tokens = lexer.tokenize(); + pImpl->pos = 0; - return pImpl->parse_expr(); + return pImpl->parse_expr(); } std::shared_ptr Parser::parse_file(const std::string& path) { - std::string content = read_file(path); - return parse(content, path); + std::string content = read_file(path); + return parse(content, path); } -} +} // namespace nix_irc diff --git a/src/irc/parser.h b/src/irc/parser.h index 7bb97c8..a5918dc 100644 --- a/src/irc/parser.h +++ b/src/irc/parser.h @@ -2,24 +2,24 @@ #define NIX_IRC_PARSER_H #include "types.h" -#include #include +#include namespace nix_irc { class Parser { public: - Parser(); - ~Parser(); - - std::shared_ptr parse(const std::string& source, const std::string& path = ""); - std::shared_ptr parse_file(const std::string& path); - + Parser(); + ~Parser(); + + std::shared_ptr parse(const std::string& source, const std::string& path = ""); + std::shared_ptr parse_file(const std::string& path); + private: - struct Impl; - std::unique_ptr pImpl; + struct Impl; + std::unique_ptr pImpl; }; -} +} // namespace nix_irc #endif \ No newline at end of file diff --git a/src/irc/resolver.cpp b/src/irc/resolver.cpp index f57cf64..37dffcc 100644 --- a/src/irc/resolver.cpp +++ b/src/irc/resolver.cpp @@ -1,111 +1,114 @@ #include "resolver.h" #include "parser.h" -#include -#include -#include #include +#include +#include #include +#include namespace nix_irc { namespace fs = std::filesystem; struct Resolver::Impl { - ResolverConfig config; - std::vector> resolved_imports; - std::unordered_set visited; - Parser parser; - - Impl(const ResolverConfig& cfg) : config(cfg) {} - - std::string resolve_path(const std::string& path, const std::string& from_file) { - fs::path p(path); - - if (p.is_absolute()) { - if (fs::exists(p)) return path; - return ""; - } - - fs::path from_dir = fs::path(from_file).parent_path(); - fs::path candidate = from_dir / p; - if (fs::exists(candidate)) return candidate.string(); - - for (const auto& search : config.search_paths) { - candidate = fs::path(search) / p; - if (fs::exists(candidate)) return candidate.string(); - } - - return ""; + ResolverConfig config; + std::vector> resolved_imports; + std::unordered_set visited; + Parser parser; + + Impl(const ResolverConfig& cfg) : config(cfg) {} + + std::string resolve_path(const std::string& path, const std::string& from_file) { + fs::path p(path); + + if (p.is_absolute()) { + if (fs::exists(p)) + return path; + return ""; } - - ImportResult do_resolve(const std::string& path, const std::string& from_file) { - std::string resolved = resolve_path(path, from_file); - - if (resolved.empty()) { - return {false, "", "Cannot find file: " + path, nullptr}; - } - - if (visited.count(resolved)) { - return {true, resolved, "", nullptr}; - } - visited.insert(resolved); - - try { - auto ast = parser.parse_file(resolved); - return {true, resolved, "", ast}; - } catch (const std::exception& e) { - return {false, "", e.what(), nullptr}; - } + + fs::path from_dir = fs::path(from_file).parent_path(); + fs::path candidate = from_dir / p; + if (fs::exists(candidate)) + return candidate.string(); + + for (const auto& search : config.search_paths) { + candidate = fs::path(search) / p; + if (fs::exists(candidate)) + return candidate.string(); } + + return ""; + } + + ImportResult do_resolve(const std::string& path, const std::string& from_file) { + std::string resolved = resolve_path(path, from_file); + + if (resolved.empty()) { + return {false, "", "Cannot find file: " + path, nullptr}; + } + + if (visited.count(resolved)) { + return {true, resolved, "", nullptr}; + } + visited.insert(resolved); + + try { + auto ast = parser.parse_file(resolved); + return {true, resolved, "", ast}; + } catch (const std::exception& e) { + return {false, "", e.what(), nullptr}; + } + } }; Resolver::Resolver(const ResolverConfig& config) : pImpl(std::make_unique(config)) {} Resolver::~Resolver() = default; void Resolver::add_search_path(const std::string& path) { - pImpl->config.search_paths.push_back(path); + pImpl->config.search_paths.push_back(path); } void Resolver::set_search_paths(const std::vector& paths) { - pImpl->config.search_paths = paths; + pImpl->config.search_paths = paths; } ImportResult Resolver::resolve_import(const std::string& path, const std::string& from_file) { - auto result = pImpl->do_resolve(path, from_file); - if (result.success && result.ast) { - pImpl->resolved_imports.push_back({path, result.path}); - } - return result; + auto result = pImpl->do_resolve(path, from_file); + if (result.success && result.ast) { + pImpl->resolved_imports.push_back({path, result.path}); + } + return result; } ImportResult Resolver::resolve_import(const Node& import_node, const std::string& from_file) { - const ConstPathNode* path_node = import_node.get_if(); - if (!path_node) { - return {false, "", "Dynamic import not supported", nullptr}; - } - return resolve_import(path_node->value, from_file); + const ConstPathNode* path_node = import_node.get_if(); + if (!path_node) { + return {false, "", "Dynamic import not supported", nullptr}; + } + return resolve_import(path_node->value, from_file); } std::vector Resolver::get_resolved_files() const { - std::vector files; - for (const auto& [orig, resolved] : pImpl->resolved_imports) { - (void)orig; - files.push_back(resolved); - } - return files; + std::vector files; + for (const auto& [orig, resolved] : pImpl->resolved_imports) { + (void) orig; + files.push_back(resolved); + } + return files; } std::vector> Resolver::get_imports() const { - return pImpl->resolved_imports; + return pImpl->resolved_imports; } bool is_static_import(const Node& node) { - return node.holds(); + return node.holds(); } std::string normalize_path(const std::string& path) { - fs::path p(path); - return fs::absolute(p).string(); + fs::path p(path); + return fs::absolute(p).string(); } -} +} // namespace nix_irc diff --git a/src/irc/resolver.h b/src/irc/resolver.h index 39f8d40..89167d7 100644 --- a/src/irc/resolver.h +++ b/src/irc/resolver.h @@ -2,47 +2,47 @@ #define NIX_IRC_RESOLVER_H #include "types.h" -#include -#include -#include #include +#include +#include +#include namespace nix_irc { struct ImportResult { - bool success; - std::string path; - std::string error; - std::shared_ptr ast; + bool success; + std::string path; + std::string error; + std::shared_ptr ast; }; struct ResolverConfig { - std::vector search_paths; - bool resolve_imports = true; + std::vector search_paths; + bool resolve_imports = true; }; class Resolver { public: - Resolver(const ResolverConfig& config = {}); - ~Resolver(); - - void add_search_path(const std::string& path); - void set_search_paths(const std::vector& paths); - - ImportResult resolve_import(const std::string& path, const std::string& from_file); - ImportResult resolve_import(const Node& import_node, const std::string& from_file); - - std::vector get_resolved_files() const; - std::vector> get_imports() const; - + Resolver(const ResolverConfig& config = {}); + ~Resolver(); + + void add_search_path(const std::string& path); + void set_search_paths(const std::vector& paths); + + ImportResult resolve_import(const std::string& path, const std::string& from_file); + ImportResult resolve_import(const Node& import_node, const std::string& from_file); + + std::vector get_resolved_files() const; + std::vector> get_imports() const; + private: - struct Impl; - std::unique_ptr pImpl; + struct Impl; + std::unique_ptr pImpl; }; bool is_static_import(const Node& node); std::string normalize_path(const std::string& path); -} +} // namespace nix_irc #endif \ No newline at end of file diff --git a/src/irc/serializer.cpp b/src/irc/serializer.cpp index 8819789..fff2208 100644 --- a/src/irc/serializer.cpp +++ b/src/irc/serializer.cpp @@ -1,392 +1,428 @@ #include "serializer.h" #include -#include #include +#include namespace nix_irc { struct Serializer::Impl { - std::vector buffer; + std::vector buffer; - void write_u32(uint32_t val) { - buffer.push_back((val >> 0) & 0xFF); - buffer.push_back((val >> 8) & 0xFF); - buffer.push_back((val >> 16) & 0xFF); - buffer.push_back((val >> 24) & 0xFF); + void write_u32(uint32_t val) { + buffer.push_back((val >> 0) & 0xFF); + buffer.push_back((val >> 8) & 0xFF); + buffer.push_back((val >> 16) & 0xFF); + buffer.push_back((val >> 24) & 0xFF); + } + + void write_u64(uint64_t val) { + for (int i = 0; i < 8; i++) { + buffer.push_back((val >> (i * 8)) & 0xFF); } + } - void write_u64(uint64_t val) { - for (int i = 0; i < 8; i++) { - buffer.push_back((val >> (i * 8)) & 0xFF); - } - } - - void write_u8(uint8_t val) { - buffer.push_back(val); - } - - void write_string(const std::string& str) { - write_u32(str.size()); - buffer.insert(buffer.end(), str.begin(), str.end()); - } - - NodeType get_node_type(const Node& node) { - if (node.holds()) return NodeType::CONST_INT; - if (node.holds()) return NodeType::CONST_STRING; - if (node.holds()) return NodeType::CONST_PATH; - if (node.holds()) return NodeType::CONST_BOOL; - if (node.holds()) return NodeType::CONST_NULL; - if (node.holds()) return NodeType::VAR; - if (node.holds()) return NodeType::LAMBDA; - if (node.holds()) return NodeType::APP; - if (node.holds()) return NodeType::BINARY_OP; - if (node.holds()) return NodeType::UNARY_OP; - if (node.holds()) return NodeType::ATTRSET; - if (node.holds()) return NodeType::SELECT; - if (node.holds()) return NodeType::HAS_ATTR; - if (node.holds()) return NodeType::WITH; - if (node.holds()) return NodeType::IF; - if (node.holds()) return NodeType::LET; - if (node.holds()) return NodeType::LETREC; - if (node.holds()) return NodeType::ASSERT; - return NodeType::ERROR; - } - - uint32_t get_node_line(const Node& node) { - return std::visit([](const auto& n) { return n.line; }, node.data); - } - - void write_node(const Node& node) { - write_u8(static_cast(get_node_type(node))); - write_u32(get_node_line(node)); - - if (auto* n = node.get_if()) { - write_u64(static_cast(n->value)); - } else if (auto* n = node.get_if()) { - write_string(n->value); - } else if (auto* n = node.get_if()) { - write_string(n->value); - } else if (auto* n = node.get_if()) { - write_u8(n->value ? 1 : 0); - } else if (auto* n = node.get_if()) { - // No data for null - } else if (auto* n = node.get_if()) { - write_u32(n->index); - } else if (auto* n = node.get_if()) { - write_u32(n->arity); - if (n->body) write_node(*n->body); - } else if (auto* n = node.get_if()) { - if (n->func) write_node(*n->func); - if (n->arg) write_node(*n->arg); - } else if (auto* n = node.get_if()) { - write_u8(static_cast(n->op)); - if (n->left) write_node(*n->left); - if (n->right) write_node(*n->right); - } else if (auto* n = node.get_if()) { - write_u8(static_cast(n->op)); - if (n->operand) write_node(*n->operand); - } else if (auto* n = node.get_if()) { - write_u8(n->recursive ? 1 : 0); - write_u32(n->attrs.size()); - for (const auto& [key, val] : n->attrs) { - write_string(key); - if (val) write_node(*val); - } - } else if (auto* n = node.get_if()) { - if (n->expr) write_node(*n->expr); - if (n->attr) write_node(*n->attr); - if (n->default_expr && *n->default_expr) { - write_u8(1); - write_node(**n->default_expr); - } else { - write_u8(0); - } - } else if (auto* n = node.get_if()) { - if (n->expr) write_node(*n->expr); - if (n->attr) write_node(*n->attr); - } else if (auto* n = node.get_if()) { - if (n->attrs) write_node(*n->attrs); - if (n->body) write_node(*n->body); - } else if (auto* n = node.get_if()) { - if (n->cond) write_node(*n->cond); - if (n->then_branch) write_node(*n->then_branch); - if (n->else_branch) write_node(*n->else_branch); - } else if (auto* n = node.get_if()) { - write_u32(n->bindings.size()); - for (const auto& [key, val] : n->bindings) { - write_string(key); - if (val) write_node(*val); - } - if (n->body) write_node(*n->body); - } else if (auto* n = node.get_if()) { - write_u32(n->bindings.size()); - for (const auto& [key, val] : n->bindings) { - write_string(key); - if (val) write_node(*val); - } - if (n->body) write_node(*n->body); - } else if (auto* n = node.get_if()) { - if (n->cond) write_node(*n->cond); - if (n->body) write_node(*n->body); - } + void write_u8(uint8_t val) { buffer.push_back(val); } + + void write_string(const std::string& str) { + write_u32(str.size()); + buffer.insert(buffer.end(), str.begin(), str.end()); + } + + NodeType get_node_type(const Node& node) { + if (node.holds()) + return NodeType::CONST_INT; + if (node.holds()) + return NodeType::CONST_STRING; + if (node.holds()) + return NodeType::CONST_PATH; + if (node.holds()) + return NodeType::CONST_BOOL; + if (node.holds()) + return NodeType::CONST_NULL; + if (node.holds()) + return NodeType::VAR; + if (node.holds()) + return NodeType::LAMBDA; + if (node.holds()) + return NodeType::APP; + if (node.holds()) + return NodeType::BINARY_OP; + if (node.holds()) + return NodeType::UNARY_OP; + if (node.holds()) + return NodeType::ATTRSET; + if (node.holds()) + return NodeType::SELECT; + if (node.holds()) + return NodeType::HAS_ATTR; + if (node.holds()) + return NodeType::WITH; + if (node.holds()) + return NodeType::IF; + if (node.holds()) + return NodeType::LET; + if (node.holds()) + return NodeType::LETREC; + if (node.holds()) + return NodeType::ASSERT; + return NodeType::ERROR; + } + + uint32_t get_node_line(const Node& node) { + return std::visit([](const auto& n) { return n.line; }, node.data); + } + + void write_node(const Node& node) { + write_u8(static_cast(get_node_type(node))); + write_u32(get_node_line(node)); + + if (auto* n = node.get_if()) { + write_u64(static_cast(n->value)); + } else if (auto* n = node.get_if()) { + write_string(n->value); + } else if (auto* n = node.get_if()) { + write_string(n->value); + } else if (auto* n = node.get_if()) { + write_u8(n->value ? 1 : 0); + } else if (auto* n = node.get_if()) { + // No data for null + } else if (auto* n = node.get_if()) { + write_u32(n->index); + } else if (auto* n = node.get_if()) { + write_u32(n->arity); + if (n->body) + write_node(*n->body); + } else if (auto* n = node.get_if()) { + if (n->func) + write_node(*n->func); + if (n->arg) + write_node(*n->arg); + } else if (auto* n = node.get_if()) { + write_u8(static_cast(n->op)); + if (n->left) + write_node(*n->left); + if (n->right) + write_node(*n->right); + } else if (auto* n = node.get_if()) { + write_u8(static_cast(n->op)); + if (n->operand) + write_node(*n->operand); + } else if (auto* n = node.get_if()) { + write_u8(n->recursive ? 1 : 0); + write_u32(n->attrs.size()); + for (const auto& [key, val] : n->attrs) { + write_string(key); + if (val) + write_node(*val); + } + } else if (auto* n = node.get_if()) { + if (n->expr) + write_node(*n->expr); + if (n->attr) + write_node(*n->attr); + if (n->default_expr && *n->default_expr) { + write_u8(1); + write_node(**n->default_expr); + } else { + write_u8(0); + } + } else if (auto* n = node.get_if()) { + if (n->expr) + write_node(*n->expr); + if (n->attr) + write_node(*n->attr); + } else if (auto* n = node.get_if()) { + if (n->attrs) + write_node(*n->attrs); + if (n->body) + write_node(*n->body); + } else if (auto* n = node.get_if()) { + if (n->cond) + write_node(*n->cond); + if (n->then_branch) + write_node(*n->then_branch); + if (n->else_branch) + write_node(*n->else_branch); + } else if (auto* n = node.get_if()) { + write_u32(n->bindings.size()); + for (const auto& [key, val] : n->bindings) { + write_string(key); + if (val) + write_node(*val); + } + if (n->body) + write_node(*n->body); + } else if (auto* n = node.get_if()) { + write_u32(n->bindings.size()); + for (const auto& [key, val] : n->bindings) { + write_string(key); + if (val) + write_node(*val); + } + if (n->body) + write_node(*n->body); + } else if (auto* n = node.get_if()) { + if (n->cond) + write_node(*n->cond); + if (n->body) + write_node(*n->body); } + } }; Serializer::Serializer() : pImpl(std::make_unique()) {} Serializer::~Serializer() = default; void Serializer::serialize(const IRModule& module, const std::string& path) { - auto bytes = serialize_to_bytes(module); - std::ofstream out(path, std::ios::binary); - out.write(reinterpret_cast(bytes.data()), bytes.size()); + auto bytes = serialize_to_bytes(module); + std::ofstream out(path, std::ios::binary); + out.write(reinterpret_cast(bytes.data()), bytes.size()); } std::vector Serializer::serialize_to_bytes(const IRModule& module) { - pImpl->buffer.clear(); + pImpl->buffer.clear(); - pImpl->write_u32(IR_MAGIC); - pImpl->write_u32(IR_VERSION); + pImpl->write_u32(IR_MAGIC); + pImpl->write_u32(IR_VERSION); - pImpl->write_u32(module.sources.size()); - for (const auto& src : module.sources) { - pImpl->write_string(src.path); - pImpl->write_string(src.content); - } + pImpl->write_u32(module.sources.size()); + for (const auto& src : module.sources) { + pImpl->write_string(src.path); + pImpl->write_string(src.content); + } - pImpl->write_u32(module.imports.size()); - for (const auto& [from, to] : module.imports) { - pImpl->write_string(from); - pImpl->write_string(to); - } + pImpl->write_u32(module.imports.size()); + for (const auto& [from, to] : module.imports) { + pImpl->write_string(from); + pImpl->write_string(to); + } - pImpl->write_u32(module.string_table.size()); - for (const auto& [str, id] : module.string_table) { - pImpl->write_string(str); - pImpl->write_u32(id); - } + pImpl->write_u32(module.string_table.size()); + for (const auto& [str, id] : module.string_table) { + pImpl->write_string(str); + pImpl->write_u32(id); + } - if (module.entry && module.entry != nullptr) { - pImpl->write_u8(1); - pImpl->write_node(*module.entry); - } else { - pImpl->write_u8(0); - } + if (module.entry && module.entry != nullptr) { + pImpl->write_u8(1); + pImpl->write_node(*module.entry); + } else { + pImpl->write_u8(0); + } - return pImpl->buffer; + return pImpl->buffer; } struct Deserializer::Impl { - std::vector buffer; - size_t pos = 0; + std::vector buffer; + size_t pos = 0; - uint32_t read_u32() { - uint32_t val = 0; - val |= buffer[pos + 0]; - val |= (uint32_t)buffer[pos + 1] << 8; - val |= (uint32_t)buffer[pos + 2] << 16; - val |= (uint32_t)buffer[pos + 3] << 24; - pos += 4; - return val; + uint32_t read_u32() { + uint32_t val = 0; + val |= buffer[pos + 0]; + val |= (uint32_t) buffer[pos + 1] << 8; + val |= (uint32_t) buffer[pos + 2] << 16; + val |= (uint32_t) buffer[pos + 3] << 24; + pos += 4; + return val; + } + + uint64_t read_u64() { + uint64_t val = 0; + for (int i = 0; i < 8; i++) { + val |= (uint64_t) buffer[pos + i] << (i * 8); } + pos += 8; + return val; + } - uint64_t read_u64() { - uint64_t val = 0; - for (int i = 0; i < 8; i++) { - val |= (uint64_t)buffer[pos + i] << (i * 8); - } - pos += 8; - return val; + uint8_t read_u8() { return buffer[pos++]; } + + std::string read_string() { + uint32_t len = read_u32(); + std::string str(reinterpret_cast(&buffer[pos]), len); + pos += len; + return str; + } + + std::shared_ptr read_node() { + NodeType type = static_cast(read_u8()); + uint32_t line = read_u32(); + + switch (type) { + case NodeType::CONST_INT: { + int64_t val = static_cast(read_u64()); + return std::make_shared(ConstIntNode(val, line)); } - - uint8_t read_u8() { - return buffer[pos++]; + case NodeType::CONST_STRING: { + std::string val = read_string(); + return std::make_shared(ConstStringNode(val, line)); } - - std::string read_string() { - uint32_t len = read_u32(); - std::string str(reinterpret_cast(&buffer[pos]), len); - pos += len; - return str; + case NodeType::CONST_PATH: { + std::string val = read_string(); + return std::make_shared(ConstPathNode(val, line)); } - - std::shared_ptr read_node() { - NodeType type = static_cast(read_u8()); - uint32_t line = read_u32(); - - switch (type) { - case NodeType::CONST_INT: { - int64_t val = static_cast(read_u64()); - return std::make_shared(ConstIntNode(val, line)); - } - case NodeType::CONST_STRING: { - std::string val = read_string(); - return std::make_shared(ConstStringNode(val, line)); - } - case NodeType::CONST_PATH: { - std::string val = read_string(); - return std::make_shared(ConstPathNode(val, line)); - } - case NodeType::CONST_BOOL: { - bool val = read_u8() != 0; - return std::make_shared(ConstBoolNode(val, line)); - } - case NodeType::CONST_NULL: - return std::make_shared(ConstNullNode(line)); - case NodeType::VAR: { - uint32_t index = read_u32(); - return std::make_shared(VarNode(index, "", line)); - } - case NodeType::LAMBDA: { - uint32_t arity = read_u32(); - auto body = read_node(); - return std::make_shared(LambdaNode(arity, body, line)); - } - case NodeType::APP: { - auto func = read_node(); - auto arg = read_node(); - return std::make_shared(AppNode(func, arg, line)); - } - case NodeType::BINARY_OP: { - BinaryOp op = static_cast(read_u8()); - auto left = read_node(); - auto right = read_node(); - return std::make_shared(BinaryOpNode(op, left, right, line)); - } - case NodeType::UNARY_OP: { - UnaryOp op = static_cast(read_u8()); - auto operand = read_node(); - return std::make_shared(UnaryOpNode(op, operand, line)); - } - case NodeType::ATTRSET: { - bool recursive = read_u8() != 0; - uint32_t num_attrs = read_u32(); - AttrsetNode attrs(recursive, line); - for (uint32_t i = 0; i < num_attrs; i++) { - std::string key = read_string(); - auto val = read_node(); - attrs.attrs.push_back({key, val}); - } - return std::make_shared(std::move(attrs)); - } - case NodeType::SELECT: { - auto expr = read_node(); - auto attr = read_node(); - uint8_t has_default = read_u8(); - std::optional> default_expr; - if (has_default) { - default_expr = read_node(); - } - SelectNode select_node(expr, attr, line); - select_node.default_expr = default_expr; - return std::make_shared(std::move(select_node)); - } - case NodeType::HAS_ATTR: { - auto expr = read_node(); - auto attr = read_node(); - return std::make_shared(HasAttrNode(expr, attr, line)); - } - case NodeType::WITH: { - auto attrs = read_node(); - auto body = read_node(); - return std::make_shared(WithNode(attrs, body, line)); - } - case NodeType::IF: { - auto cond = read_node(); - auto then_branch = read_node(); - auto else_branch = read_node(); - return std::make_shared(IfNode(cond, then_branch, else_branch, line)); - } - case NodeType::LET: { - uint32_t num_bindings = read_u32(); - std::vector>> bindings; - for (uint32_t i = 0; i < num_bindings; i++) { - std::string key = read_string(); - auto val = read_node(); - bindings.push_back({key, val}); - } - auto body = read_node(); - LetNode let(body, line); - let.bindings = std::move(bindings); - return std::make_shared(std::move(let)); - } - case NodeType::LETREC: { - uint32_t num_bindings = read_u32(); - std::vector>> bindings; - for (uint32_t i = 0; i < num_bindings; i++) { - std::string key = read_string(); - auto val = read_node(); - bindings.push_back({key, val}); - } - auto body = read_node(); - LetRecNode letrec(body, line); - letrec.bindings = std::move(bindings); - return std::make_shared(std::move(letrec)); - } - case NodeType::ASSERT: { - auto cond = read_node(); - auto body = read_node(); - return std::make_shared(AssertNode(cond, body, line)); - } - default: - throw std::runtime_error("Unknown node type in IR"); - } + case NodeType::CONST_BOOL: { + bool val = read_u8() != 0; + return std::make_shared(ConstBoolNode(val, line)); } + case NodeType::CONST_NULL: + return std::make_shared(ConstNullNode(line)); + case NodeType::VAR: { + uint32_t index = read_u32(); + return std::make_shared(VarNode(index, "", line)); + } + case NodeType::LAMBDA: { + uint32_t arity = read_u32(); + auto body = read_node(); + return std::make_shared(LambdaNode(arity, body, line)); + } + case NodeType::APP: { + auto func = read_node(); + auto arg = read_node(); + return std::make_shared(AppNode(func, arg, line)); + } + case NodeType::BINARY_OP: { + BinaryOp op = static_cast(read_u8()); + auto left = read_node(); + auto right = read_node(); + return std::make_shared(BinaryOpNode(op, left, right, line)); + } + case NodeType::UNARY_OP: { + UnaryOp op = static_cast(read_u8()); + auto operand = read_node(); + return std::make_shared(UnaryOpNode(op, operand, line)); + } + case NodeType::ATTRSET: { + bool recursive = read_u8() != 0; + uint32_t num_attrs = read_u32(); + AttrsetNode attrs(recursive, line); + for (uint32_t i = 0; i < num_attrs; i++) { + std::string key = read_string(); + auto val = read_node(); + attrs.attrs.push_back({key, val}); + } + return std::make_shared(std::move(attrs)); + } + case NodeType::SELECT: { + auto expr = read_node(); + auto attr = read_node(); + uint8_t has_default = read_u8(); + std::optional> default_expr; + if (has_default) { + default_expr = read_node(); + } + SelectNode select_node(expr, attr, line); + select_node.default_expr = default_expr; + return std::make_shared(std::move(select_node)); + } + case NodeType::HAS_ATTR: { + auto expr = read_node(); + auto attr = read_node(); + return std::make_shared(HasAttrNode(expr, attr, line)); + } + case NodeType::WITH: { + auto attrs = read_node(); + auto body = read_node(); + return std::make_shared(WithNode(attrs, body, line)); + } + case NodeType::IF: { + auto cond = read_node(); + auto then_branch = read_node(); + auto else_branch = read_node(); + return std::make_shared(IfNode(cond, then_branch, else_branch, line)); + } + case NodeType::LET: { + uint32_t num_bindings = read_u32(); + std::vector>> bindings; + for (uint32_t i = 0; i < num_bindings; i++) { + std::string key = read_string(); + auto val = read_node(); + bindings.push_back({key, val}); + } + auto body = read_node(); + LetNode let(body, line); + let.bindings = std::move(bindings); + return std::make_shared(std::move(let)); + } + case NodeType::LETREC: { + uint32_t num_bindings = read_u32(); + std::vector>> bindings; + for (uint32_t i = 0; i < num_bindings; i++) { + std::string key = read_string(); + auto val = read_node(); + bindings.push_back({key, val}); + } + auto body = read_node(); + LetRecNode letrec(body, line); + letrec.bindings = std::move(bindings); + return std::make_shared(std::move(letrec)); + } + case NodeType::ASSERT: { + auto cond = read_node(); + auto body = read_node(); + return std::make_shared(AssertNode(cond, body, line)); + } + default: + throw std::runtime_error("Unknown node type in IR"); + } + } }; Deserializer::Deserializer() : pImpl(std::make_unique()) {} Deserializer::~Deserializer() = default; IRModule Deserializer::deserialize(const std::string& path) { - std::ifstream in(path, std::ios::binary | std::ios::ate); - size_t size = in.tellg(); - in.seekg(0); - pImpl->buffer.resize(size); - in.read(reinterpret_cast(pImpl->buffer.data()), size); - pImpl->pos = 0; - return deserialize(pImpl->buffer); + std::ifstream in(path, std::ios::binary | std::ios::ate); + size_t size = in.tellg(); + in.seekg(0); + pImpl->buffer.resize(size); + in.read(reinterpret_cast(pImpl->buffer.data()), size); + pImpl->pos = 0; + return deserialize(pImpl->buffer); } IRModule Deserializer::deserialize(const std::vector& data) { - pImpl->buffer = data; - pImpl->pos = 0; + pImpl->buffer = data; + pImpl->pos = 0; - IRModule module; + IRModule module; - uint32_t magic = pImpl->read_u32(); - if (magic != IR_MAGIC) { - throw std::runtime_error("Invalid IR file"); - } + uint32_t magic = pImpl->read_u32(); + if (magic != IR_MAGIC) { + throw std::runtime_error("Invalid IR file"); + } - uint32_t version = pImpl->read_u32(); - if (version != IR_VERSION) { - throw std::runtime_error("Unsupported IR version"); - } + uint32_t version = pImpl->read_u32(); + if (version != IR_VERSION) { + throw std::runtime_error("Unsupported IR version"); + } - uint32_t num_sources = pImpl->read_u32(); - for (uint32_t i = 0; i < num_sources; i++) { - SourceFile src; - src.path = pImpl->read_string(); - src.content = pImpl->read_string(); - module.sources.push_back(src); - } + uint32_t num_sources = pImpl->read_u32(); + for (uint32_t i = 0; i < num_sources; i++) { + SourceFile src; + src.path = pImpl->read_string(); + src.content = pImpl->read_string(); + module.sources.push_back(src); + } - uint32_t num_imports = pImpl->read_u32(); - for (uint32_t i = 0; i < num_imports; i++) { - module.imports.push_back({pImpl->read_string(), pImpl->read_string()}); - } + uint32_t num_imports = pImpl->read_u32(); + for (uint32_t i = 0; i < num_imports; i++) { + module.imports.push_back({pImpl->read_string(), pImpl->read_string()}); + } - uint32_t num_strings = pImpl->read_u32(); - for (uint32_t i = 0; i < num_strings; i++) { - std::string str = pImpl->read_string(); - uint32_t id = pImpl->read_u32(); - module.string_table[str] = id; - } + uint32_t num_strings = pImpl->read_u32(); + for (uint32_t i = 0; i < num_strings; i++) { + std::string str = pImpl->read_string(); + uint32_t id = pImpl->read_u32(); + module.string_table[str] = id; + } - if (pImpl->read_u8()) { - module.entry = pImpl->read_node(); - } + if (pImpl->read_u8()) { + module.entry = pImpl->read_node(); + } - return module; + return module; } -} +} // namespace nix_irc diff --git a/src/irc/serializer.h b/src/irc/serializer.h index 67e72b5..a6785ab 100644 --- a/src/irc/serializer.h +++ b/src/irc/serializer.h @@ -2,38 +2,38 @@ #define NIX_IRC_SERIALIZER_H #include "types.h" +#include #include #include -#include namespace nix_irc { class Serializer { public: - Serializer(); - ~Serializer(); - - void serialize(const IRModule& module, const std::string& path); - std::vector serialize_to_bytes(const IRModule& module); - + Serializer(); + ~Serializer(); + + void serialize(const IRModule& module, const std::string& path); + std::vector serialize_to_bytes(const IRModule& module); + private: - struct Impl; - std::unique_ptr pImpl; + struct Impl; + std::unique_ptr pImpl; }; class Deserializer { public: - Deserializer(); - ~Deserializer(); - - IRModule deserialize(const std::string& path); - IRModule deserialize(const std::vector& data); - + Deserializer(); + ~Deserializer(); + + IRModule deserialize(const std::string& path); + IRModule deserialize(const std::vector& data); + private: - struct Impl; - std::unique_ptr pImpl; + struct Impl; + std::unique_ptr pImpl; }; -} +} // namespace nix_irc #endif \ No newline at end of file diff --git a/src/irc/types.h b/src/irc/types.h index d10acf1..457950c 100644 --- a/src/irc/types.h +++ b/src/irc/types.h @@ -2,14 +2,14 @@ #define NIX_IRC_TYPES_H #include -#include -#include -#include -#include -#include -#include #include +#include +#include #include +#include +#include +#include +#include namespace nix_irc { @@ -17,219 +17,191 @@ constexpr uint32_t IR_MAGIC = 0x4E495258; constexpr uint32_t IR_VERSION = 2; enum class NodeType : uint8_t { - CONST_INT = 0x01, - CONST_STRING = 0x02, - CONST_PATH = 0x03, - CONST_BOOL = 0x04, - CONST_NULL = 0x05, - VAR = 0x10, - LAMBDA = 0x20, - APP = 0x21, - BINARY_OP = 0x22, - UNARY_OP = 0x23, - ATTRSET = 0x30, - SELECT = 0x31, - HAS_ATTR = 0x34, - WITH = 0x32, - IF = 0x40, - LET = 0x50, - LETREC = 0x51, - ASSERT = 0x52, - THUNK = 0x60, - FORCE = 0x61, - ERROR = 0xFF + CONST_INT = 0x01, + CONST_STRING = 0x02, + CONST_PATH = 0x03, + CONST_BOOL = 0x04, + CONST_NULL = 0x05, + VAR = 0x10, + LAMBDA = 0x20, + APP = 0x21, + BINARY_OP = 0x22, + UNARY_OP = 0x23, + ATTRSET = 0x30, + SELECT = 0x31, + HAS_ATTR = 0x34, + WITH = 0x32, + IF = 0x40, + LET = 0x50, + LETREC = 0x51, + ASSERT = 0x52, + THUNK = 0x60, + FORCE = 0x61, + ERROR = 0xFF }; -enum class BinaryOp : uint8_t { - ADD, SUB, MUL, DIV, CONCAT, - EQ, NE, LT, GT, LE, GE, - AND, OR, IMPL -}; +enum class BinaryOp : uint8_t { ADD, SUB, MUL, DIV, CONCAT, EQ, NE, LT, GT, LE, GE, AND, OR, IMPL }; -enum class UnaryOp : uint8_t { - NEG, NOT -}; +enum class UnaryOp : uint8_t { NEG, NOT }; // Forward declare Node for use in shared_ptr class Node; struct ConstIntNode { - int64_t value; - uint32_t line = 0; - ConstIntNode(int64_t v = 0, uint32_t l = 0) : value(v), line(l) {} + int64_t value; + uint32_t line = 0; + ConstIntNode(int64_t v = 0, uint32_t l = 0) : value(v), line(l) {} }; struct ConstStringNode { - std::string value; - uint32_t line = 0; - ConstStringNode(std::string v = "", uint32_t l = 0) : value(std::move(v)), line(l) {} + std::string value; + uint32_t line = 0; + ConstStringNode(std::string v = "", uint32_t l = 0) : value(std::move(v)), line(l) {} }; struct ConstPathNode { - std::string value; - uint32_t line = 0; - ConstPathNode(std::string v = "", uint32_t l = 0) : value(std::move(v)), line(l) {} + std::string value; + uint32_t line = 0; + ConstPathNode(std::string v = "", uint32_t l = 0) : value(std::move(v)), line(l) {} }; struct ConstBoolNode { - bool value; - uint32_t line = 0; - ConstBoolNode(bool v = false, uint32_t l = 0) : value(v), line(l) {} + bool value; + uint32_t line = 0; + ConstBoolNode(bool v = false, uint32_t l = 0) : value(v), line(l) {} }; struct ConstNullNode { - uint32_t line = 0; - ConstNullNode(uint32_t l = 0) : line(l) {} + uint32_t line = 0; + ConstNullNode(uint32_t l = 0) : line(l) {} }; struct VarNode { - uint32_t index = 0; - std::optional name; - uint32_t line = 0; - VarNode(uint32_t idx = 0, std::string n = "", uint32_t l = 0) - : index(idx), name(n.empty() ? std::nullopt : std::optional(n)), line(l) {} + uint32_t index = 0; + std::optional name; + uint32_t line = 0; + VarNode(uint32_t idx = 0, std::string n = "", uint32_t l = 0) + : index(idx), name(n.empty() ? std::nullopt : std::optional(n)), line(l) {} }; struct LambdaNode { - uint32_t arity = 1; - std::shared_ptr body; - std::optional param_name; - bool strict_pattern = true; - uint32_t line = 0; - LambdaNode(uint32_t a, std::shared_ptr b, uint32_t l = 0); + uint32_t arity = 1; + std::shared_ptr body; + std::optional param_name; + bool strict_pattern = true; + uint32_t line = 0; + LambdaNode(uint32_t a, std::shared_ptr b, uint32_t l = 0); }; struct AppNode { - std::shared_ptr func; - std::shared_ptr arg; - uint32_t line = 0; - AppNode(std::shared_ptr f, std::shared_ptr a, uint32_t l = 0); + std::shared_ptr func; + std::shared_ptr arg; + uint32_t line = 0; + AppNode(std::shared_ptr f, std::shared_ptr a, uint32_t l = 0); }; struct BinaryOpNode { - BinaryOp op; - std::shared_ptr left; - std::shared_ptr right; - uint32_t line = 0; - BinaryOpNode(BinaryOp o, std::shared_ptr l, std::shared_ptr r, uint32_t ln = 0); + BinaryOp op; + std::shared_ptr left; + std::shared_ptr right; + uint32_t line = 0; + BinaryOpNode(BinaryOp o, std::shared_ptr l, std::shared_ptr r, uint32_t ln = 0); }; struct UnaryOpNode { - UnaryOp op; - std::shared_ptr operand; - uint32_t line = 0; - UnaryOpNode(UnaryOp o, std::shared_ptr operand, uint32_t l = 0); + UnaryOp op; + std::shared_ptr operand; + uint32_t line = 0; + UnaryOpNode(UnaryOp o, std::shared_ptr operand, uint32_t l = 0); }; struct AttrsetNode { - std::vector>> attrs; - bool recursive = false; - uint32_t line = 0; - AttrsetNode(bool rec = false, uint32_t l = 0) : recursive(rec), line(l) {} + std::vector>> attrs; + bool recursive = false; + uint32_t line = 0; + AttrsetNode(bool rec = false, uint32_t l = 0) : recursive(rec), line(l) {} }; struct SelectNode { - std::shared_ptr expr; - std::shared_ptr attr; - std::optional> default_expr; - uint32_t line = 0; - SelectNode(std::shared_ptr e, std::shared_ptr a, uint32_t l = 0); + std::shared_ptr expr; + std::shared_ptr attr; + std::optional> default_expr; + uint32_t line = 0; + SelectNode(std::shared_ptr e, std::shared_ptr a, uint32_t l = 0); }; struct HasAttrNode { - std::shared_ptr expr; - std::shared_ptr attr; - uint32_t line = 0; - HasAttrNode(std::shared_ptr e, std::shared_ptr a, uint32_t l = 0); + std::shared_ptr expr; + std::shared_ptr attr; + uint32_t line = 0; + HasAttrNode(std::shared_ptr e, std::shared_ptr a, uint32_t l = 0); }; struct WithNode { - std::shared_ptr attrs; - std::shared_ptr body; - uint32_t line = 0; - WithNode(std::shared_ptr a, std::shared_ptr b, uint32_t l = 0); + std::shared_ptr attrs; + std::shared_ptr body; + uint32_t line = 0; + WithNode(std::shared_ptr a, std::shared_ptr b, uint32_t l = 0); }; struct IfNode { - std::shared_ptr cond; - std::shared_ptr then_branch; - std::shared_ptr else_branch; - uint32_t line = 0; - IfNode(std::shared_ptr c, std::shared_ptr t, std::shared_ptr e, uint32_t l = 0); + std::shared_ptr cond; + std::shared_ptr then_branch; + std::shared_ptr else_branch; + uint32_t line = 0; + IfNode(std::shared_ptr c, std::shared_ptr t, std::shared_ptr e, uint32_t l = 0); }; struct LetNode { - std::vector>> bindings; - std::shared_ptr body; - uint32_t line = 0; - LetNode(std::shared_ptr b, uint32_t l = 0); + std::vector>> bindings; + std::shared_ptr body; + uint32_t line = 0; + LetNode(std::shared_ptr b, uint32_t l = 0); }; struct LetRecNode { - std::vector>> bindings; - std::shared_ptr body; - uint32_t line = 0; - LetRecNode(std::shared_ptr b, uint32_t l = 0); + std::vector>> bindings; + std::shared_ptr body; + uint32_t line = 0; + LetRecNode(std::shared_ptr b, uint32_t l = 0); }; struct AssertNode { - std::shared_ptr cond; - std::shared_ptr body; - uint32_t line = 0; - AssertNode(std::shared_ptr c, std::shared_ptr b, uint32_t l = 0); + std::shared_ptr cond; + std::shared_ptr body; + uint32_t line = 0; + AssertNode(std::shared_ptr c, std::shared_ptr b, uint32_t l = 0); }; struct ThunkNode { - std::shared_ptr expr; - uint32_t line = 0; - ThunkNode(std::shared_ptr e, uint32_t l = 0); + std::shared_ptr expr; + uint32_t line = 0; + ThunkNode(std::shared_ptr e, uint32_t l = 0); }; struct ForceNode { - std::shared_ptr expr; - uint32_t line = 0; - ForceNode(std::shared_ptr e, uint32_t l = 0); + std::shared_ptr expr; + uint32_t line = 0; + ForceNode(std::shared_ptr e, uint32_t l = 0); }; // Node wraps a variant for type-safe AST class Node { public: - using Variant = std::variant< - ConstIntNode, - ConstStringNode, - ConstPathNode, - ConstBoolNode, - ConstNullNode, - VarNode, - LambdaNode, - AppNode, - BinaryOpNode, - UnaryOpNode, - AttrsetNode, - SelectNode, - HasAttrNode, - WithNode, - IfNode, - LetNode, - LetRecNode, - AssertNode, - ThunkNode, - ForceNode - >; + using Variant = std::variant; - Variant data; + Variant data; - template - Node(T&& value) : data(std::forward(value)) {} + template Node(T&& value) : data(std::forward(value)) {} - template - T* get_if() { return std::get_if(&data); } + template T* get_if() { return std::get_if(&data); } - template - const T* get_if() const { return std::get_if(&data); } + template const T* get_if() const { return std::get_if(&data); } - template - bool holds() const { return std::holds_alternative(data); } + template bool holds() const { return std::holds_alternative(data); } }; // Constructor implementations @@ -239,7 +211,8 @@ inline LambdaNode::LambdaNode(uint32_t a, std::shared_ptr b, uint32_t l) inline AppNode::AppNode(std::shared_ptr f, std::shared_ptr a, uint32_t l) : func(f), arg(a), line(l) {} -inline BinaryOpNode::BinaryOpNode(BinaryOp o, std::shared_ptr l, std::shared_ptr r, uint32_t ln) +inline BinaryOpNode::BinaryOpNode(BinaryOp o, std::shared_ptr l, std::shared_ptr r, + uint32_t ln) : op(o), left(l), right(r), line(ln) {} inline UnaryOpNode::UnaryOpNode(UnaryOp o, std::shared_ptr operand, uint32_t l) @@ -254,37 +227,34 @@ inline HasAttrNode::HasAttrNode(std::shared_ptr e, std::shared_ptr a inline WithNode::WithNode(std::shared_ptr a, std::shared_ptr b, uint32_t l) : attrs(a), body(b), line(l) {} -inline IfNode::IfNode(std::shared_ptr c, std::shared_ptr t, std::shared_ptr e, uint32_t l) +inline IfNode::IfNode(std::shared_ptr c, std::shared_ptr t, std::shared_ptr e, + uint32_t l) : cond(c), then_branch(t), else_branch(e), line(l) {} -inline LetNode::LetNode(std::shared_ptr b, uint32_t l) - : body(b), line(l) {} +inline LetNode::LetNode(std::shared_ptr b, uint32_t l) : body(b), line(l) {} -inline LetRecNode::LetRecNode(std::shared_ptr b, uint32_t l) - : body(b), line(l) {} +inline LetRecNode::LetRecNode(std::shared_ptr b, uint32_t l) : body(b), line(l) {} inline AssertNode::AssertNode(std::shared_ptr c, std::shared_ptr b, uint32_t l) : cond(c), body(b), line(l) {} -inline ThunkNode::ThunkNode(std::shared_ptr e, uint32_t l) - : expr(e), line(l) {} +inline ThunkNode::ThunkNode(std::shared_ptr e, uint32_t l) : expr(e), line(l) {} -inline ForceNode::ForceNode(std::shared_ptr e, uint32_t l) - : expr(e), line(l) {} +inline ForceNode::ForceNode(std::shared_ptr e, uint32_t l) : expr(e), line(l) {} struct SourceFile { - std::string path; - std::string content; - std::shared_ptr ast; + std::string path; + std::string content; + std::shared_ptr ast; }; struct IRModule { - uint32_t version = IR_VERSION; - std::vector sources; - std::vector> imports; - std::shared_ptr entry; - std::unordered_map string_table; + uint32_t version = IR_VERSION; + std::vector sources; + std::vector> imports; + std::shared_ptr entry; + std::unordered_map string_table; }; -} +} // namespace nix_irc #endif diff --git a/src/plugin.cpp b/src/plugin.cpp index bad00fd..18a832c 100644 --- a/src/plugin.cpp +++ b/src/plugin.cpp @@ -5,20 +5,14 @@ #include "nix/expr/eval.hh" #include "nix/expr/primops.hh" #include "nix/expr/value.hh" -#include "nix/store/store-api.hh" -#include "nix/util/source-path.hh" +#include "irc/evaluator.h" #include "irc/ir_gen.h" #include "irc/parser.h" -#include "irc/resolver.h" #include "irc/serializer.h" #include "irc/types.h" -#include "irc/evaluator.h" -#include #include -#include -#include namespace nix_ir_plugin { @@ -29,11 +23,9 @@ using namespace nix_irc; * Load and evaluate a pre-compiled IR bundle * Usage: builtins.nixIR.loadIR "/path/to/file.nixir" */ -static void prim_loadIR(EvalState &state, const PosIdx pos, Value **args, - Value &v) { +static void prim_loadIR(EvalState& state, const PosIdx pos, Value** args, Value& v) { auto path = state.forceStringNoCtx( - *args[0], pos, - "while evaluating the first argument to builtins.nixIR.loadIR"); + *args[0], pos, "while evaluating the first argument to builtins.nixIR.loadIR"); std::string pathStr(path); @@ -42,25 +34,19 @@ static void prim_loadIR(EvalState &state, const PosIdx pos, Value **args, try { module = deserializer.deserialize(pathStr); - } catch (const std::exception &e) { - state.error("failed to deserialize IR bundle: %s", e.what()) - .atPos(pos) - .debugThrow(); + } catch (const std::exception& e) { + state.error("failed to deserialize IR bundle: %s", e.what()).atPos(pos).debugThrow(); } if (!module.entry) { - state.error("IR bundle has no entry point") - .atPos(pos) - .debugThrow(); + state.error("IR bundle has no entry point").atPos(pos).debugThrow(); } try { Evaluator evaluator(state); evaluator.eval_to_nix(module.entry, v); - } catch (const std::exception &e) { - state.error("failed to evaluate IR: %s", e.what()) - .atPos(pos) - .debugThrow(); + } catch (const std::exception& e) { + state.error("failed to evaluate IR: %s", e.what()).atPos(pos).debugThrow(); } } @@ -68,11 +54,9 @@ static void prim_loadIR(EvalState &state, const PosIdx pos, Value **args, * Compile Nix source to IR on-the-fly * Usage: builtins.nixIR.compile "{ x = 1; }" */ -static void prim_compileNix(EvalState &state, const PosIdx pos, Value **args, - Value &v) { +static void prim_compileNix(EvalState& state, const PosIdx pos, Value** args, Value& v) { auto source = state.forceStringNoCtx( - *args[0], pos, - "while evaluating the first argument to builtins.nixIR.compile"); + *args[0], pos, "while evaluating the first argument to builtins.nixIR.compile"); std::string sourceStr(source); @@ -81,9 +65,7 @@ static void prim_compileNix(EvalState &state, const PosIdx pos, Value **args, auto ast = parser.parse(sourceStr, ""); if (!ast) { - state.error("failed to parse Nix expression") - .atPos(pos) - .debugThrow(); + state.error("failed to parse Nix expression").atPos(pos).debugThrow(); } IRGenerator ir_gen; @@ -92,10 +74,8 @@ static void prim_compileNix(EvalState &state, const PosIdx pos, Value **args, Evaluator evaluator(state); evaluator.eval_to_nix(ir, v); - } catch (const std::exception &e) { - state.error("IR compilation failed: %s", e.what()) - .atPos(pos) - .debugThrow(); + } catch (const std::exception& e) { + state.error("IR compilation failed: %s", e.what()).atPos(pos).debugThrow(); } } @@ -103,19 +83,18 @@ static void prim_compileNix(EvalState &state, const PosIdx pos, Value **args, * Get information about the IR plugin * Usage: builtins.nixIR.info */ -static void prim_info(EvalState &state, const PosIdx pos, Value **args, - Value &v) { +static void prim_info(EvalState& state, const PosIdx pos, Value** args, Value& v) { auto bindings = state.buildBindings(3); - Value *vName = state.allocValue(); + Value* vName = state.allocValue(); vName->mkString("nix-ir-plugin"); bindings.insert(state.symbols.create("name"), vName); - Value *vVersion = state.allocValue(); + Value* vVersion = state.allocValue(); vVersion->mkString("0.1.0"); bindings.insert(state.symbols.create("version"), vVersion); - Value *vStatus = state.allocValue(); + Value* vStatus = state.allocValue(); vStatus->mkString("runtime-active"); bindings.insert(state.symbols.create("status"), vStatus);