commit 618a58b2b8c7bd07ad5b21ce23fb6e8236dda4ff Author: NotAShelf Date: Fri Feb 20 22:01:32 2026 +0300 initial commit Signed-off-by: NotAShelf Change-Id: I1ad48ade1bc8234b7d7c9fe3d976a5be6a6a6964 diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..0fdccae --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,74 @@ +cmake_minimum_required(VERSION 3.19) +project(nix-ir-plugin) + +set(CMAKE_CXX_STANDARD 23) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +find_package(PkgConfig REQUIRED) + +pkg_check_modules(NIX_STORE REQUIRED IMPORTED_TARGET nix-store) +pkg_check_modules(NIX_EXPR REQUIRED IMPORTED_TARGET nix-expr) +pkg_check_modules(NIX_CMD REQUIRED IMPORTED_TARGET nix-cmd) +pkg_check_modules(NIX_UTIL REQUIRED IMPORTED_TARGET nix-util) +pkg_check_modules(NIX_FETCHERS REQUIRED IMPORTED_TARGET nix-fetchers) +pkg_check_modules(NIX_MAIN REQUIRED IMPORTED_TARGET nix-main) + +# nix-irc (External IR Compiler) +add_executable(nix-irc + src/irc/main.cpp + src/irc/parser.cpp + src/irc/resolver.cpp + src/irc/ir_gen.cpp + src/irc/serializer.cpp +) + +target_include_directories(nix-irc PRIVATE + ${NIX_STORE_INCLUDE_DIRS} + ${NIX_EXPR_INCLUDE_DIRS} + ${NIX_UTIL_INCLUDE_DIRS} +) + +target_link_libraries(nix-irc PRIVATE + ${NIX_STORE_LINK_LIBRARIES} + ${NIX_EXPR_LINK_LIBRARIES} + ${NIX_UTIL_LINK_LIBRARIES} +) + +# Nix IR Plugin +add_library(nix-ir-plugin MODULE + src/plugin.cpp + src/irc/parser.cpp + src/irc/resolver.cpp + src/irc/ir_gen.cpp + src/irc/serializer.cpp +) + +# Include directories from pkg-config +target_include_directories(nix-ir-plugin PRIVATE + ${CMAKE_SOURCE_DIR}/src + ${NIX_STORE_INCLUDE_DIRS} + ${NIX_EXPR_INCLUDE_DIRS} + ${NIX_CMD_INCLUDE_DIRS} + ${NIX_UTIL_INCLUDE_DIRS} + ${NIX_FETCHERS_INCLUDE_DIRS} + ${NIX_MAIN_INCLUDE_DIRS} +) + +# Link libraries +target_link_libraries(nix-ir-plugin PRIVATE + ${NIX_STORE_LINK_LIBRARIES} + ${NIX_EXPR_LINK_LIBRARIES} + ${NIX_CMD_LINK_LIBRARIES} + ${NIX_UTIL_LINK_LIBRARIES} + ${NIX_FETCHERS_LINK_LIBRARIES} + 
${NIX_MAIN_LINK_LIBRARIES} +) + +# Set output name +set_target_properties(nix-ir-plugin PROPERTIES + PREFIX "" + OUTPUT_NAME "nix-ir-plugin" +) + +# Install to plugin directory +install(TARGETS nix-ir-plugin LIBRARY DESTINATION "${CMAKE_INSTALL_PREFIX}/lib/nix/plugins") diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..42ea3b1 --- /dev/null +++ b/flake.lock @@ -0,0 +1,27 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1771369470, + "narHash": "sha256-0NBlEBKkN3lufyvFegY4TYv5mCNHbi5OmBDrzihbBMQ=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "0182a361324364ae3f436a63005877674cf45efb", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..7619ca1 --- /dev/null +++ b/flake.nix @@ -0,0 +1,32 @@ +{ + inputs.nixpkgs.url = "github:NixOS/nixpkgs?ref=nixos-unstable"; + + outputs = {nixpkgs, ...}: let + systems = ["x86_64-linux" "aarch64-linux"]; + forAllSystems = nixpkgs.lib.genAttrs systems; + in { + devShells = forAllSystems (system: { + default = nixpkgs.legacyPackages.${system}.mkShell { + buildInputs = with nixpkgs.legacyPackages.${system}; [ + cmake + pkg-config + ninja + boost + bear + + nixVersions.nixComponents_2_32.nix-store + nixVersions.nixComponents_2_32.nix-expr + nixVersions.nixComponents_2_32.nix-cmd + nixVersions.nixComponents_2_32.nix-fetchers + nixVersions.nixComponents_2_32.nix-main + nixVersions.nixComponents_2_32.nix-util + nixVersions.nix_2_32 + ]; + + env = { + NIX_PLUGINABI = "0.2"; + }; + }; + }); + }; +} diff --git a/src/irc/ir_gen.cpp b/src/irc/ir_gen.cpp new file mode 100644 index 0000000..d149c1a --- /dev/null +++ b/src/irc/ir_gen.cpp @@ -0,0 +1,214 @@ +#include "ir_gen.h" +#include +#include +#include + +namespace nix_irc { + +struct NameResolver::Impl 
{ + std::vector> scopes; + std::vector> scope_names; + + Impl() { + scopes.push_back({}); + scope_names.push_back({}); + } +}; + +NameResolver::NameResolver() : pImpl(std::make_unique()) {} +NameResolver::~NameResolver() = default; + +void NameResolver::enter_scope() { + pImpl->scopes.push_back({}); + pImpl->scope_names.push_back({}); +} + +void NameResolver::exit_scope() { + if (!pImpl->scopes.empty()) { + pImpl->scopes.pop_back(); + pImpl->scope_names.pop_back(); + } +} + +void NameResolver::bind(const std::string& name) { + if (pImpl->scopes.empty()) return; + uint32_t idx = pImpl->scope_names.back().size(); + pImpl->scopes.back()[name] = idx; + pImpl->scope_names.back().push_back(name); +} + +uint32_t NameResolver::resolve(const std::string& name) { + for (int i = (int)pImpl->scopes.size() - 1; i >= 0; --i) { + auto it = pImpl->scopes[i].find(name); + if (it != pImpl->scopes[i].end()) { + uint32_t depth = pImpl->scopes.size() - 1 - i; + uint32_t offset = it->second; + return depth << 16 | offset; + } + } + return 0xFFFFFFFF; +} + +bool NameResolver::is_bound(const std::string& name) const { + for (auto it = pImpl->scopes.rbegin(); it != pImpl->scopes.rend(); ++it) { + if (it->count(name)) return true; + } + return false; +} + +struct IRGenerator::Impl { + std::unordered_map string_table; + uint32_t next_string_id = 0; + NameResolver name_resolver; + + Impl() {} + + uint32_t add_string(const std::string& str) { + auto it = string_table.find(str); + if (it != string_table.end()) { + return it->second; + } + uint32_t id = next_string_id++; + string_table[str] = id; + return id; + } + + std::shared_ptr convert(const std::shared_ptr& node_ptr) { + if (!node_ptr) return std::make_shared(ConstNullNode{}); + + const Node& node = *node_ptr; + + if (auto* n = node.get_if()) { + return std::make_shared(*n); + } + if (auto* n = node.get_if()) { + return std::make_shared(*n); + } + if (auto* n = node.get_if()) { + return std::make_shared(*n); + } + if (auto* n = 
node.get_if()) { + return std::make_shared(*n); + } + if (auto* n = node.get_if()) { + return std::make_shared(*n); + } + if (auto* n = node.get_if()) { + uint32_t idx = name_resolver.resolve(n->name.value_or("")); + VarNode converted(idx); + converted.name = n->name; + converted.line = n->line; + return std::make_shared(converted); + } + if (auto* n = node.get_if()) { + name_resolver.enter_scope(); + if (n->param_name) { + name_resolver.bind(*n->param_name); + } + auto body = convert(n->body); + name_resolver.exit_scope(); + LambdaNode lambda(n->arity, body, n->line); + lambda.param_name = n->param_name; + return std::make_shared(lambda); + } + if (auto* n = node.get_if()) { + auto func = convert(n->func); + auto arg = convert(n->arg); + return std::make_shared(AppNode(func, arg, n->line)); + } + if (auto* n = node.get_if()) { + AttrsetNode attrs(n->recursive, n->line); + name_resolver.enter_scope(); + for (const auto& [key, val] : n->attrs) { + name_resolver.bind(key); + } + for (const auto& [key, val] : n->attrs) { + attrs.attrs.push_back({key, convert(val)}); + } + name_resolver.exit_scope(); + return std::make_shared(attrs); + } + if (auto* n = node.get_if()) { + auto expr = convert(n->expr); + auto attr = convert(n->attr); + SelectNode select(expr, attr, n->line); + if (n->default_expr) { + select.default_expr = convert(*n->default_expr); + } + return std::make_shared(select); + } + if (auto* n = node.get_if()) { + auto attrs = convert(n->attrs); + auto body = convert(n->body); + return std::make_shared(WithNode(attrs, body, n->line)); + } + if (auto* n = node.get_if()) { + auto cond = convert(n->cond); + auto then_b = convert(n->then_branch); + auto else_b = convert(n->else_branch); + return std::make_shared(IfNode(cond, then_b, else_b, n->line)); + } + if (auto* n = node.get_if()) { + name_resolver.enter_scope(); + for (const auto& [key, val] : n->bindings) { + name_resolver.bind(key); + } + std::vector>> new_bindings; + for (const auto& [key, val] : 
n->bindings) { + new_bindings.push_back({key, convert(val)}); + } + auto body = convert(n->body); + name_resolver.exit_scope(); + LetNode let(body, n->line); + let.bindings = std::move(new_bindings); + return std::make_shared(let); + } + if (auto* n = node.get_if()) { + name_resolver.enter_scope(); + for (const auto& [key, val] : n->bindings) { + name_resolver.bind(key); + } + std::vector>> new_bindings; + for (const auto& [key, val] : n->bindings) { + new_bindings.push_back({key, convert(val)}); + } + auto body = convert(n->body); + name_resolver.exit_scope(); + LetRecNode letrec(body, n->line); + letrec.bindings = std::move(new_bindings); + return std::make_shared(letrec); + } + if (auto* n = node.get_if()) { + auto cond = convert(n->cond); + auto body = convert(n->body); + return std::make_shared(AssertNode(cond, body, n->line)); + } + if (auto* n = node.get_if()) { + auto left = convert(n->left); + auto right = convert(n->right); + return std::make_shared(BinaryOpNode(n->op, left, right, n->line)); + } + if (auto* n = node.get_if()) { + auto operand = convert(n->operand); + return std::make_shared(UnaryOpNode(n->op, operand, n->line)); + } + return std::make_shared(ConstNullNode{}); + } +}; + +IRGenerator::IRGenerator() : pImpl(std::make_unique()) {} +IRGenerator::~IRGenerator() = default; + +void IRGenerator::set_string_table(const std::unordered_map& table) { + pImpl->string_table = table; +} + +uint32_t IRGenerator::add_string(const std::string& str) { + return pImpl->add_string(str); +} + +std::shared_ptr IRGenerator::generate(const std::shared_ptr& ast) { + return pImpl->convert(ast); +} + +} diff --git a/src/irc/ir_gen.h b/src/irc/ir_gen.h new file mode 100644 index 0000000..de082a6 --- /dev/null +++ b/src/irc/ir_gen.h @@ -0,0 +1,45 @@ +#ifndef NIX_IRC_IR_GEN_H +#define NIX_IRC_IR_GEN_H + +#include "types.h" +#include +#include +#include +#include + +namespace nix_irc { + +class IRGenerator { +public: + IRGenerator(); + ~IRGenerator(); + + void 
set_string_table(const std::unordered_map& table); + uint32_t add_string(const std::string& str); + + std::shared_ptr generate(const std::shared_ptr& ast); + +private: + struct Impl; + std::unique_ptr pImpl; +}; + +class NameResolver { +public: + NameResolver(); + ~NameResolver(); + + void enter_scope(); + void exit_scope(); + void bind(const std::string& name); + uint32_t resolve(const std::string& name); + bool is_bound(const std::string& name) const; + +private: + struct Impl; + std::unique_ptr pImpl; +}; + +} + +#endif \ No newline at end of file diff --git a/src/irc/main.cpp b/src/irc/main.cpp new file mode 100644 index 0000000..c5c28bf --- /dev/null +++ b/src/irc/main.cpp @@ -0,0 +1,150 @@ +#include +#include "parser.h" +#include "resolver.h" +#include "ir_gen.h" +#include "serializer.h" +#include +#include +#include + +namespace nix_irc { + +void print_usage(const char* prog) { + std::cout << "Usage: " << prog << " [options] [output.nixir]\n" + << "\nOptions:\n" + << " -I Add search path for imports\n" + << " --no-imports Disable import resolution\n" + << " --help Show this help\n"; +} + +int run_compile(int argc, char** argv) { + std::string input_file; + std::string output_file; + std::vector search_paths; + bool resolve_imports = true; + + int i = 1; + while (i < argc) { + std::string arg = argv[i]; + if (arg == "-I") { + if (i + 1 >= argc) { + std::cerr << "Error: -I requires a path argument\n"; + return 1; + } + search_paths.push_back(argv[++i]); + } else if (arg == "--no-imports") { + resolve_imports = false; + } else if (arg == "--help" || arg == "-h") { + print_usage(argv[0]); + return 0; + } else if (arg[0] != '-') { + input_file = arg; + if (i + 1 < argc && argv[i + 1][0] != '-') { + output_file = argv[++i]; + } + } else { + std::cerr << "Unknown option: " << arg << "\n"; + print_usage(argv[0]); + return 1; + } + i++; + } + + if (input_file.empty()) { + std::cerr << "Error: No input file specified\n"; + print_usage(argv[0]); + return 1; + } + + if 
(output_file.empty()) { + output_file = input_file + "r"; + } + + try { + Parser parser; + Resolver resolver; + + for (const auto& path : search_paths) { + resolver.add_search_path(path); + } + + std::cout << "Parsing: " << input_file << "\n"; + auto ast = parser.parse_file(input_file); + + if (!ast) { + std::cerr << "Error: Failed to parse input\n"; + return 1; + } + + std::cout << "Resolving imports...\n"; + + IRGenerator ir_gen; + + std::cout << "Generating IR...\n"; + auto ir = ir_gen.generate(ast); + + IRModule module; + module.version = IR_VERSION; + module.entry = ir; + + std::cout << "Serializing to: " << output_file << "\n"; + Serializer serializer; + serializer.serialize(module, output_file); + + std::cout << "Done!\n"; + return 0; + + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << "\n"; + return 1; + } +} + +void print_decompile_usage(const char* prog) { + std::cout << "Usage: " << prog << " decompile \n"; +} + +int run_decompile(int argc, char** argv) { + if (argc < 3) { + print_decompile_usage(argv[0]); + return 1; + } + + std::string input_file = argv[2]; + + try { + Deserializer deserializer; + auto module = deserializer.deserialize(input_file); + + std::cout << "IR Version: " << module.version << "\n"; + std::cout << "Sources: " << module.sources.size() << "\n"; + std::cout << "Imports: " << module.imports.size() << "\n"; + + return 0; + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << "\n"; + return 1; + } +} + +} + +int main(int argc, char** argv) { + if (argc < 2) { + nix_irc::print_usage(argv[0]); + return 1; + } + + std::string cmd = argv[1]; + + if (cmd == "compile" || cmd == "c") { + return nix_irc::run_compile(argc - 1, argv + 1); + } else if (cmd == "decompile" || cmd == "d") { + return nix_irc::run_decompile(argc, argv); + } else if (cmd == "help" || cmd == "--help" || cmd == "-h") { + nix_irc::print_usage(argv[0]); + return 0; + } else { + return nix_irc::run_compile(argc, argv); + } 
+} diff --git a/src/irc/parser.cpp b/src/irc/parser.cpp new file mode 100644 index 0000000..77f325a --- /dev/null +++ b/src/irc/parser.cpp @@ -0,0 +1,621 @@ +#include "parser.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nix_irc { + +static std::string trim(const std::string& s) { + size_t start = s.find_first_not_of(" \t\n\r"); + if (start == std::string::npos) return ""; + size_t end = s.find_last_not_of(" \t\n\r"); + return s.substr(start, end - start + 1); +} + +static std::string read_file(const std::string& path) { + FILE* f = fopen(path.c_str(), "r"); + if (!f) { + throw std::runtime_error("Cannot open file: " + path); + } + fseek(f, 0, SEEK_END); + long size = ftell(f); + fseek(f, 0, SEEK_SET); + std::string content(size, '\0'); + fread(content.data(), 1, size, f); + fclose(f); + return content; +} + +static std::pair run_command(const std::string& cmd) { + std::array buffer; + std::string result; + std::string error; + + FILE* pipe = popen(cmd.c_str(), "r"); + if (!pipe) throw std::runtime_error("popen failed"); + + while (fgets(buffer.data(), buffer.size(), pipe) != nullptr) { + result += buffer.data(); + } + + int status = pclose(pipe); + if (status != 0) { + throw std::runtime_error("Command failed: " + cmd); + } + return {result, error}; +} + +struct Token { + enum Type { + LPAREN, RPAREN, LBRACE, RBRACE, LBRACKET, RBRACKET, + IDENT, STRING, PATH, INT, BOOL, + LET, IN, REC, IF, THEN, ELSE, ASSERT, WITH, + DOT, SEMICOLON, COLON, AT, COMMA, + // Operators + PLUS, MINUS, STAR, SLASH, CONCAT, + EQEQ, NE, LT, GT, LE, GE, + AND, OR, IMPL, NOT, + EOF_ + } type; + std::string value; + size_t line; + size_t col; +}; + +class Lexer { +public: + Lexer(const std::string& input) : input(input), pos(0), line(1), col(1) {} + + std::vector tokenize() { + #define TOKEN(t) Token{Token::t, "", line, col} + + while (pos < input.size()) { + skip_whitespace(); + if (pos >= input.size()) break; + + char c = 
input[pos]; + + if (c == '(') { emit(TOKEN(LPAREN)); } + else if (c == ')') { emit(TOKEN(RPAREN)); } + else if (c == '{') { emit(TOKEN(LBRACE)); } + else if (c == '}') { emit(TOKEN(RBRACE)); } + else if (c == '[') { emit(TOKEN(LBRACKET)); } + else if (c == ']') { emit(TOKEN(RBRACKET)); } + else if (c == ';') { emit(TOKEN(SEMICOLON)); } + else if (c == ':') { emit(TOKEN(COLON)); } + else if (c == '@') { emit(TOKEN(AT)); } + else if (c == ',') { emit(TOKEN(COMMA)); } + else if (c == '"') { tokenize_string(); } + // Two-char operators + else if (c == '=' && pos + 1 < input.size() && input[pos + 1] == '=') { + tokens.push_back(TOKEN(EQEQ)); + pos += 2; col += 2; + } + else if (c == '!' && pos + 1 < input.size() && input[pos + 1] == '=') { + tokens.push_back(TOKEN(NE)); + pos += 2; col += 2; + } + else if (c == '<' && pos + 1 < input.size() && input[pos + 1] == '=') { + tokens.push_back(TOKEN(LE)); + pos += 2; col += 2; + } + else if (c == '>' && pos + 1 < input.size() && input[pos + 1] == '=') { + tokens.push_back(TOKEN(GE)); + pos += 2; col += 2; + } + else if (c == '+' && pos + 1 < input.size() && input[pos + 1] == '+') { + tokens.push_back(TOKEN(CONCAT)); + pos += 2; col += 2; + } + else if (c == '&' && pos + 1 < input.size() && input[pos + 1] == '&') { + tokens.push_back(TOKEN(AND)); + pos += 2; col += 2; + } + else if (c == '|' && pos + 1 < input.size() && input[pos + 1] == '|') { + tokens.push_back(TOKEN(OR)); + pos += 2; col += 2; + } + else if (c == '-' && pos + 1 < input.size() && input[pos + 1] == '>') { + tokens.push_back(TOKEN(IMPL)); + pos += 2; col += 2; + } + // Single-char operators + else if (c == '+') { emit(TOKEN(PLUS)); } + else if (c == '*') { emit(TOKEN(STAR)); } + else if (c == '/') { + // Check if it's a path or division + if (pos + 1 < input.size() && (isalnum(input[pos + 1]) || input[pos + 1] == '.')) { + tokenize_path(); + } else { + emit(TOKEN(SLASH)); + } + } + else if (c == '<') { emit(TOKEN(LT)); } + else if (c == '>') { emit(TOKEN(GT)); 
} + else if (c == '!') { emit(TOKEN(NOT)); } + else if (c == '.') { emit(TOKEN(DOT)); } + else if (c == '-') { + // Check if it's a negative number or minus operator + if (pos + 1 < input.size() && isdigit(input[pos + 1])) { + tokenize_int(); + } else { + emit(TOKEN(MINUS)); + } + } + else if (isdigit(c)) { tokenize_int(); } + else if (isalpha(c) || c == '_') { tokenize_ident(); } + else { pos++; col++; } + } + tokens.push_back({Token::EOF_, "", line, col}); + + #undef TOKEN + return tokens; + } + +private: + std::vector tokens; + const std::string& input; + size_t pos; + size_t line; + size_t col; + + void emit(Token t) { + tokens.push_back(t); + pos++; + col++; + } + + void skip_whitespace() { + while (pos < input.size()) { + char c = input[pos]; + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { + if (c == '\n') { line++; col = 1; } + else { col++; } + pos++; + } else if (c == '#') { + while (pos < input.size() && input[pos] != '\n') pos++; + } else { + break; + } + } + } + + void tokenize_string() { + pos++; + std::string s; + while (pos < input.size() && input[pos] != '"') { + if (input[pos] == '\\' && pos + 1 < input.size()) { + pos++; + switch (input[pos]) { + case 'n': s += '\n'; break; + case 't': s += '\t'; break; + case 'r': s += '\r'; break; + case '"': s += '"'; break; + case '\\': s += '\\'; break; + default: s += input[pos]; break; + } + } else { + s += input[pos]; + } + pos++; + } + pos++; + tokens.push_back({Token::STRING, s, line, col}); + col += s.size() + 2; + } + + void tokenize_path() { + size_t start = pos; + while (pos < input.size() && !isspace(input[pos]) && + input[pos] != '(' && input[pos] != ')' && + input[pos] != '{' && input[pos] != '}' && + input[pos] != '[' && input[pos] != ']') { + pos++; + } + std::string path = input.substr(start, pos - start); + tokens.push_back({Token::PATH, path, line, col}); + col += path.size(); + } + + void tokenize_int() { + size_t start = pos; + if (input[pos] == '-') pos++; + while (pos < 
input.size() && isdigit(input[pos])) pos++; + std::string num = input.substr(start, pos - start); + tokens.push_back({Token::INT, num, line, col}); + col += num.size(); + } + + void tokenize_ident() { + size_t start = pos; + while (pos < input.size() && (isalnum(input[pos]) || input[pos] == '_' || input[pos] == '-')) pos++; + std::string ident = input.substr(start, pos - start); + + Token::Type type = Token::IDENT; + if (ident == "let") type = Token::LET; + else if (ident == "in") type = Token::IN; + else if (ident == "rec") type = Token::REC; + else if (ident == "if") type = Token::IF; + else if (ident == "then") type = Token::THEN; + else if (ident == "else") type = Token::ELSE; + else if (ident == "assert") type = Token::ASSERT; + else if (ident == "with") type = Token::WITH; + else if (ident == "true") type = Token::BOOL; + else if (ident == "false") type = Token::BOOL; + + tokens.push_back({type, ident, line, col}); + col += ident.size(); + } +}; + +class Parser::Impl { +public: + std::vector tokens; + size_t pos = 0; + std::string current_file; + + const Token& current() { + if (pos < tokens.size()) return tokens[pos]; + static Token eof{Token::EOF_, "", 0, 0}; + return eof; + } + + void advance() { pos++; } + + bool consume(Token::Type type) { + if (current().type == type) { + advance(); + return true; + } + return false; + } + + bool expect(Token::Type type) { + if (current().type != type) { + std::cerr << "Expected token " << type << " but got " << current().type + << " at " << current().line << ":" << current().col << "\n"; + return false; + } + advance(); + return true; + } + + // Get operator precedence (higher = tighter binding) + int get_precedence(Token::Type type) { + switch (type) { + case Token::OR: return 1; + case Token::AND: return 2; + case Token::IMPL: return 3; + case Token::EQEQ: case Token::NE: return 4; + case Token::LT: case Token::GT: case Token::LE: case Token::GE: return 5; + case Token::CONCAT: return 6; + case Token::PLUS: case 
Token::MINUS: return 7; + case Token::STAR: case Token::SLASH: return 8; + default: return 0; + } + } + + // Convert token type to binary operator + BinaryOp token_to_binop(Token::Type type) { + switch (type) { + case Token::PLUS: return BinaryOp::ADD; + case Token::MINUS: return BinaryOp::SUB; + case Token::STAR: return BinaryOp::MUL; + case Token::SLASH: return BinaryOp::DIV; + case Token::CONCAT: return BinaryOp::CONCAT; + case Token::EQEQ: return BinaryOp::EQ; + case Token::NE: return BinaryOp::NE; + case Token::LT: return BinaryOp::LT; + case Token::GT: return BinaryOp::GT; + case Token::LE: return BinaryOp::LE; + case Token::GE: return BinaryOp::GE; + case Token::AND: return BinaryOp::AND; + case Token::OR: return BinaryOp::OR; + case Token::IMPL: return BinaryOp::IMPL; + default: throw std::runtime_error("Invalid binary operator"); + } + } + + std::shared_ptr parse_expr() { + if (consume(Token::IF)) { + auto cond = parse_expr(); + expect(Token::THEN); + auto then = parse_expr(); + expect(Token::ELSE); + auto else_ = parse_expr(); + return std::make_shared(IfNode(cond, then, else_)); + } + if (consume(Token::LET)) { + bool is_rec = consume(Token::REC); + std::vector>> bindings; + parse_bindings(bindings); + expect(Token::IN); + auto body = parse_expr(); + + if (is_rec) { + auto letrec = LetRecNode(body); + letrec.bindings = std::move(bindings); + return std::make_shared(std::move(letrec)); + } else { + auto let = LetNode(body); + let.bindings = std::move(bindings); + return std::make_shared(std::move(let)); + } + } + if (consume(Token::ASSERT)) { + auto cond = parse_expr(); + expect(Token::SEMICOLON); + auto body = parse_expr(); + return std::make_shared(AssertNode(cond, body)); + } + if (consume(Token::WITH)) { + auto attrs = parse_expr(); + expect(Token::SEMICOLON); + auto body = parse_expr(); + return std::make_shared(WithNode(attrs, body)); + } + + return parse_expr1(); + } + + std::shared_ptr parse_expr1() { + return parse_binary_op(0); + } + + // 
Precedence climbing for binary operators + std::shared_ptr parse_binary_op(int min_prec) { + auto left = parse_selection(); + + while (true) { + int prec = get_precedence(current().type); + if (prec == 0 || prec < min_prec) break; + + Token op_token = current(); + advance(); + + auto right = parse_binary_op(prec + 1); + left = std::make_shared(BinaryOpNode( + token_to_binop(op_token.type), + left, + right + )); + } + + return left; + } + + std::shared_ptr parse_selection() { + auto left = parse_expr2(); + + while (current().type == Token::DOT) { + advance(); + Token name = current(); + if (name.type == Token::IDENT) { + advance(); + auto attr = std::make_shared(ConstStringNode(name.value)); + auto result = std::make_shared(SelectNode(left, attr)); + + if (consume(Token::DOT)) { + Token name2 = current(); + if (name2.type == Token::IDENT) { + advance(); + auto attr2 = std::make_shared(ConstStringNode(name2.value)); + auto* curr = result->get_if(); + while (curr && consume(Token::DOT)) { + Token n = current(); + expect(Token::IDENT); + auto a = std::make_shared(ConstStringNode(n.value)); + curr->attr = std::make_shared(AppNode( + std::make_shared(AppNode(curr->attr, a)), + std::make_shared(ConstNullNode()) + )); + } + } + } + return result; + } else if (consume(Token::LBRACE)) { + auto result = std::make_shared(SelectNode(left, std::make_shared(ConstStringNode(name.value)))); + parse_expr_attrs(result); + expect(Token::RBRACE); + return result; + } + return left; + } + + return left; + } + + void parse_expr_attrs(std::shared_ptr&) { + // Extended selection syntax + } + + std::shared_ptr parse_expr2() { + std::shared_ptr left = parse_expr3(); + + while (true) { + if (current().type == Token::LBRACKET) { + advance(); + auto arg = parse_expr(); + expect(Token::RBRACKET); + left = std::make_shared(AppNode(left, arg)); + } else if (current().type == Token::STRING) { + Token s = current(); + advance(); + auto arg = std::make_shared(ConstStringNode(s.value)); + left = 
std::make_shared(AppNode(left, arg)); + } else { + break; + } + } + + return left; + } + + std::shared_ptr parse_expr3() { + // Handle unary operators + if (consume(Token::MINUS)) { + auto operand = parse_expr3(); + return std::make_shared(UnaryOpNode(UnaryOp::NEG, operand)); + } + + if (consume(Token::NOT)) { + auto operand = parse_expr3(); + return std::make_shared(UnaryOpNode(UnaryOp::NOT, operand)); + } + + if (consume(Token::LPAREN)) { + auto expr = parse_expr(); + expect(Token::RPAREN); + return expr; + } + + if (consume(Token::LBRACE)) { + return parse_attrs(); + } + + if (consume(Token::LBRACKET)) { + return parse_list(); + } + + Token t = current(); + + if (t.type == Token::IDENT) { + advance(); + return std::make_shared(VarNode(0, t.value)); + } + + if (t.type == Token::INT) { + advance(); + return std::make_shared(ConstIntNode(std::stoll(t.value))); + } + + if (t.type == Token::STRING) { + advance(); + return std::make_shared(ConstStringNode(t.value)); + } + + if (t.type == Token::PATH) { + advance(); + return std::make_shared(ConstPathNode(t.value)); + } + + if (t.type == Token::BOOL) { + advance(); + return std::make_shared(ConstBoolNode(t.value == "true")); + } + + std::cerr << "Unknown token: " << t.value << " (type " << t.type << ")\n"; + advance(); + return std::make_shared(ConstNullNode()); + } + + std::shared_ptr parse_attrs() { + auto attrs = AttrsetNode(false); + + while (current().type != Token::RBRACE && current().type != Token::EOF_) { + if (consume(Token::REC)) { + attrs.recursive = true; + continue; + } + + if (current().type == Token::IDENT || current().type == Token::STRING) { + Token key = current(); + advance(); + std::string key_str = key.value; + + if (consume(Token::COLON)) { + auto value = parse_expr(); + attrs.attrs.push_back({key_str, value}); + } else if (consume(Token::AT)) { + auto pattern = parse_expr(); + auto value = parse_expr(); + attrs.attrs.push_back({key_str, value}); + } + } + + if (consume(Token::COMMA)) continue; + 
if (consume(Token::SEMICOLON)) continue; + } + + expect(Token::RBRACE); + return std::make_shared(std::move(attrs)); + } + + std::shared_ptr parse_list() { + std::shared_ptr list = std::make_shared(ConstNullNode()); + + if (consume(Token::RBRACKET)) { + return list; + } + + std::vector> elements; + while (current().type != Token::RBRACKET) { + elements.push_back(parse_expr()); + if (!consume(Token::COMMA)) break; + } + expect(Token::RBRACKET); + + for (auto it = elements.rbegin(); it != elements.rend(); ++it) { + list = std::make_shared(AppNode( + std::make_shared(AppNode( + std::make_shared(VarNode(0, "__list")), + *it + )), + list + )); + } + + return list; + } + + void parse_bindings(std::vector>>& bindings) { + while (current().type == Token::IDENT || current().type == Token::LBRACE) { + if (current().type == Token::LBRACE) { + auto inherit = parse_expr(); + (void)inherit; + continue; + } + + Token key = current(); + expect(Token::IDENT); + + if (consume(Token::AT)) { + auto pattern = parse_expr(); + auto value = parse_expr(); + bindings.push_back({key.value, value}); + } else { + expect(Token::COLON); + auto value = parse_expr(); + bindings.push_back({key.value, value}); + } + + if (!consume(Token::SEMICOLON)) break; + } + } +}; + +Parser::Parser() : pImpl(std::make_unique()) {} +Parser::~Parser() = default; + +std::shared_ptr Parser::parse(const std::string& source, const std::string& path) { + pImpl->current_file = path; + + Lexer lexer(source); + pImpl->tokens = lexer.tokenize(); + pImpl->pos = 0; + + return pImpl->parse_expr(); +} + +std::shared_ptr Parser::parse_file(const std::string& path) { + std::string content = read_file(path); + return parse(content, path); +} + +} diff --git a/src/irc/parser.h b/src/irc/parser.h new file mode 100644 index 0000000..7bb97c8 --- /dev/null +++ b/src/irc/parser.h @@ -0,0 +1,25 @@ +#ifndef NIX_IRC_PARSER_H +#define NIX_IRC_PARSER_H + +#include "types.h" +#include +#include + +namespace nix_irc { + +class Parser { 
+public: + Parser(); + ~Parser(); + + std::shared_ptr parse(const std::string& source, const std::string& path = ""); + std::shared_ptr parse_file(const std::string& path); + +private: + struct Impl; + std::unique_ptr pImpl; +}; + +} + +#endif \ No newline at end of file diff --git a/src/irc/resolver.cpp b/src/irc/resolver.cpp new file mode 100644 index 0000000..f57cf64 --- /dev/null +++ b/src/irc/resolver.cpp @@ -0,0 +1,111 @@ +#include "resolver.h" +#include "parser.h" +#include +#include +#include +#include +#include + +namespace nix_irc { + +namespace fs = std::filesystem; + +struct Resolver::Impl { + ResolverConfig config; + std::vector> resolved_imports; + std::unordered_set visited; + Parser parser; + + Impl(const ResolverConfig& cfg) : config(cfg) {} + + std::string resolve_path(const std::string& path, const std::string& from_file) { + fs::path p(path); + + if (p.is_absolute()) { + if (fs::exists(p)) return path; + return ""; + } + + fs::path from_dir = fs::path(from_file).parent_path(); + fs::path candidate = from_dir / p; + if (fs::exists(candidate)) return candidate.string(); + + for (const auto& search : config.search_paths) { + candidate = fs::path(search) / p; + if (fs::exists(candidate)) return candidate.string(); + } + + return ""; + } + + ImportResult do_resolve(const std::string& path, const std::string& from_file) { + std::string resolved = resolve_path(path, from_file); + + if (resolved.empty()) { + return {false, "", "Cannot find file: " + path, nullptr}; + } + + if (visited.count(resolved)) { + return {true, resolved, "", nullptr}; + } + visited.insert(resolved); + + try { + auto ast = parser.parse_file(resolved); + return {true, resolved, "", ast}; + } catch (const std::exception& e) { + return {false, "", e.what(), nullptr}; + } + } +}; + +Resolver::Resolver(const ResolverConfig& config) : pImpl(std::make_unique(config)) {} +Resolver::~Resolver() = default; + +void Resolver::add_search_path(const std::string& path) { + 
pImpl->config.search_paths.push_back(path); +} + +void Resolver::set_search_paths(const std::vector& paths) { + pImpl->config.search_paths = paths; +} + +ImportResult Resolver::resolve_import(const std::string& path, const std::string& from_file) { + auto result = pImpl->do_resolve(path, from_file); + if (result.success && result.ast) { + pImpl->resolved_imports.push_back({path, result.path}); + } + return result; +} + +ImportResult Resolver::resolve_import(const Node& import_node, const std::string& from_file) { + const ConstPathNode* path_node = import_node.get_if(); + if (!path_node) { + return {false, "", "Dynamic import not supported", nullptr}; + } + return resolve_import(path_node->value, from_file); +} + +std::vector Resolver::get_resolved_files() const { + std::vector files; + for (const auto& [orig, resolved] : pImpl->resolved_imports) { + (void)orig; + files.push_back(resolved); + } + return files; +} + +std::vector> Resolver::get_imports() const { + return pImpl->resolved_imports; +} + +bool is_static_import(const Node& node) { + return node.holds(); +} + +std::string normalize_path(const std::string& path) { + fs::path p(path); + return fs::absolute(p).string(); +} + +} diff --git a/src/irc/resolver.h b/src/irc/resolver.h new file mode 100644 index 0000000..39f8d40 --- /dev/null +++ b/src/irc/resolver.h @@ -0,0 +1,48 @@ +#ifndef NIX_IRC_RESOLVER_H +#define NIX_IRC_RESOLVER_H + +#include "types.h" +#include +#include +#include +#include + +namespace nix_irc { + +struct ImportResult { + bool success; + std::string path; + std::string error; + std::shared_ptr ast; +}; + +struct ResolverConfig { + std::vector search_paths; + bool resolve_imports = true; +}; + +class Resolver { +public: + Resolver(const ResolverConfig& config = {}); + ~Resolver(); + + void add_search_path(const std::string& path); + void set_search_paths(const std::vector& paths); + + ImportResult resolve_import(const std::string& path, const std::string& from_file); + ImportResult 
resolve_import(const Node& import_node, const std::string& from_file);
+
+    std::vector<std::string> get_resolved_files() const;
+    std::vector<std::pair<std::string, std::string>> get_imports() const;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> pImpl;
+};
+
+bool is_static_import(const Node& node);
+std::string normalize_path(const std::string& path);
+
+}
+
+#endif
\ No newline at end of file
diff --git a/src/irc/serializer.cpp b/src/irc/serializer.cpp
new file mode 100644
index 0000000..cc6cdc8
--- /dev/null
+++ b/src/irc/serializer.cpp
@@ -0,0 +1,265 @@
+#include "serializer.h"
+// NOTE(review): the standard include targets and all template arguments were
+// lost when this patch text was extracted; they are rebuilt here from the
+// names this file uses. Confirm against the repository.
+#include <cstddef>
+#include <cstdint>
+#include <fstream>
+#include <ios>
+#include <limits>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <variant>
+#include <vector>
+
+namespace nix_irc {
+
+// Byte-buffer writer behind Serializer. All integers are written little-endian.
+struct Serializer::Impl {
+    using Bindings = std::vector<std::pair<std::string, std::shared_ptr<Node>>>;
+
+    std::vector<uint8_t> buffer;
+
+    void write_u8(uint8_t val) {
+        buffer.push_back(val);
+    }
+
+    void write_u32(uint32_t val) {
+        for (int shift = 0; shift < 32; shift += 8) {
+            buffer.push_back(static_cast<uint8_t>((val >> shift) & 0xFF));
+        }
+    }
+
+    void write_u64(uint64_t val) {
+        for (int shift = 0; shift < 64; shift += 8) {
+            buffer.push_back(static_cast<uint8_t>((val >> shift) & 0xFF));
+        }
+    }
+
+    // Writes a size as u32, rejecting values the field cannot hold instead of
+    // silently truncating them.
+    void write_count(std::size_t count) {
+        if (count > std::numeric_limits<uint32_t>::max()) {
+            throw std::length_error("IR serializer: size does not fit in 32 bits");
+        }
+        write_u32(static_cast<uint32_t>(count));
+    }
+
+    // u32 length followed by the raw bytes, no terminator.
+    void write_string(const std::string& str) {
+        write_count(str.size());
+        buffer.insert(buffer.end(), str.begin(), str.end());
+    }
+
+    // Maps the alternative held by `node` to its on-disk tag. Alternatives
+    // without a branch here (ThunkNode, ForceNode) map to ERROR.
+    NodeType get_node_type(const Node& node) {
+        if (node.holds<ConstIntNode>()) return NodeType::CONST_INT;
+        if (node.holds<ConstStringNode>()) return NodeType::CONST_STRING;
+        if (node.holds<ConstPathNode>()) return NodeType::CONST_PATH;
+        if (node.holds<ConstBoolNode>()) return NodeType::CONST_BOOL;
+        if (node.holds<ConstNullNode>()) return NodeType::CONST_NULL;
+        if (node.holds<VarNode>()) return NodeType::VAR;
+        if (node.holds<LambdaNode>()) return NodeType::LAMBDA;
+        if (node.holds<AppNode>()) return NodeType::APP;
+        if (node.holds<BinaryOpNode>()) return NodeType::BINARY_OP;
+        if (node.holds<UnaryOpNode>()) return NodeType::UNARY_OP;
+        if (node.holds<AttrsetNode>()) return NodeType::ATTRSET;
+        if (node.holds<SelectNode>()) return NodeType::SELECT;
+        if (node.holds<WithNode>()) return NodeType::WITH;
+        if (node.holds<IfNode>()) return NodeType::IF;
+        if (node.holds<LetNode>()) return NodeType::LET;
+        if (node.holds<LetRecNode>()) return NodeType::LETREC;
+        if (node.holds<AssertNode>()) return NodeType::ASSERT;
+        return NodeType::ERROR;
+    }
+
+    // Every alternative carries a `line` member, so a generic visitor suffices.
+    uint32_t get_node_line(const Node& node) {
+        return std::visit([](const auto& n) { return n.line; }, node.data);
+    }
+
+    // Count followed by (name, value) entries; shared by attrsets and let forms.
+    void write_bindings(const Bindings& entries) {
+        write_count(entries.size());
+        for (const auto& [key, val] : entries) {
+            write_string(key);
+            if (val) write_node(*val);
+        }
+    }
+
+    // Emits one node: type tag (u8), source line (u32), then the payload for
+    // that node kind with children in member order.
+    // NOTE(review): a null child is skipped without any marker, and
+    // ThunkNode/ForceNode get the ERROR tag with no payload. Both need a
+    // format decision before the AST reader is written.
+    void write_node(const Node& node) {
+        write_u8(static_cast<uint8_t>(get_node_type(node)));
+        write_u32(get_node_line(node));
+
+        if (auto* n = node.get_if<ConstIntNode>()) {
+            write_u64(static_cast<uint64_t>(n->value));
+        } else if (auto* n = node.get_if<ConstStringNode>()) {
+            write_string(n->value);
+        } else if (auto* n = node.get_if<ConstPathNode>()) {
+            write_string(n->value);
+        } else if (auto* n = node.get_if<ConstBoolNode>()) {
+            write_u8(n->value ? 1 : 0);
+        } else if (node.holds<ConstNullNode>()) {
+            // No data for null
+        } else if (auto* n = node.get_if<VarNode>()) {
+            write_u32(n->index);
+        } else if (auto* n = node.get_if<LambdaNode>()) {
+            write_u32(n->arity);
+            if (n->body) write_node(*n->body);
+        } else if (auto* n = node.get_if<AppNode>()) {
+            if (n->func) write_node(*n->func);
+            if (n->arg) write_node(*n->arg);
+        } else if (auto* n = node.get_if<BinaryOpNode>()) {
+            write_u8(static_cast<uint8_t>(n->op));
+            if (n->left) write_node(*n->left);
+            if (n->right) write_node(*n->right);
+        } else if (auto* n = node.get_if<UnaryOpNode>()) {
+            write_u8(static_cast<uint8_t>(n->op));
+            if (n->operand) write_node(*n->operand);
+        } else if (auto* n = node.get_if<AttrsetNode>()) {
+            write_u8(n->recursive ? 1 : 0);
+            write_bindings(n->attrs);
+        } else if (auto* n = node.get_if<SelectNode>()) {
+            if (n->expr) write_node(*n->expr);
+            if (n->attr) write_node(*n->attr);
+            // The default expression is the only child with a presence flag.
+            if (n->default_expr && *n->default_expr) {
+                write_u8(1);
+                write_node(**n->default_expr);
+            } else {
+                write_u8(0);
+            }
+        } else if (auto* n = node.get_if<WithNode>()) {
+            if (n->attrs) write_node(*n->attrs);
+            if (n->body) write_node(*n->body);
+        } else if (auto* n = node.get_if<IfNode>()) {
+            if (n->cond) write_node(*n->cond);
+            if (n->then_branch) write_node(*n->then_branch);
+            if (n->else_branch) write_node(*n->else_branch);
+        } else if (auto* n = node.get_if<LetNode>()) {
+            write_bindings(n->bindings);
+            if (n->body) write_node(*n->body);
+        } else if (auto* n = node.get_if<LetRecNode>()) {
+            write_bindings(n->bindings);
+            if (n->body) write_node(*n->body);
+        } else if (auto* n = node.get_if<AssertNode>()) {
+            if (n->cond) write_node(*n->cond);
+            if (n->body) write_node(*n->body);
+        }
+    }
+};
+
+Serializer::Serializer() : pImpl(std::make_unique<Impl>()) {}
+Serializer::~Serializer() = default;
+
+// Serializes `module` and writes the bytes to `path`.
+// Throws std::runtime_error when the file cannot be opened or written.
+void Serializer::serialize(const IRModule& module, const std::string& path) {
+    const auto bytes = serialize_to_bytes(module);
+    std::ofstream out(path, std::ios::binary);
+    if (!out) {
+        throw std::runtime_error("cannot open IR output file: " + path);
+    }
+    out.write(reinterpret_cast<const char*>(bytes.data()),
+              static_cast<std::streamsize>(bytes.size()));
+    out.flush();
+    if (!out) {
+        throw std::runtime_error("failed to write IR output file: " + path);
+    }
+}
+
+// Layout: magic, version, sources, imports, string table, entry flag (u8),
+// then the entry AST when the flag is 1.
+// NOTE(review): string_table is an unordered_map, so the order of its entries
+// in the output is whatever iteration order the map happens to have.
+std::vector<uint8_t> Serializer::serialize_to_bytes(const IRModule& module) {
+    pImpl->buffer.clear();
+
+    pImpl->write_u32(IR_MAGIC);
+    pImpl->write_u32(IR_VERSION);
+
+    pImpl->write_count(module.sources.size());
+    for (const auto& src : module.sources) {
+        pImpl->write_string(src.path);
+        pImpl->write_string(src.content);
+    }
+
+    pImpl->write_count(module.imports.size());
+    for (const auto& [from, to] : module.imports) {
+        pImpl->write_string(from);
+        pImpl->write_string(to);
+    }
+
+    pImpl->write_count(module.string_table.size());
+    for (const auto& [str, id] : module.string_table) {
+        pImpl->write_string(str);
+        pImpl->write_u32(id);
+    }
+
+    if (module.entry) {
+        pImpl->write_u8(1);
+        pImpl->write_node(*module.entry);
+    } else {
+        pImpl->write_u8(0);
+    }
+
+    return pImpl->buffer;
+}
+
+// Cursor over a byte buffer. Every read is bounds-checked and throws
+// std::runtime_error on truncated input instead of reading past the end.
+struct Deserializer::Impl {
+    std::vector<uint8_t> buffer;
+    size_t pos = 0;
+
+    // Throws unless `count` more bytes are available at `pos`.
+    void require(size_t count) const {
+        if (pos > buffer.size() || count > buffer.size() - pos) {
+            throw std::runtime_error("Truncated IR data");
+        }
+    }
+
+    uint8_t read_u8() {
+        require(1);
+        return buffer[pos++];
+    }
+
+    uint32_t read_u32() {
+        require(4);
+        uint32_t val = 0;
+        for (int i = 0; i < 4; i++) {
+            val |= static_cast<uint32_t>(buffer[pos + i]) << (i * 8);
+        }
+        pos += 4;
+        return val;
+    }
+
+    uint64_t read_u64() {
+        require(8);
+        uint64_t val = 0;
+        for (int i = 0; i < 8; i++) {
+            val |= static_cast<uint64_t>(buffer[pos + i]) << (i * 8);
+        }
+        pos += 8;
+        return val;
+    }
+
+    std::string read_string() {
+        const uint32_t len = read_u32();
+        require(len);
+        std::string str(reinterpret_cast<const char*>(buffer.data() + pos), len);
+        pos += len;
+        return str;
+    }
+};
+
+Deserializer::Deserializer() : pImpl(std::make_unique<Impl>()) {}
+Deserializer::~Deserializer() = default;
+
+// Reads the whole file at `path` and decodes it.
+// Throws std::runtime_error when the file cannot be opened or read.
+IRModule Deserializer::deserialize(const std::string& path) {
+    std::ifstream in(path, std::ios::binary | std::ios::ate);
+    if (!in) {
+        throw std::runtime_error("cannot open IR file: " + path);
+    }
+    // Opened at the end, so tellg() is the file size; it is negative on failure.
+    const std::streamoff size = in.tellg();
+    if (size < 0) {
+        throw std::runtime_error("cannot determine size of IR file: " + path);
+    }
+    std::vector<uint8_t> bytes(static_cast<size_t>(size));
+    in.seekg(0);
+    in.read(reinterpret_cast<char*>(bytes.data()), static_cast<std::streamsize>(size));
+    if (!in) {
+        throw std::runtime_error("failed to read IR file: " + path);
+    }
+    return deserialize(bytes);
+}
+
+// Decodes the header, sources, imports and string table from `data`.
+// Throws std::runtime_error on a bad magic number, an unsupported version or
+// truncated input. The entry AST is not decoded yet (see TODO below).
+IRModule Deserializer::deserialize(const std::vector<uint8_t>& data) {
+    pImpl->buffer = data;
+    pImpl->pos = 0;
+
+    IRModule module;
+
+    uint32_t magic = pImpl->read_u32();
+    if (magic != IR_MAGIC) {
+        throw std::runtime_error("Invalid IR file");
+    }
+
+    uint32_t version = pImpl->read_u32();
+    if (version != IR_VERSION) {
+        throw std::runtime_error("Unsupported IR version");
+    }
+
+    uint32_t num_sources = pImpl->read_u32();
+    for (uint32_t i = 0; i < num_sources; i++) {
+        SourceFile src;
+        src.path = pImpl->read_string();
+        src.content = pImpl->read_string();
+        module.sources.push_back(std::move(src));
+    }
+
+    uint32_t num_imports = pImpl->read_u32();
+    for (uint32_t i = 0; i < num_imports; i++) {
+        // Named locals make the read order (from, then to) explicit.
+        std::string from = pImpl->read_string();
+        std::string to = pImpl->read_string();
+        module.imports.emplace_back(std::move(from), std::move(to));
+    }
+
+    uint32_t num_strings = pImpl->read_u32();
+    for (uint32_t i = 0; i < num_strings; i++) {
+        std::string str = pImpl->read_string();
+        uint32_t id = pImpl->read_u32();
+        module.string_table[str] = id;
+    }
+
+    if (pImpl->read_u8()) {
+        // TODO: deserialize AST
+    }
+
+    return module;
+}
+
+}
diff --git a/src/irc/serializer.h b/src/irc/serializer.h
new file mode 100644
index 0000000..67e72b5
--- /dev/null
+++ b/src/irc/serializer.h
@@ -0,0 +1,39 @@
+#ifndef NIX_IRC_SERIALIZER_H
+#define NIX_IRC_SERIALIZER_H
+
+#include "types.h"
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace nix_irc {
+
+// Encodes an IRModule into the binary IR format.
+class Serializer {
+public:
+    Serializer();
+    ~Serializer();
+
+    void serialize(const IRModule& module, const std::string& path);
+    std::vector<uint8_t> serialize_to_bytes(const IRModule& module);
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> pImpl;
+};
+
+// Decodes the binary IR format back into an IRModule.
+class Deserializer {
+public:
+    Deserializer();
+    ~Deserializer();
+
+    IRModule deserialize(const std::string& path);
+    IRModule deserialize(const std::vector<uint8_t>& data);
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> pImpl;
+};
+
+}
+
+#endif
\ No newline at end of file
diff --git a/src/irc/types.h b/src/irc/types.h
new file mode 100644
index 0000000..68aedbe
--- /dev/null
+++ b/src/irc/types.h
@@ -0,0 +1,277 @@
+#ifndef NIX_IRC_TYPES_H
+#define NIX_IRC_TYPES_H
+
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <string>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>
+#include <variant>
+#include <vector>
+
+namespace nix_irc {
+
+// File signature; the bytes 0x4E 0x49 0x52 0x58 are ASCII "NIRX".
+constexpr uint32_t IR_MAGIC = 0x4E495258;
+constexpr uint32_t IR_VERSION = 1;
+
+// One-byte tag written in front of every serialized node.
+enum class NodeType : uint8_t {
+    CONST_INT = 0x01,
+    CONST_STRING = 0x02,
+    CONST_PATH = 0x03,
+    CONST_BOOL = 0x04,
+    CONST_NULL = 0x05,
+    VAR = 0x10,
+    LAMBDA = 0x20,
+    APP = 0x21,
+    BINARY_OP = 0x22,
+    UNARY_OP = 0x23,
+    ATTRSET = 0x30,
+    SELECT = 0x31,
+    WITH = 0x32,
+    IF = 0x40,
+    LET = 0x50,
+    LETREC = 0x51,
+    ASSERT = 0x52,
+    THUNK = 0x60,
+    FORCE = 0x61,
+    ERROR = 0xFF
+};
+
+enum class BinaryOp : uint8_t {
+    ADD, SUB, MUL, DIV, CONCAT,
+    EQ, NE, LT, GT, LE, GE,
+    AND, OR, IMPL
+};
+
+enum class UnaryOp : uint8_t {
+    NEG, NOT
+};
+
+// Forward declare Node for use in shared_ptr
+class Node;
+
+// NOTE(review): template arguments in this header were lost when the patch
+// text was extracted and are rebuilt from how the members are used.
+
+// Every node struct below carries the source `line` it came from.
+
+struct ConstIntNode {
+    int64_t value;
+    uint32_t line = 0;
+    ConstIntNode(int64_t v = 0, uint32_t l = 0) : value(v), line(l) {}
+};
+
+struct ConstStringNode {
+    std::string value;
+    uint32_t line = 0;
+    ConstStringNode(std::string v = "", uint32_t l = 0) : value(std::move(v)), line(l) {}
+};
+
+struct ConstPathNode {
+    std::string value;
+    uint32_t line = 0;
+    ConstPathNode(std::string v = "", uint32_t l = 0) : value(std::move(v)), line(l) {}
+};
+
+struct ConstBoolNode {
+    bool value;
+    uint32_t line = 0;
+    ConstBoolNode(bool v = false, uint32_t l = 0) : value(v), line(l) {}
+};
+
+struct ConstNullNode {
+    uint32_t line = 0;
+    ConstNullNode(uint32_t l = 0) : line(l) {}
+};
+
+// Variable reference by index; `name` is empty when no name was supplied.
+struct VarNode {
+    uint32_t index = 0;
+    std::optional<std::string> name;
+    uint32_t line = 0;
+    // n.empty() is evaluated before the move, so the check sees the original value.
+    VarNode(uint32_t idx = 0, std::string n = "", uint32_t l = 0)
+        : index(idx), name(n.empty() ? std::nullopt : std::optional<std::string>(std::move(n))), line(l) {}
+};
+
+struct LambdaNode {
+    uint32_t arity = 1;
+    std::shared_ptr<Node> body;
+    std::optional<std::string> param_name;
+    uint32_t line = 0;
+    LambdaNode(uint32_t a, std::shared_ptr<Node> b, uint32_t l = 0);
+};
+
+struct AppNode {
+    std::shared_ptr<Node> func;
+    std::shared_ptr<Node> arg;
+    uint32_t line = 0;
+    AppNode(std::shared_ptr<Node> f, std::shared_ptr<Node> a, uint32_t l = 0);
+};
+
+struct BinaryOpNode {
+    BinaryOp op;
+    std::shared_ptr<Node> left;
+    std::shared_ptr<Node> right;
+    uint32_t line = 0;
+    BinaryOpNode(BinaryOp o, std::shared_ptr<Node> l, std::shared_ptr<Node> r, uint32_t ln = 0);
+};
+
+struct UnaryOpNode {
+    UnaryOp op;
+    std::shared_ptr<Node> operand;
+    uint32_t line = 0;
+    UnaryOpNode(UnaryOp o, std::shared_ptr<Node> operand, uint32_t l = 0);
+};
+
+struct AttrsetNode {
+    std::vector<std::pair<std::string, std::shared_ptr<Node>>> attrs;
+    bool recursive = false;
+    uint32_t line = 0;
+    AttrsetNode(bool rec = false, uint32_t l = 0) : recursive(rec), line(l) {}
+};
+
+struct SelectNode {
+    std::shared_ptr<Node> expr;
+    std::shared_ptr<Node> attr;
+    std::optional<std::shared_ptr<Node>> default_expr;
+    uint32_t line = 0;
+    SelectNode(std::shared_ptr<Node> e, std::shared_ptr<Node> a, uint32_t l = 0);
+};
+
+struct WithNode {
+    std::shared_ptr<Node> attrs;
+    std::shared_ptr<Node> body;
+    uint32_t line = 0;
+    WithNode(std::shared_ptr<Node> a, std::shared_ptr<Node> b, uint32_t l = 0);
+};
+
+struct IfNode {
+    std::shared_ptr<Node> cond;
+    std::shared_ptr<Node> then_branch;
+    std::shared_ptr<Node> else_branch;
+    uint32_t line = 0;
+    IfNode(std::shared_ptr<Node> c, std::shared_ptr<Node> t, std::shared_ptr<Node> e, uint32_t l = 0);
+};
+
+struct LetNode {
+    std::vector<std::pair<std::string, std::shared_ptr<Node>>> bindings;
+    std::shared_ptr<Node> body;
+    uint32_t line = 0;
+    LetNode(std::shared_ptr<Node> b, uint32_t l = 0);
+};
+
+struct LetRecNode {
+    std::vector<std::pair<std::string, std::shared_ptr<Node>>> bindings;
+    std::shared_ptr<Node> body;
+    uint32_t line = 0;
+    LetRecNode(std::shared_ptr<Node> b, uint32_t l = 0);
+};
+
+struct AssertNode {
+    std::shared_ptr<Node> cond;
+    std::shared_ptr<Node> body;
+    uint32_t line = 0;
+    AssertNode(std::shared_ptr<Node> c, std::shared_ptr<Node> b, uint32_t l = 0);
+};
+
+struct ThunkNode {
+    std::shared_ptr<Node> expr;
+    uint32_t line = 0;
+    ThunkNode(std::shared_ptr<Node> e, uint32_t l = 0);
+};
+
+struct ForceNode {
+    std::shared_ptr<Node> expr;
+    uint32_t line = 0;
+    ForceNode(std::shared_ptr<Node> e, uint32_t l = 0);
+};
+
+// Node wraps a variant for type-safe AST
+class Node {
+public:
+    using Variant = std::variant<
+        ConstIntNode,
+        ConstStringNode,
+        ConstPathNode,
+        ConstBoolNode,
+        ConstNullNode,
+        VarNode,
+        LambdaNode,
+        AppNode,
+        BinaryOpNode,
+        UnaryOpNode,
+        AttrsetNode,
+        SelectNode,
+        WithNode,
+        IfNode,
+        LetNode,
+        LetRecNode,
+        AssertNode,
+        ThunkNode,
+        ForceNode
+    >;
+
+    Variant data;
+
+    // Converting constructor from any alternative. It is disabled when T is
+    // Node itself: an unconstrained T&& would be chosen over the copy
+    // constructor for a non-const Node lvalue and then fail to compile.
+    template <typename T,
+              typename = std::enable_if_t<!std::is_same_v<std::decay_t<T>, Node>>>
+    Node(T&& value) : data(std::forward<T>(value)) {}
+
+    // Pointer to the held alternative, or nullptr when another one is held.
+    template <typename T>
+    T* get_if() { return std::get_if<T>(&data); }
+
+    template <typename T>
+    const T* get_if() const { return std::get_if<T>(&data); }
+
+    template <typename T>
+    bool holds() const { return std::holds_alternative<T>(data); }
+};
+
+// Constructor implementations
+// The by-value shared_ptr parameters are moved into the members to avoid an
+// extra reference-count round trip per child.
+inline LambdaNode::LambdaNode(uint32_t a, std::shared_ptr<Node> b, uint32_t l)
+    : arity(a), body(std::move(b)), line(l) {}
+
+inline AppNode::AppNode(std::shared_ptr<Node> f, std::shared_ptr<Node> a, uint32_t l)
+    : func(std::move(f)), arg(std::move(a)), line(l) {}
+
+inline BinaryOpNode::BinaryOpNode(BinaryOp o, std::shared_ptr<Node> l, std::shared_ptr<Node> r, uint32_t ln)
+    : op(o), left(std::move(l)), right(std::move(r)), line(ln) {}
+
+inline UnaryOpNode::UnaryOpNode(UnaryOp o, std::shared_ptr<Node> operand, uint32_t l)
+    : op(o), operand(std::move(operand)), line(l) {}
+
+inline SelectNode::SelectNode(std::shared_ptr<Node> e, std::shared_ptr<Node> a, uint32_t l)
+    : expr(std::move(e)), attr(std::move(a)), line(l) {}
+
+inline WithNode::WithNode(std::shared_ptr<Node> a, std::shared_ptr<Node> b, uint32_t l)
+    : attrs(std::move(a)), body(std::move(b)), line(l) {}
+
+inline IfNode::IfNode(std::shared_ptr<Node> c, std::shared_ptr<Node> t, std::shared_ptr<Node> e, uint32_t l)
+    : cond(std::move(c)), then_branch(std::move(t)), else_branch(std::move(e)), line(l) {}
+
+inline LetNode::LetNode(std::shared_ptr<Node> b, uint32_t l)
+    : body(std::move(b)), line(l) {}
+
+inline LetRecNode::LetRecNode(std::shared_ptr<Node> b, uint32_t l)
+    : body(std::move(b)), line(l) {}
+
+inline AssertNode::AssertNode(std::shared_ptr<Node> c, std::shared_ptr<Node> b, uint32_t l)
+    :
cond(std::move(c)), body(std::move(b)), line(l) {}
+
+inline ThunkNode::ThunkNode(std::shared_ptr<Node> e, uint32_t l)
+    : expr(std::move(e)), line(l) {}
+
+inline ForceNode::ForceNode(std::shared_ptr<Node> e, uint32_t l)
+    : expr(std::move(e)), line(l) {}
+
+// One input file: its path, its text and (optionally) its parsed AST.
+struct SourceFile {
+    std::string path;
+    std::string content;
+    std::shared_ptr<Node> ast;
+};
+
+// Everything that goes into one serialized IR bundle.
+struct IRModule {
+    uint32_t version = IR_VERSION;
+    std::vector<SourceFile> sources;
+    std::vector<std::pair<std::string, std::string>> imports;
+    std::shared_ptr<Node> entry;
+    std::unordered_map<std::string, uint32_t> string_table;
+};
+
+}
+#endif
diff --git a/src/plugin.cpp b/src/plugin.cpp
new file mode 100644
index 0000000..e00e035
--- /dev/null
+++ b/src/plugin.cpp
@@ -0,0 +1,191 @@
+#ifdef unix
+#undef unix
+#endif
+
+#include "nix/expr/eval.hh"
+#include "nix/expr/primops.hh"
+#include "nix/expr/value.hh"
+#include "nix/store/store-api.hh"
+#include "nix/util/source-path.hh"
+
+#include "irc/ir_gen.h"
+#include "irc/parser.h"
+#include "irc/resolver.h"
+#include "irc/serializer.h"
+#include "irc/types.h"
+
+// NOTE(review): the standard include targets and all template arguments were
+// lost when this patch text was extracted. The includes are rebuilt from the
+// names this file uses; the `EvalError` argument to state.error<>() is an
+// assumption. Confirm both against the repository.
+#include <cstdint>
+#include <exception>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+
+namespace nix_ir_plugin {
+
+using namespace nix;
+using namespace nix_irc;
+
+/**
+ * Load and evaluate a pre-compiled IR bundle
+ * Usage: builtins.nixIR.loadIR "/path/to/file.nixir"
+ *
+ * Currently only validates the magic number and returns
+ * { type = "ir-bundle"; path = <the given path>; }.
+ *
+ * NOTE(review): the primop is registered below as `__nixIR_loadIR`; confirm
+ * that the attribute path users see really is `builtins.nixIR.loadIR`.
+ */
+static void prim_loadIR(EvalState &state, const PosIdx pos, Value **args,
+                        Value &v) {
+  auto path = state.forceStringNoCtx(
+      *args[0], pos,
+      "while evaluating the first argument to builtins.nixIR.loadIR");
+
+  std::string pathStr(path);
+
+  // Try to load the IR bundle
+  std::ifstream file(pathStr, std::ios::binary);
+  if (!file.is_open()) {
+    state.error<EvalError>("cannot open IR bundle: %s", pathStr)
+        .atPos(pos)
+        .debugThrow();
+  }
+
+  // Read magic number to verify it's an IR file. The serializer writes
+  // integers little-endian, so the value is assembled byte by byte instead of
+  // reinterpreting the bytes in host order.
+  unsigned char raw[4] = {};
+  file.read(reinterpret_cast<char *>(raw), sizeof(raw));
+  const bool complete =
+      file.gcount() == static_cast<std::streamsize>(sizeof(raw));
+  const uint32_t magic = static_cast<uint32_t>(raw[0]) |
+                         (static_cast<uint32_t>(raw[1]) << 8) |
+                         (static_cast<uint32_t>(raw[2]) << 16) |
+                         (static_cast<uint32_t>(raw[3]) << 24);
+
+  // A file shorter than four bytes cannot carry the magic number either.
+  if (!complete || magic != IR_MAGIC) {
+    state
+        .error<EvalError>("not a valid IR bundle: %s (bad magic number)",
+                          pathStr)
+        .atPos(pos)
+        .debugThrow();
+  }
+
+  // For now, just return a marker that this is an IR file
+  // FIXME: complete full VM implementation
+  auto bindings = state.buildBindings(2);
+
+  Value *vType = state.allocValue();
+  vType->mkString("ir-bundle");
+  bindings.insert(state.symbols.create("type"), vType);
+
+  Value *vPath = state.allocValue();
+  vPath->mkString(pathStr);
+  bindings.insert(state.symbols.create("path"), vPath);
+
+  v.mkAttrs(bindings.finish());
+}
+
+/**
+ * Compile Nix source to IR on-the-fly
+ * Usage: builtins.nixIR.compile "{ x = 1; }"
+ *
+ * Currently returns { type = "ir-compiled"; source = <first 50 chars>; }.
+ */
+static void prim_compileNix(EvalState &state, const PosIdx pos, Value **args,
+                            Value &v) {
+  auto source = state.forceStringNoCtx(
+      *args[0], pos,
+      "while evaluating the first argument to builtins.nixIR.compile");
+
+  std::string sourceStr(source);
+
+  // Only the parser and IR generator run inside the try block. The Nix
+  // errors are raised after it, so the catch-all cannot catch this
+  // function's own "failed to parse" error and re-wrap it.
+  std::shared_ptr<nix_irc::Node> ast;
+  std::string failure;
+  bool failed = false;
+
+  try {
+    // Parse the Nix source
+    Parser parser;
+    ast = parser.parse(sourceStr, "");
+
+    if (ast) {
+      // Generate IR
+      IRGenerator ir_gen;
+      [[maybe_unused]] auto ir = ir_gen.generate(ast);
+    }
+  } catch (const std::exception &e) {
+    failed = true;
+    failure = e.what();
+  }
+
+  if (failed) {
+    state.error<EvalError>("IR compilation failed: %s", failure)
+        .atPos(pos)
+        .debugThrow();
+  }
+
+  if (!ast) {
+    state.error<EvalError>("failed to parse Nix expression")
+        .atPos(pos)
+        .debugThrow();
+  }
+
+  // For now, return a marker that compilation succeeded
+  // FIXME: full evaluation
+  auto bindings = state.buildBindings(2);
+
+  Value *vType = state.allocValue();
+  vType->mkString("ir-compiled");
+  bindings.insert(state.symbols.create("type"), vType);
+
+  Value *vSource = state.allocValue();
+  vSource->mkString(sourceStr.substr(0, 50));
+  bindings.insert(state.symbols.create("source"), vSource);
+
+  v.mkAttrs(bindings.finish());
+}
+
+/**
+ * Get information about the IR plugin
+ * Usage: builtins.nixIR.info
+ */
+static void prim_info(EvalState &state, const PosIdx pos, Value **args,
+                      Value &v) {
+  auto bindings = state.buildBindings(4);
+
+  Value *vName = state.allocValue();
+  vName->mkString("nix-ir-plugin");
+  bindings.insert(state.symbols.create("name"), vName);
+
+  Value *vVersion = state.allocValue();
+  vVersion->mkString("0.1.0-alpha");
+  bindings.insert(state.symbols.create("version"), vVersion);
+
+  Value *vPhase = state.allocValue();
+  vPhase->mkString("phase-4");
+  bindings.insert(state.symbols.create("phase"), vPhase);
+
+  Value *vStatus = state.allocValue();
+  vStatus->mkString("compiler-complete");
+  bindings.insert(state.symbols.create("status"), vStatus);
+
+  v.mkAttrs(bindings.finish());
+}
+
+// Register primops on plugin load
+static RegisterPrimOp rp_loadIR({
+    .name = "__nixIR_loadIR",
+    .args = {"path"},
+    .arity = 1,
+    .doc = R"(
+      Load a pre-compiled IR bundle from the given path.
+      Returns an attrset with information about the loaded bundle.
+
+    )",
+    .fun = prim_loadIR,
+});
+
+static RegisterPrimOp rp_compileNix({
+    .name = "__nixIR_compile",
+    .args = {"source"},
+    .arity = 1,
+    .doc = R"(
+      Compile Nix source code to IR on-the-fly.
+      Returns an attrset with information about the compiled IR.
+
+    )",
+    .fun = prim_compileNix,
+});
+
+static RegisterPrimOp rp_info({
+    .name = "__nixIR_info",
+    .args = {},
+    .arity = 0,
+    .doc = R"(
+      Get information about the IR plugin.
+      Returns an attrset with plugin name, version, and status.
+    )",
+    .fun = prim_info,
+});
+
+} // namespace nix_ir_plugin
+
+// Plugin initialization message
+__attribute__((constructor)) static void init_plugin() {
+  std::cerr << "nix-ir-plugin loaded" << std::endl;
+}