diff --git a/CMakeLists.txt b/CMakeLists.txt index edb503c..1516e03 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,9 +17,11 @@ pkg_check_modules(NIX_MAIN REQUIRED IMPORTED_TARGET nix-main) add_executable(nix-irc src/irc/main.cpp src/irc/parser.cpp + src/irc/lexer.cpp src/irc/resolver.cpp src/irc/ir_gen.cpp src/irc/serializer.cpp + src/irc/types.cpp ) target_include_directories(nix-irc PRIVATE @@ -38,10 +40,12 @@ target_link_libraries(nix-irc PRIVATE add_library(nix-ir-plugin MODULE src/plugin.cpp src/irc/parser.cpp + src/irc/lexer.cpp src/irc/resolver.cpp src/irc/ir_gen.cpp src/irc/serializer.cpp src/irc/evaluator.cpp + src/irc/types.cpp ) # Include directories from pkg-config @@ -65,6 +69,10 @@ target_link_libraries(nix-ir-plugin PRIVATE ${NIX_MAIN_LINK_LIBRARIES} ) +# Set output directories to build/ +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + # Set output name set_target_properties(nix-ir-plugin PROPERTIES PREFIX "" @@ -78,6 +86,9 @@ install(TARGETS nix-ir-plugin LIBRARY DESTINATION "${CMAKE_INSTALL_PREFIX}/lib/n add_executable(regression_test tests/regression_test.cpp src/irc/serializer.cpp + src/irc/parser.cpp + src/irc/lexer.cpp + src/irc/types.cpp ) target_include_directories(regression_test PRIVATE @@ -92,3 +103,7 @@ target_link_libraries(regression_test PRIVATE ${NIX_EXPR_LINK_LIBRARIES} ${NIX_UTIL_LINK_LIBRARIES} ) + +# CTest integration +enable_testing() +add_test(NAME regression_test COMMAND regression_test) diff --git a/README.md b/README.md index 1048d9c..9ff280b 100644 --- a/README.md +++ b/README.md @@ -169,27 +169,44 @@ Entry: ### Building ```bash -# Configure -$ cmake -B build +# Using just (recommended) +$ just build -# Build -$ make +# Or manually with CMake +$ cmake -B build -G Ninja +$ cmake --build build -# The nix-irc executable will be in the project root -$ ./nix-irc --help +# The nix-irc executable will be in build/ +$ ./build/nix-irc --help ``` +### 
Available Tasks + +Run `just` to see all available tasks: + +- `just build` - Build all targets +- `just test` - Run all tests (unit, compile, integration) +- `just bench` - Run performance benchmarks +- `just clean` - Clean build artifacts +- `just smoke` - Run quick smoke test +- `just stats` - Show project statistics + +See `just --list` for the complete list of available commands. + ### Compiling Nix to IR ```bash # Basic compilation -$ nix-irc input.nix output.nixir +$ ./build/nix-irc input.nix output.nixir # With import search paths -$ nix-irc -I ./lib -I /nix/store/... input.nix output.nixir +$ ./build/nix-irc -I ./lib -I /nix/store/... input.nix output.nixir # Disable import resolution -$ nix-irc --no-imports input.nix output.nixir +$ ./build/nix-irc --no-imports input.nix output.nixir + +# Using just +$ just compile input.nix output.nixir ``` ### Runtime Evaluation (Plugin) @@ -212,13 +229,21 @@ $ nix --plugin-files ./nix-ir-plugin.so eval --expr 'builtins.nixIR_info' ### Running Tests ```bash -# Test all sample files -for f in tests/*.nix; do - ./nix-irc "$f" "${f%.nix}.nixir" +# Run all tests +$ just test + +# Run specific test suites +$ just test-unit # Unit tests only +$ just test-compile # Compilation tests only +$ just test-integration # Integration tests only + +# Manually test all fixtures +$ for f in tests/fixtures/*.nix; do + ./build/nix-irc "$f" "${f%.nix}.nixir" done # Verify IR format -$ hexdump -C tests/simple.nixir | head -3 +$ hexdump -C tests/fixtures/simple.nixir | head -3 ``` ## Contributing diff --git a/docs/SPEC.md b/docs/SPEC.md new file mode 100644 index 0000000..f948679 --- /dev/null +++ b/docs/SPEC.md @@ -0,0 +1,265 @@ +# Nixir Technical Specification + +This is a distillation of my personal notes on my "research" within the Nix +codebase and the subsequent design notes on Nixir. 
While some of those, +naturally, belong in the README I have elected to compile a list of noteworthy +details into a "specification document" for those possibly interested, for some +reason, in integrating with Nixir. + +Beware, here be observations. + +## What This Project Is + +Nixir is, most simply (and elegantly) put, a Nix compiler _and runtime_ packaged +as a plugin. The compiler component compiles a subset of Nix source to a custom +binary intermediate representation (IR) and then executes IR inside a virtual +machine running within the plugin process. Hence it's called Nix-ir. + +As you might've caught on from the README already, the project consists of two +artifacts: a standalone compiler tool called `nix-irc` that transforms `.nix` +files into `.nixir` bundles, and a plugin library (`nix-ir-plugin.so`) that Nix +loads to provide three primops for interacting with compiled IR. + +The architecture handles the full compilation pipeline. Static imports are +resolved at compile time and inlined into the output bundle, while the compiled +VM handles all evaluation at runtime. This mirrors how Nixpkgs itself +distinguishes between stable library code and application-specific expressions. + +The plugin does not intercept evaluation automatically. Instead, it exposes +primops that users invoke explicitly. This design exists because Nix's plugin +API does not provide hooks into the core evaluation loop. Unfortunate, but 'tis +life. + +## Why Compile Nix + +Every invocation of `nix eval` or `nix build` must parse, type-check, and +evaluate expressions from scratch. For large codebases, this overhead is +measurable. + +Nix does provide a persistent evaluation cache, stored in SQLite. However, this +cache only applies to flake-based workflows. Direct imports like +`import ./foo.nix` do not benefit from the cache and re-parse on each +invocation. + +For example, a NixOS configuration using direct imports to `nixpkgs.lib` +re-parses source files on every rebuild. 
The compiler front-end accounts for +substantial wall-clock time before evaluation begins. + +Precompiled IR eliminates, or rather, attempts to eliminate this cost. A +`.nixir` bundle contains serialized AST nodes with all variable names converted +to numeric indices. Loading skips parsing entirely and begins directly with the +VM executing pre-processed code. + +The project _also_ serves as an implementation study. I say also, but it is +actually the main goal of this project. Reimplementing Nix's evaluation +semantics reveals details that the upstream C++ code obscures. The thunk +mechanism, environment model, and cycle detection become tangible when you can +read and step through the implementation. I don't expect to get a better +understanding of the Nix language, but I now have more reasons to badmouth it. + +## The IR Format + +The binary format uses 36-byte fixed header followed by variable-length +sections. All multi-byte integers use little-endian byte order. + +The header layout: + +```plaintext +0x00-0x03: Magic identifier, value 0x4E495258 +0x04-0x07: Version number, currently 2 +0x08-0x0B: Flags field, reserved +0x0C-0x0F: Offset to string table +0x10-0x13: Offset to primop table +0x14-0x17: Offset to IR blob +0x18-0x1B: String count +0x1C-0x1F: Primop count +0x20-0x23: Reserved +``` + +The magic value `0x4E495258` corresponds to the bytes N I R X when read in +big-endian order. + +The string table follows the header. Each entry encodes length as a varint, then +that many UTF-8 bytes. All attribute names, identifiers, and string literals in +the source are de-duplicated at compile time and stored here. References +throughout the IR use indices into this table rather than inline strings. + +The primop table defines built-in operations. Each entry contains the string +table index for the operation name, its arity, and optional flags. This table +enables the VM to dispatch operations by index without string comparison. 
+ +The IR blob contains the actual program. Each node begins with a type byte +followed by type-specific payload. + +Node type enumeration from the source: + +```plaintext +0x01: CONST_INT - Signed 64-bit integer +0x02: CONST_STRING - String table index +0x03: CONST_PATH - String table index +0x04: CONST_BOOL - 0x00 or 0x01 +0x05: CONST_NULL - No payload +0x06: CONST_FLOAT - IEEE 754 double +0x07: CONST_URI - String table index +0x08: CONST_LOOKUP_PATH - String table index for +0x10: VAR - Two varints: depth and index +0x20: LAMBDA - Arity and body offset +0x21: APP - Function and argument offsets +0x22: BINARY_OP - Operation enum and operands +0x23: UNARY_OP - Operation enum and operand +0x24: IMPORT - String table index for file path +0x30: ATTRSET - Count and recursive flag +0x31: SELECT - Expression, attribute, optional default +0x32: WITH - Attribute set and body offsets +0x33: LIST - Count and element offsets +0x34: HAS_ATTR - Expression and attribute +0x40: IF - Condition, then, and else offsets +0x50: LET - Binding count and body offset +0x51: LETREC - Binding count and body offset +0x52: ASSERT - Condition and body offsets +0x60: THUNK - Expression offset +0x61: FORCE - Expression offset +0xFF: ERROR - Error marker +``` + +Binary operations supported: + +```plaintext +ADD, SUB, MUL, DIV - Arithmetic on integers +CONCAT - List concatenation (++) +EQ, NE - Equality comparison +LT, GT, LE, GE - Ordering comparison +AND, OR, IMPL - Boolean logic +MERGE - Attribute set override (//) +``` + +## Variable Representation + +The compiler converts variable names to De Bruijn indices during IR generation. +Rather than storing strings like "x" in the output, each variable reference +encodes two numbers: the lexical depth and the position within that scope. + +The depth indicates how many lambda boundaries enclose the reference. A variable +in the outermost scope has depth zero. A variable referenced from inside one +lambda that refers to the outer scope has depth one. 
+ +The index indicates the position in that scope's environment array. The first +bound variable in a scope has index zero, the second has index one, and so +forth. + +During evaluation, the VM combines these two numbers into a single 32-bit value +where the high 16 bits encode depth and the low 16 bits encode index. Lookup +traverses the environment chain depth times, then indexes into the resulting +scope's binding array. This achieves O(1) variable resolution. + +## The Virtual Machine + +The VM implements lazy evaluation using an explicit thunk mechanism. Every +unevaluated expression and function argument wraps in a Thunk structure +containing the expression AST node and a pointer to the captured environment. + +When the VM needs a value, it calls `force()` on the thunk. The force operation +checks whether the thunk is already being evaluated. If evaluation attempts to +force a thunk that is currently evaluating, the VM detects the cycle and raises +"infinite recursion encountered". This matches Nix's behavior for recursive +definitions. + +The environment structure is an array-based chain. Each scope holds a pointer to +its parent scope and a vector of bound values. Looking up a variable traverses +parent pointers until reaching the scope at the correct depth, then indexes into +that scope's value array. This replaces string comparison with pointer traversal +and array indexing. + +Function application follows currying. When applying a function to an argument, +the VM checks whether the function's arity is satisfied. If yes, it extends the +environment with the new binding and evaluates the body. If not, it returns a +partial application awaiting additional arguments. + +The evaluator handles binary operations with type-specific dispatch. Addition +supports integers, strings, and paths with appropriate type coercion rules. +Comparison operators work on integers and strings. The merge operator combines +two attribute sets with right-side precedence. 
+ +## Plugin Primops + +The plugin registers three primops through Nix's `RegisterPrimOp` interface: + +`__nixIR_loadIR` accepts a file path string, deserializes the `.nixir` bundle, +evaluates the entry expression, and returns the resulting value. The VM measures +deserialization time and evaluation time separately, printing timing data to +stderr. + +`__nixIR_compile` accepts a string containing Nix source code, parses it +in-memory, generates IR, and evaluates the result. This enables runtime +compilation without external tooling. + +`__nixIR_info` returns an attribute set containing the plugin name +"nix-ir-plugin", version "0.1.0", and status "runtime-active". This is a +development-only primop that will be removed eventually. + +The primops use the double-underscore prefix internally. Users access them +through `builtins.nixIR_loadIR`, `builtins.nixIR_compile`, and +`builtins.nixIR_info` in their expressions. + +## Import Handling + +The compiler performs static import resolution when the import path meets +specific conditions. The path must be a literal string literal in the source, +not an interpolation or variable. The path must not use home directory +expansion. The resolved path must remain within the project root for security. +The target file must exist and be readable at compile time. + +When these conditions hold, the compiler reads the imported file, recursively +processes its imports, and embeds the resulting IR into the output bundle. The +final `.nixir` file is self-contained and requires no additional file lookups at +load time. + +When conditions do not hold, the compiler records the import as dynamic and +emits an IMPORT node containing the string table index. At runtime, the VM +evaluates the import expression to obtain the actual file path, then uses Nix's +standard evaluator to load that file. + +## What Works And What Does Not + +The implementation covers a substantial subset of Nix's expression language. 
+Literals work across all types including integers, floats, strings, paths, URIs, +booleans, and null. Lambda expressions, function application, and currying are +implemented. Attribute sets with both static and dynamic keys are supported. The +let and letrec forms work with proper recursive binding semantics. The if +expression, assert statement, with expression, and list literals are all +functional. + +The implementation does not cover derivations, builtins other than those +required for basic operation, or the full module system. These require +integration with Nix's store and download mechanisms that the VM does not +replicate. + +## Building And Using + +Create a build directory and configure with CMake: + +``` +cmake -B build -G Ninja +cmake --build build +``` + +This produces `nix-irc` in the build directory and `nix-ir-plugin.so` in the +project root. + +Compile a Nix file to IR: + +``` +./build/nix-irc input.nix output.nixir +``` + +Load and evaluate the compiled bundle through Nix: + +``` +nix --plugin-files ./nix-ir-plugin.so eval --expr 'builtins.nixIR_loadIR "output.nixir"' +``` + +Compile and evaluate source at runtime: + +``` +nix --plugin-files ./nix-ir-plugin.so eval --expr 'builtins.nixIR_compile "1 + 2"' +``` diff --git a/flake.nix b/flake.nix index ff42014..7d9db93 100644 --- a/flake.nix +++ b/flake.nix @@ -4,22 +4,28 @@ outputs = {nixpkgs, ...}: let systems = ["x86_64-linux" "aarch64-linux"]; forAllSystems = nixpkgs.lib.genAttrs systems; + pkgsFor = system: nixpkgs.legacyPackages.${system}; in { devShells = forAllSystems (system: let - pkgs = nixpkgs.legacyPackages.${system}; + pkgs = pkgsFor system; in { default = pkgs.mkShell { - buildInputs = with pkgs; [ + name = "nixir"; + buildInputs = with pkgs; let + nixForLinking = nixVersions.nixComponents_2_32; + nixForRuntime = nixVersions.nix_2_32; + in [ boost.dev libblake3.dev + pegtl - nixVersions.nixComponents_2_32.nix-store - nixVersions.nixComponents_2_32.nix-expr - 
nixVersions.nixComponents_2_32.nix-cmd - nixVersions.nixComponents_2_32.nix-fetchers - nixVersions.nixComponents_2_32.nix-main - nixVersions.nixComponents_2_32.nix-util - nixVersions.nix_2_32 + nixForRuntime + nixForLinking.nix-store + nixForLinking.nix-expr + nixForLinking.nix-cmd + nixForLinking.nix-fetchers + nixForLinking.nix-main + nixForLinking.nix-util ]; nativeBuildInputs = with pkgs; [ @@ -27,12 +33,29 @@ pkg-config ninja bear + clang-tools + just + entr ]; - env = { - NIX_PLUGINABI = "0.2"; - }; + env.NIX_PLUGINABI = "0.2"; }; }); + + formatter = forAllSystems (system: let + pkgs = pkgsFor system; + in + pkgs.writeShellApplication { + name = "nix3-fmt-wrapper"; + + runtimeInputs = [ + pkgs.alejandra + pkgs.fd + ]; + + text = '' + fd "$@" -t f -e nix -x alejandra -q '{}' + ''; + }); }; } diff --git a/justfile b/justfile new file mode 100644 index 0000000..9ca92f8 --- /dev/null +++ b/justfile @@ -0,0 +1,98 @@ +# Default recipe, show available commands +default: + @just --list + +# Build all targets +build: + cmake --build build + +# Clean build artifacts +clean: + rm -rf build + find tests -name '*.nixir' -delete + +# Configure and build from scratch +rebuild: clean + cmake -B build -G Ninja + cmake --build build + +# Run unit tests +test-unit: + ./build/regression_test + +# Run compilation tests (do all fixtures compile?) 
+test-compile: + #!/usr/bin/env bash + total=0 + success=0 + for f in tests/fixtures/*.nix; do + total=$((total+1)) + if ./build/nix-irc "$f" "${f%.nix}.nixir" 2>&1 | grep -q "Done!"; then + success=$((success+1)) + fi + done + echo "Compiled: $success/$total test files" + [ $success -eq $total ] + +# Run integration tests +test-integration: + ./tests/integration/run.sh + +# Run all tests +test: test-unit test-compile test-integration + @echo "All tests passed" + +# Run benchmarks +bench: + ./tests/benchmark/run.sh + +# Compile a single Nix file to IR +compile FILE OUTPUT="": + #!/usr/bin/env bash + if [ -z "{{OUTPUT}}" ]; then + file="{{FILE}}" + output="${file%.nix}.nixir" + else + output="{{OUTPUT}}" + fi + ./build/nix-irc "{{FILE}}" "$output" + +# Load plugin and evaluate Nix expression +eval FILE: + nix-instantiate --plugin-files ./build/nix-ir-plugin.so --eval --strict "{{FILE}}" + +# Format C++ code with clang-format +format: + find src tests -name '*.cpp' -o -name '*.h' | xargs clang-format -i + +# Run clang-tidy on source files +lint: + find src -name '*.cpp' | xargs clang-tidy --fix + +# Show project statistics +stats: + @echo "Lines of code:" + @find src -name '*.cpp' -o -name '*.h' | xargs wc -l | tail -1 + @echo "" + @echo "Test files:" + @find tests/fixtures -name '*.nix' | wc -l + @echo "" + @echo "Build status:" + @ls -lh build/nix-irc build/nix-ir-plugin.so build/regression_test 2>/dev/null || echo "Not built" + +# Run a quick smoke test +smoke: + ./build/nix-irc tests/fixtures/simple.nix /tmp/smoke.nixir + nix-instantiate --plugin-files ./build/nix-ir-plugin.so --eval tests/integration/simple_eval.nix + +# Generate IR from a Nix file and inspect it +inspect FILE: + ./build/nix-irc "{{FILE}}" /tmp/inspect.nixir + @echo "IR bundle size:" + @ls -lh /tmp/inspect.nixir | awk '{print $5}' + @echo "Magic number:" + @xxd -l 4 /tmp/inspect.nixir + +# Watch mode, rebuild on file changes +watch: + find src tests -name '*.cpp' -o -name '*.h' | entr -c just 
build test-unit diff --git a/src/irc/evaluator.cpp b/src/irc/evaluator.cpp index dfd6eb1..1684d40 100644 --- a/src/irc/evaluator.cpp +++ b/src/irc/evaluator.cpp @@ -5,9 +5,7 @@ #include "evaluator.h" #include "nix/expr/eval.hh" #include "nix/expr/value.hh" -#include "nix/util/error.hh" - -#include +#include "nix/util/url.hh" #include namespace nix_irc { @@ -23,15 +21,20 @@ struct IREnvironment { void bind(Value* val) { bindings.push_back(val); } - Value* lookup(uint32_t index) { + Value* lookup(uint32_t encoded_index) { + // Decode the index: high 16 bits = depth, low 16 bits = offset + uint32_t depth = encoded_index >> 16; + uint32_t offset = encoded_index & 0xFFFF; + IREnvironment* env = this; - while (env) { - if (index < env->bindings.size()) { - return env->bindings[index]; - } - index -= env->bindings.size(); + // Skip 'depth' levels to get to the right scope + for (uint32_t i = 0; i < depth && env; i++) { env = env->parent; } + + if (env && offset < env->bindings.size()) { + return env->bindings[offset]; + } return nullptr; } @@ -66,10 +69,34 @@ struct Evaluator::Impl { explicit Impl(EvalState& s) : state(s) {} - ~Impl() { - for (auto& env : environments) { - delete env.release(); + static std::string escape_nix_string(std::string_view value) { + std::string escaped; + escaped.reserve(value.size()); + + for (char ch : value) { + switch (ch) { + case '\\': + escaped += "\\\\"; + break; + case '"': + escaped += "\\\""; + break; + case '\n': + escaped += "\\n"; + break; + case '\r': + escaped += "\\r"; + break; + case '\t': + escaped += "\\t"; + break; + default: + escaped.push_back(ch); + break; + } } + + return escaped; } IREnvironment* make_env(IREnvironment* parent = nullptr) { @@ -100,6 +127,39 @@ struct Evaluator::Impl { thunks.erase(v); } + // Copy a forced value into a destination Value + void copy_value(Value& dest, Value* src) { + if (!src) + return; + force(src); + state.forceValue(*src, noPos); + switch (src->type()) { + case nInt: + 
dest.mkInt(src->integer()); + break; + case nBool: + dest.mkBool(src->boolean()); + break; + case nString: + dest.mkString(src->c_str()); + break; + case nPath: + dest.mkPath(src->path()); + break; + case nNull: + dest.mkNull(); + break; + case nFloat: + dest.mkFloat(src->fpoint()); + break; + default: + // For attrs, lists, functions, etc., direct assignment is safe + // as they use reference counting internally + dest = *src; + break; + } + } + void eval_node(const std::shared_ptr& node, Value& v, IREnvironment* env) { if (!node) { v.mkNull(); @@ -108,14 +168,42 @@ struct Evaluator::Impl { if (auto* n = node->get_if()) { v.mkInt(n->value); + } else if (auto* n = node->get_if()) { + v.mkFloat(n->value); } else if (auto* n = node->get_if()) { v.mkString(n->value); } else if (auto* n = node->get_if()) { - v.mkPath(state.rootPath(CanonPath(n->value))); + std::string path = n->value; + // Expand ~/ to home directory + if (path.size() >= 2 && path[0] == '~' && path[1] == '/') { + const char* home = getenv("HOME"); + if (home) { + path = std::string(home) + path.substr(1); + } + } + v.mkPath(state.rootPath(CanonPath(path))); } else if (auto* n = node->get_if()) { v.mkBool(n->value); - } else if (auto* n = node->get_if()) { // NOLINT(bugprone-branch-clone) + } else if (auto* n = node->get_if()) { // NOLINT(bugprone-branch-clone) v.mkNull(); + } else if (auto* n = node->get_if()) { + // Parse and validate URI, then create string with URI context + auto parsed = parseURL(n->value, true); + // Store URI with context - use simple mkString with context + v.mkString(parsed.to_string(), nix::NixStringContext{}); + } else if (auto* n = node->get_if()) { + // Lookup path like ; resolve via Nix search path + // We can use EvalState's searchPath to resolve + auto path = state.findFile(n->value); + v.mkPath(path); + } else if (auto* n = node->get_if()) { + // Evaluate list - allocate and populate + auto builder = state.buildList(n->elements.size()); + for (size_t i = 0; i < 
n->elements.size(); i++) { + builder.elems[i] = state.allocValue(); + eval_node(n->elements[i], *builder.elems[i], env); + } + v.mkList(builder); } else if (auto* n = node->get_if()) { Value* bound = env ? env->lookup(n->index) : nullptr; if (!bound && env && n->name.has_value()) { @@ -124,8 +212,7 @@ struct Evaluator::Impl { if (!bound) { state.error("variable not found").debugThrow(); } - force(bound); - v = *bound; + copy_value(v, bound); } else if (auto* n = node->get_if()) { auto lambda_env = env; auto body = n->body; @@ -216,6 +303,22 @@ struct Evaluator::Impl { v.mkInt((left->integer() + right->integer()).valueWrapping()); } else if (left->type() == nString && right->type() == nString) { v.mkString(std::string(left->c_str()) + std::string(right->c_str())); + } else if (left->type() == nPath && right->type() == nString) { + // Path + string = path + std::string leftPath = std::string(left->path().path.abs()); + std::string result = leftPath + std::string(right->c_str()); + v.mkPath(state.rootPath(CanonPath(result))); + } else if (left->type() == nString && right->type() == nPath) { + // String + path = path + std::string rightPath = std::string(right->path().path.abs()); + std::string result = std::string(left->c_str()) + rightPath; + v.mkPath(state.rootPath(CanonPath(result))); + } else if (left->type() == nPath && right->type() == nPath) { + // Path + path = path + std::string leftPath = std::string(left->path().path.abs()); + std::string rightPath = std::string(right->path().path.abs()); + std::string result = leftPath + rightPath; + v.mkPath(state.rootPath(CanonPath(result))); } else { state.error("type error in addition").debugThrow(); } @@ -286,10 +389,60 @@ struct Evaluator::Impl { state.error("type error in comparison").debugThrow(); } break; - case BinaryOp::CONCAT: - // ++ is list concatenation in Nix; string concat uses ADD (+) - state.error("list concatenation not yet implemented").debugThrow(); + case BinaryOp::CONCAT: { + // List concatenation: 
left ++ right + if (left->type() != nList || right->type() != nList) { + state.error("list concatenation requires two lists").debugThrow(); + } + + size_t left_size = left->listSize(); + size_t right_size = right->listSize(); + size_t total_size = left_size + right_size; + + auto builder = state.buildList(total_size); + auto left_view = left->listView(); + auto right_view = right->listView(); + + // Copy elements from left list + size_t idx = 0; + for (auto elem : left_view) { + builder.elems[idx++] = elem; + } + + // Copy elements from right list + for (auto elem : right_view) { + builder.elems[idx++] = elem; + } + + v.mkList(builder); break; + } + case BinaryOp::MERGE: { + // // is attrset merge - right overrides left + if (left->type() != nAttrs || right->type() != nAttrs) { + state.error("attrset merge requires two attrsets").debugThrow(); + } + + // Build a map of right attrs first (these have priority) + std::unordered_map right_attrs; + for (auto& attr : *right->attrs()) { + right_attrs[attr.name] = attr.value; + } + + // Copy right attrs to result + auto builder = state.buildBindings(left->attrs()->size() + right->attrs()->size()); + for (auto& attr : *right->attrs()) { + builder.insert(attr.name, attr.value); + } + // Add left attrs that don't exist in right + for (auto& attr : *left->attrs()) { + if (right_attrs.find(attr.name) == right_attrs.end()) { + builder.insert(attr.name, attr.value); + } + } + v.mkAttrs(builder.finish()); + break; + } default: state.error("unknown binary operator").debugThrow(); } @@ -334,42 +487,72 @@ struct Evaluator::Impl { } } else if (auto* n = node->get_if()) { auto let_env = make_env(env); + // Nix's let is recursive: bind all names first, then evaluate + // We allocate Values immediately and evaluate into them + std::vector values; for (const auto& [name, expr] : n->bindings) { - Value* val = make_thunk(expr, env); + Value* val = state.allocValue(); + values.push_back(val); let_env->bind(val); } + // Now evaluate each 
binding expression into its pre-allocated Value + size_t idx = 0; + for (const auto& [name, expr] : n->bindings) { + eval_node(expr, *values[idx++], let_env); + } eval_node(n->body, v, let_env); } else if (auto* n = node->get_if()) { auto letrec_env = make_env(env); - std::vector thunk_vals; - + // Same as LetNode - both are recursive in Nix + std::vector values; for (const auto& [name, expr] : n->bindings) { - Value* val = make_thunk(expr, letrec_env); - thunk_vals.push_back(val); + Value* val = state.allocValue(); + values.push_back(val); letrec_env->bind(val); } - + size_t idx = 0; + for (const auto& [name, expr] : n->bindings) { + eval_node(expr, *values[idx++], letrec_env); + } eval_node(n->body, v, letrec_env); } else if (auto* n = node->get_if()) { auto bindings = state.buildBindings(n->attrs.size()); IREnvironment* attr_env = env; if (n->recursive) { + // For recursive attrsets, create environment where all bindings can + // see each other attr_env = make_env(env); - for (const auto& [key, val] : n->attrs) { - Value* thunk = make_thunk(val, attr_env); - attr_env->bind(thunk); + for (const auto& binding : n->attrs) { + if (!binding.is_dynamic()) { + Value* thunk = make_thunk(binding.value, attr_env); + attr_env->bind(thunk); + } } } - for (const auto& [key, val] : n->attrs) { + // Evaluate attribute values immediately to avoid dangling thunks + // Our thunk system is tied to the Evaluator lifetime, so we can't + // return lazy thunks that outlive the evaluator + for (const auto& binding : n->attrs) { Value* attr_val = state.allocValue(); - if (n->recursive) { - eval_node(val, *attr_val, attr_env); + eval_node(binding.value, *attr_val, attr_env); + + if (binding.is_dynamic()) { + // Evaluate key expression to get attribute name + Value* key_val = state.allocValue(); + eval_node(binding.dynamic_name, *key_val, attr_env); + force(key_val); + + if (key_val->type() != nString) { + state.error("dynamic attribute name must evaluate to a string").debugThrow(); + } + 
+ std::string key_str = std::string(key_val->c_str()); + bindings.insert(state.symbols.create(key_str), attr_val); } else { - eval_node(val, *attr_val, env); + bindings.insert(state.symbols.create(binding.static_name.value()), attr_val); } - bindings.insert(state.symbols.create(key), attr_val); } v.mkAttrs(bindings.finish()); @@ -394,9 +577,7 @@ struct Evaluator::Impl { auto attr = obj->attrs()->get(sym); if (attr) { - Value* val = attr->value; - force(val); - v = *val; + copy_value(v, attr->value); } else if (n->default_expr) { eval_node(*n->default_expr, v, env); } else { @@ -446,6 +627,42 @@ struct Evaluator::Impl { } eval_node(n->body, v, env); + } else if (auto* n = node->get_if()) { + // Evaluate path expression to get the file path + Value* path_val = state.allocValue(); + eval_node(n->path, *path_val, env); + force(path_val); + + // Path should be a string or path type, convert to SourcePath + if (path_val->type() == nPath) { + state.evalFile(path_val->path(), v); + } else if (path_val->type() == nString) { + auto path = state.rootPath(CanonPath(path_val->c_str())); + state.evalFile(path, v); + } else { + state.error("import argument must be a path or string").debugThrow(); + } + } else if (auto* n = node->get_if()) { + std::vector args; + args.reserve(n->args.size()); + + for (const auto& arg_node : n->args) { + Value* arg = state.allocValue(); + eval_node(arg_node, *arg, env); + args.push_back(arg); + } + + if (n->builtin_name == "getFlake") { + if (args.size() != 1) { + state.error("getFlake expects exactly one argument").debugThrow(); + } + auto flake_ref = state.forceStringNoCtx(*args[0], noPos, "while evaluating getFlake"); + std::string expr = "builtins.getFlake \"" + escape_nix_string(flake_ref) + "\""; + auto* parsed = state.parseExprFromString(expr, state.rootPath(CanonPath::root)); + state.eval(parsed, v); + } else { + state.error("unsupported builtin call: %s", n->builtin_name).debugThrow(); + } } else { v.mkNull(); } diff --git 
a/src/irc/evaluator.h b/src/irc/evaluator.h index 107bd78..d9c0aaf 100644 --- a/src/irc/evaluator.h +++ b/src/irc/evaluator.h @@ -9,7 +9,7 @@ namespace nix { class EvalState; class Value; class PosIdx; -} +} // namespace nix namespace nix_irc { @@ -18,18 +18,17 @@ class IREnvironment; class Evaluator { public: - explicit Evaluator(nix::EvalState& state); - ~Evaluator(); + explicit Evaluator(nix::EvalState& state); + ~Evaluator(); - void eval_to_nix(const std::shared_ptr& ir_node, - nix::Value& result, - IREnvironment* env = nullptr); + void eval_to_nix(const std::shared_ptr& ir_node, nix::Value& result, + IREnvironment* env = nullptr); private: - struct Impl; - std::unique_ptr pImpl; + struct Impl; + std::unique_ptr pImpl; }; -} +} // namespace nix_irc #endif diff --git a/src/irc/ir_gen.cpp b/src/irc/ir_gen.cpp index 5b56cec..176ae4e 100644 --- a/src/irc/ir_gen.cpp +++ b/src/irc/ir_gen.cpp @@ -1,219 +1,256 @@ #include "ir_gen.h" +#include +#include #include #include -#include namespace nix_irc { struct NameResolver::Impl { - std::vector> scopes; - std::vector> scope_names; + std::vector> scopes; + std::vector> scope_names; - Impl() { - scopes.push_back({}); - scope_names.push_back({}); - } + Impl() { + scopes.push_back({}); + scope_names.push_back({}); + } }; NameResolver::NameResolver() : pImpl(std::make_unique()) {} NameResolver::~NameResolver() = default; void NameResolver::enter_scope() { - pImpl->scopes.push_back({}); - pImpl->scope_names.push_back({}); + pImpl->scopes.push_back({}); + pImpl->scope_names.push_back({}); } void NameResolver::exit_scope() { - if (!pImpl->scopes.empty()) { - pImpl->scopes.pop_back(); - pImpl->scope_names.pop_back(); - } + if (!pImpl->scopes.empty()) { + pImpl->scopes.pop_back(); + pImpl->scope_names.pop_back(); + } } void NameResolver::bind(const std::string& name) { - if (pImpl->scopes.empty()) return; - uint32_t idx = pImpl->scope_names.back().size(); - pImpl->scopes.back()[name] = idx; - 
pImpl->scope_names.back().push_back(name); + if (pImpl->scopes.empty()) + return; + uint32_t idx = pImpl->scope_names.back().size(); + pImpl->scopes.back()[name] = idx; + pImpl->scope_names.back().push_back(name); } uint32_t NameResolver::resolve(const std::string& name) { - for (int i = (int)pImpl->scopes.size() - 1; i >= 0; --i) { - auto it = pImpl->scopes[i].find(name); - if (it != pImpl->scopes[i].end()) { - uint32_t depth = pImpl->scopes.size() - 1 - i; - uint32_t offset = it->second; - return depth << 16 | offset; - } + for (int i = (int) pImpl->scopes.size() - 1; i >= 0; --i) { + auto it = pImpl->scopes[i].find(name); + if (it != pImpl->scopes[i].end()) { + uint32_t depth = pImpl->scopes.size() - 1 - i; + uint32_t offset = it->second; + return depth << 16 | offset; } - return 0xFFFFFFFF; + } + return 0xFFFFFFFF; } bool NameResolver::is_bound(const std::string& name) const { - for (auto it = pImpl->scopes.rbegin(); it != pImpl->scopes.rend(); ++it) { - if (it->count(name)) return true; - } - return false; + for (auto it = pImpl->scopes.rbegin(); it != pImpl->scopes.rend(); ++it) { + if (it->count(name)) + return true; + } + return false; } struct IRGenerator::Impl { - std::unordered_map string_table; - uint32_t next_string_id = 0; - NameResolver name_resolver; + std::unordered_map string_table; + uint32_t next_string_id = 0; + NameResolver name_resolver; - Impl() {} + Impl() {} - uint32_t add_string(const std::string& str) { - auto it = string_table.find(str); - if (it != string_table.end()) { - return it->second; - } - uint32_t id = next_string_id++; - string_table[str] = id; - return id; + uint32_t add_string(const std::string& str) { + auto it = string_table.find(str); + if (it != string_table.end()) { + return it->second; } + uint32_t id = next_string_id++; + string_table[str] = id; + return id; + } - std::shared_ptr convert(const std::shared_ptr& node_ptr) { - if (!node_ptr) return std::make_shared(ConstNullNode{}); + std::shared_ptr convert(const 
std::shared_ptr& node_ptr) { + if (!node_ptr) + return std::make_shared(ConstNullNode{}); - const Node& node = *node_ptr; + const Node& node = *node_ptr; - if (auto* n = node.get_if()) { - return std::make_shared(*n); - } - if (auto* n = node.get_if()) { - return std::make_shared(*n); - } - if (auto* n = node.get_if()) { - return std::make_shared(*n); - } - if (auto* n = node.get_if()) { - return std::make_shared(*n); - } - if (auto* n = node.get_if()) { - return std::make_shared(*n); - } - if (auto* n = node.get_if()) { - uint32_t idx = name_resolver.resolve(n->name.value_or("")); - VarNode converted(idx); - converted.name = n->name; - converted.line = n->line; - return std::make_shared(converted); - } - if (auto* n = node.get_if()) { - name_resolver.enter_scope(); - if (n->param_name) { - name_resolver.bind(*n->param_name); - } - auto body = convert(n->body); - name_resolver.exit_scope(); - LambdaNode lambda(n->arity, body, n->line); - lambda.param_name = n->param_name; - return std::make_shared(lambda); - } - if (auto* n = node.get_if()) { - auto func = convert(n->func); - auto arg = convert(n->arg); - return std::make_shared(AppNode(func, arg, n->line)); - } - if (auto* n = node.get_if()) { - AttrsetNode attrs(n->recursive, n->line); - name_resolver.enter_scope(); - for (const auto& [key, val] : n->attrs) { - name_resolver.bind(key); - } - for (const auto& [key, val] : n->attrs) { - attrs.attrs.push_back({key, convert(val)}); - } - name_resolver.exit_scope(); - return std::make_shared(attrs); - } - if (auto* n = node.get_if()) { - auto expr = convert(n->expr); - auto attr = convert(n->attr); - SelectNode select(expr, attr, n->line); - if (n->default_expr) { - select.default_expr = convert(*n->default_expr); - } - return std::make_shared(select); - } - if (auto* n = node.get_if()) { - auto expr = convert(n->expr); - auto attr = convert(n->attr); - return std::make_shared(HasAttrNode(expr, attr, n->line)); - } - if (auto* n = node.get_if()) { - auto attrs = 
convert(n->attrs); - auto body = convert(n->body); - return std::make_shared(WithNode(attrs, body, n->line)); - } - if (auto* n = node.get_if()) { - auto cond = convert(n->cond); - auto then_b = convert(n->then_branch); - auto else_b = convert(n->else_branch); - return std::make_shared(IfNode(cond, then_b, else_b, n->line)); - } - if (auto* n = node.get_if()) { - name_resolver.enter_scope(); - for (const auto& [key, val] : n->bindings) { - name_resolver.bind(key); - } - std::vector>> new_bindings; - for (const auto& [key, val] : n->bindings) { - new_bindings.push_back({key, convert(val)}); - } - auto body = convert(n->body); - name_resolver.exit_scope(); - LetNode let(body, n->line); - let.bindings = std::move(new_bindings); - return std::make_shared(let); - } - if (auto* n = node.get_if()) { - name_resolver.enter_scope(); - for (const auto& [key, val] : n->bindings) { - name_resolver.bind(key); - } - std::vector>> new_bindings; - for (const auto& [key, val] : n->bindings) { - new_bindings.push_back({key, convert(val)}); - } - auto body = convert(n->body); - name_resolver.exit_scope(); - LetRecNode letrec(body, n->line); - letrec.bindings = std::move(new_bindings); - return std::make_shared(letrec); - } - if (auto* n = node.get_if()) { - auto cond = convert(n->cond); - auto body = convert(n->body); - return std::make_shared(AssertNode(cond, body, n->line)); - } - if (auto* n = node.get_if()) { - auto left = convert(n->left); - auto right = convert(n->right); - return std::make_shared(BinaryOpNode(n->op, left, right, n->line)); - } - if (auto* n = node.get_if()) { - auto operand = convert(n->operand); - return std::make_shared(UnaryOpNode(n->op, operand, n->line)); - } - return std::make_shared(ConstNullNode{}); + if (auto* n = node.get_if()) { + return std::make_shared(*n); } + if (auto* n = node.get_if()) { + return std::make_shared(*n); + } + if (auto* n = node.get_if()) { + return std::make_shared(*n); + } + if (auto* n = node.get_if()) { + return 
std::make_shared(*n); + } + if (auto* n = node.get_if()) { + return std::make_shared(*n); + } + if (auto* n = node.get_if()) { + std::string var_name = n->name.value_or(""); + uint32_t idx = name_resolver.resolve(var_name); + VarNode converted(idx); + converted.name = n->name; + converted.line = n->line; + return std::make_shared(converted); + } + if (auto* n = node.get_if()) { + name_resolver.enter_scope(); + if (n->param_name) { + name_resolver.bind(*n->param_name); + } + auto body = convert(n->body); + name_resolver.exit_scope(); + LambdaNode lambda(n->arity, body, n->line); + lambda.param_name = n->param_name; + return std::make_shared(lambda); + } + if (auto* n = node.get_if()) { + auto func = convert(n->func); + auto arg = convert(n->arg); + return std::make_shared(AppNode(func, arg, n->line)); + } + if (auto* n = node.get_if()) { + AttrsetNode attrs(n->recursive, n->line); + + // Only enter a new scope for recursive attrsets + if (n->recursive) { + name_resolver.enter_scope(); + for (const auto& binding : n->attrs) { + if (!binding.is_dynamic()) { + name_resolver.bind(binding.static_name.value()); + } + } + } + + for (const auto& binding : n->attrs) { + if (binding.is_dynamic()) { + attrs.attrs.push_back(AttrBinding(convert(binding.dynamic_name), convert(binding.value))); + } else { + attrs.attrs.push_back(AttrBinding(binding.static_name.value(), convert(binding.value))); + } + } + + if (n->recursive) { + name_resolver.exit_scope(); + } + return std::make_shared(attrs); + } + if (auto* n = node.get_if()) { + auto expr = convert(n->expr); + auto attr = convert(n->attr); + SelectNode select(expr, attr, n->line); + if (n->default_expr) { + select.default_expr = convert(*n->default_expr); + } + return std::make_shared(select); + } + if (auto* n = node.get_if()) { + auto expr = convert(n->expr); + auto attr = convert(n->attr); + return std::make_shared(HasAttrNode(expr, attr, n->line)); + } + if (auto* n = node.get_if()) { + auto attrs = convert(n->attrs); + auto 
body = convert(n->body); + return std::make_shared(WithNode(attrs, body, n->line)); + } + if (auto* n = node.get_if()) { + auto cond = convert(n->cond); + auto then_b = convert(n->then_branch); + auto else_b = convert(n->else_branch); + return std::make_shared(IfNode(cond, then_b, else_b, n->line)); + } + if (auto* n = node.get_if()) { + name_resolver.enter_scope(); + for (const auto& [key, val] : n->bindings) { + name_resolver.bind(key); + } + std::vector>> new_bindings; + new_bindings.reserve(n->bindings.size()); + for (const auto& [key, val] : n->bindings) { + new_bindings.push_back({key, convert(val)}); + } + auto body = convert(n->body); + name_resolver.exit_scope(); + LetNode let(body, n->line); + let.bindings = std::move(new_bindings); + return std::make_shared(let); + } + if (auto* n = node.get_if()) { + name_resolver.enter_scope(); + for (const auto& [key, val] : n->bindings) { + name_resolver.bind(key); + } + std::vector>> new_bindings; + new_bindings.reserve(n->bindings.size()); + for (const auto& [key, val] : n->bindings) { + new_bindings.push_back({key, convert(val)}); + } + auto body = convert(n->body); + name_resolver.exit_scope(); + LetRecNode letrec(body, n->line); + letrec.bindings = std::move(new_bindings); + return std::make_shared(letrec); + } + if (auto* n = node.get_if()) { + auto cond = convert(n->cond); + auto body = convert(n->body); + return std::make_shared(AssertNode(cond, body, n->line)); + } + if (auto* n = node.get_if()) { + auto left = convert(n->left); + auto right = convert(n->right); + return std::make_shared(BinaryOpNode(n->op, left, right, n->line)); + } + if (auto* n = node.get_if()) { + auto operand = convert(n->operand); + return std::make_shared(UnaryOpNode(n->op, operand, n->line)); + } + if (auto* n = node.get_if()) { + std::vector> elements; + elements.reserve(n->elements.size()); + for (const auto& elem : n->elements) { + elements.push_back(convert(elem)); + } + return std::make_shared(ListNode(std::move(elements), 
n->line)); + } + if (auto* n = node.get_if()) { + std::vector> args; + args.reserve(n->args.size()); + for (const auto& arg : n->args) { + args.push_back(convert(arg)); + } + return std::make_shared(BuiltinCallNode(n->builtin_name, std::move(args), n->line)); + } + return std::make_shared(ConstNullNode{}); + } }; IRGenerator::IRGenerator() : pImpl(std::make_unique()) {} IRGenerator::~IRGenerator() = default; void IRGenerator::set_string_table(const std::unordered_map& table) { - pImpl->string_table = table; + pImpl->string_table = table; } uint32_t IRGenerator::add_string(const std::string& str) { - return pImpl->add_string(str); + return pImpl->add_string(str); } std::shared_ptr IRGenerator::generate(const std::shared_ptr& ast) { - return pImpl->convert(ast); + return pImpl->convert(ast); } -} +} // namespace nix_irc diff --git a/src/irc/ir_gen.h b/src/irc/ir_gen.h index de082a6..2c3e9b8 100644 --- a/src/irc/ir_gen.h +++ b/src/irc/ir_gen.h @@ -2,44 +2,44 @@ #define NIX_IRC_IR_GEN_H #include "types.h" +#include #include #include #include -#include namespace nix_irc { class IRGenerator { public: - IRGenerator(); - ~IRGenerator(); - - void set_string_table(const std::unordered_map& table); - uint32_t add_string(const std::string& str); - - std::shared_ptr generate(const std::shared_ptr& ast); - + IRGenerator(); + ~IRGenerator(); + + void set_string_table(const std::unordered_map& table); + uint32_t add_string(const std::string& str); + + std::shared_ptr generate(const std::shared_ptr& ast); + private: - struct Impl; - std::unique_ptr pImpl; + struct Impl; + std::unique_ptr pImpl; }; class NameResolver { public: - NameResolver(); - ~NameResolver(); - - void enter_scope(); - void exit_scope(); - void bind(const std::string& name); - uint32_t resolve(const std::string& name); - bool is_bound(const std::string& name) const; - + NameResolver(); + ~NameResolver(); + + void enter_scope(); + void exit_scope(); + void bind(const std::string& name); + uint32_t resolve(const 
std::string& name); + bool is_bound(const std::string& name) const; + private: - struct Impl; - std::unique_ptr pImpl; + struct Impl; + std::unique_ptr pImpl; }; -} +} // namespace nix_irc #endif \ No newline at end of file diff --git a/src/irc/lexer.cpp b/src/irc/lexer.cpp new file mode 100644 index 0000000..47a4161 --- /dev/null +++ b/src/irc/lexer.cpp @@ -0,0 +1,598 @@ +#include "lexer.h" +#include +#include + +namespace nix_irc { + +Lexer::Lexer(std::string input) : input(std::move(input)), pos(0), line(1), col(1) {} + +std::vector Lexer::tokenize() { +#define TOKEN(t) \ + Token { \ + Token::t, "", line, col \ + } + + while (pos < input.size()) { + skip_whitespace(); + if (pos >= input.size()) + break; + + char c = input[pos]; + + if (c == '(') { + emit(TOKEN(LPAREN)); + } else if (c == ')') { + emit(TOKEN(RPAREN)); + } else if (c == '{') { + emit(TOKEN(LBRACE)); + } else if (c == '}') { + emit(TOKEN(RBRACE)); + } else if (c == '[') { + emit(TOKEN(LBRACKET)); + } else if (c == ']') { + emit(TOKEN(RBRACKET)); + } else if (c == ';') { + emit(TOKEN(SEMICOLON)); + } else if (c == ':') { + emit(TOKEN(COLON)); + } else if (c == '@') { + emit(TOKEN(AT)); + } else if (c == ',') { + emit(TOKEN(COMMA)); + } else if (c == '\'' && pos + 1 < input.size() && input[pos + 1] == '\'') { + tokenize_indented_string(); + } else if (c == '"') { + tokenize_string(); + } + // Two-char operators + else if (c == '=' && pos + 1 < input.size() && input[pos + 1] == '=') { + tokens.push_back(TOKEN(EQEQ)); + pos += 2; + col += 2; + } else if (c == '=') { + emit(TOKEN(EQUALS)); + } else if (c == '!' 
&& pos + 1 < input.size() && input[pos + 1] == '=') { + tokens.push_back(TOKEN(NE)); + pos += 2; + col += 2; + } else if (c == '<' && pos + 1 < input.size() && input[pos + 1] == '=') { + tokens.push_back(TOKEN(LE)); + pos += 2; + col += 2; + } else if (c == '>' && pos + 1 < input.size() && input[pos + 1] == '=') { + tokens.push_back(TOKEN(GE)); + pos += 2; + col += 2; + } else if (c == '+' && pos + 1 < input.size() && input[pos + 1] == '+') { + tokens.push_back(TOKEN(CONCAT)); + pos += 2; + col += 2; + } else if (c == '/' && pos + 1 < input.size() && input[pos + 1] == '/') { + tokens.push_back(TOKEN(MERGE)); + pos += 2; + col += 2; + } else if (c == '&' && pos + 1 < input.size() && input[pos + 1] == '&') { + tokens.push_back(TOKEN(AND)); + pos += 2; + col += 2; + } else if (c == '|' && pos + 1 < input.size() && input[pos + 1] == '|') { + tokens.push_back(TOKEN(OR)); + pos += 2; + col += 2; + } else if (c == '-' && pos + 1 < input.size() && input[pos + 1] == '>') { + tokens.push_back(TOKEN(IMPL)); + pos += 2; + col += 2; + } + // Single-char operators + else if (c == '+') { + emit(TOKEN(PLUS)); + } else if (c == '*') { + emit(TOKEN(STAR)); + } else if (c == '/') { + // Check if it's a path or division + if (pos + 1 < input.size() && (isalnum(input[pos + 1]) || input[pos + 1] == '.')) { + tokenize_path(); + } else { + emit(TOKEN(SLASH)); + } + } else if (c == '<') { + // Check for lookup path vs comparison operator + size_t end = pos + 1; + bool is_lookup_path = false; + + // Scan for valid lookup path characters until > + while (end < input.size() && (isalnum(input[end]) || input[end] == '-' || input[end] == '_' || + input[end] == '/' || input[end] == '.')) { + end++; + } + + // If we found > and there's content, it's a lookup path + if (end < input.size() && input[end] == '>' && end > pos + 1) { + std::string path = input.substr(pos + 1, end - pos - 1); + size_t consumed = end - pos + 1; + tokens.push_back({Token::LOOKUP_PATH, path, line, col}); + pos = end + 1; + 
col += consumed; + is_lookup_path = true; + } + + if (!is_lookup_path) { + emit(TOKEN(LT)); + } + } else if (c == '>') { + emit(TOKEN(GT)); + } else if (c == '!') { + emit(TOKEN(NOT)); + } else if (c == '.') { + // Relative paths: ./foo and ../foo + if (pos + 1 < input.size() && input[pos + 1] == '/') { + tokenize_path(); + } else if (pos + 2 < input.size() && input[pos + 1] == '.' && input[pos + 2] == '/') { + tokenize_path(); + } + // Check for ellipsis (...) + else if (pos + 2 < input.size() && input[pos + 1] == '.' && input[pos + 2] == '.') { + tokens.push_back(TOKEN(ELLIPSIS)); + pos += 3; + col += 3; + } else { + emit(TOKEN(DOT)); + } + } else if (c == '?') { + emit(TOKEN(QUESTION)); + } else if (c == '~') { + // Home-relative path ~/... + if (pos + 1 < input.size() && input[pos + 1] == '/') { + tokenize_home_path(); + } else { + // Just ~ by itself is an identifier + tokenize_ident(); + } + } else if (c == '-') { + // Check if it's a negative number or minus operator + if (pos + 1 < input.size() && isdigit(input[pos + 1])) { + // Check for negative float + if (pos + 2 < input.size() && input[pos + 2] == '.') { + tokenize_float(); + } else { + tokenize_int(); + } + } else { + emit(TOKEN(MINUS)); + } + } else if (isdigit(c)) { + // Check if it's a float (digit followed by '.') + if (pos + 1 < input.size() && input[pos + 1] == '.') { + tokenize_float(); + } else { + tokenize_int(); + } + } else if (isalpha(c)) { + // Check if it's a URI (contains ://) - look ahead + size_t lookahead = pos; + while (lookahead < input.size() && + (isalnum(input[lookahead]) || input[lookahead] == '_' || input[lookahead] == '-' || + input[lookahead] == '+' || input[lookahead] == '.')) + lookahead++; + std::string potential_scheme = input.substr(pos, lookahead - pos); + if (lookahead + 2 < input.size() && input[lookahead] == ':' && input[lookahead + 1] == '/' && + input[lookahead + 2] == '/') { + // It's a URI, consume the whole thing + tokenize_uri(); + } else { + tokenize_ident(); 
+ } + } else { + throw std::runtime_error("Unexpected character '" + std::string(1, c) + "' at " + + std::to_string(line) + ":" + std::to_string(col)); + } + } + tokens.push_back({Token::EOF_, "", line, col}); + +#undef TOKEN + return tokens; +} + +void Lexer::emit(const Token& t) { + tokens.push_back(t); + pos++; + col++; +} + +void Lexer::skip_whitespace() { + while (pos < input.size()) { + char c = input[pos]; + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { + if (c == '\n') { + line++; + col = 1; + } else { + col++; + } + pos++; + } else if (c == '#') { + // Line comment - skip until newline + while (pos < input.size() && input[pos] != '\n') + pos++; + } else if (c == '/' && pos + 1 < input.size() && input[pos + 1] == '*') { + // Block comment /* ... */ + // Note: Nix block comments do NOT nest + size_t start_line = line; + size_t start_col = col; + bool terminated = false; + pos += 2; // Skip /* + col += 2; + while (pos + 1 < input.size()) { + if (input[pos] == '*' && input[pos + 1] == '/') { + pos += 2; // Skip */ + col += 2; + terminated = true; + break; + } + if (input[pos] == '\n') { + line++; + col = 1; + } else { + col++; + } + pos++; + } + if (!terminated) { + throw std::runtime_error("Unterminated block comment at " + std::to_string(start_line) + + ":" + std::to_string(start_col)); + } + } else { + break; + } + } +} + +void Lexer::tokenize_string() { + size_t start_line = line; + size_t start_col = col; + pos++; + col++; + std::string s; + bool has_interp = false; + + while (pos < input.size() && input[pos] != '"') { + if (input[pos] == '\\' && pos + 1 < input.size()) { + pos++; + col++; + switch (input[pos]) { + case 'n': + s += '\n'; + break; + case 't': + s += '\t'; + break; + case 'r': + s += '\r'; + break; + case '"': + s += '"'; + break; + case '\\': + s += '\\'; + break; + case '$': + s += '$'; + break; // Escaped $ + default: + s += input[pos]; + break; + } + pos++; + col++; + } else if (input[pos] == '$' && pos + 1 < input.size() && 
input[pos + 1] == '{') { + // Found interpolation marker + has_interp = true; + s += input[pos]; // Keep $ in raw string + pos++; + col++; + } else { + if (input[pos] == '\n') { + s += input[pos]; + pos++; + line++; + col = 1; + continue; + } + s += input[pos]; + pos++; + col++; + } + } + if (pos >= input.size()) { + throw std::runtime_error("Unterminated string at " + std::to_string(start_line) + ":" + + std::to_string(start_col)); + } + pos++; + col++; + + Token::Type type = has_interp ? Token::STRING_INTERP : Token::STRING; + tokens.push_back({type, s, start_line, start_col}); +} + +void Lexer::tokenize_indented_string() { + pos += 2; // Skip opening '' + std::string raw_content; + bool has_interp = false; + size_t start_line = line; + + // Collect raw content until closing '' + while (pos < input.size()) { + // Check for escape sequences + if (pos + 1 < input.size() && input[pos] == '\'' && input[pos + 1] == '\'') { + // Check if it's an escape or the closing delimiter + if (pos + 2 < input.size() && input[pos + 2] == '\'') { + // ''' -> escape for '' + raw_content += "''"; + pos += 3; + continue; + } else if (pos + 2 < input.size() && input[pos + 2] == '$') { + // ''$ -> escape for $ + raw_content += '$'; + pos += 3; + continue; + } else if (pos + 2 < input.size() && input[pos + 2] == '\\') { + // ''\ -> check what follows + if (pos + 3 < input.size()) { + char next = input[pos + 3]; + if (next == 'n') { + raw_content += '\n'; + pos += 4; + continue; + } else if (next == 'r') { + raw_content += '\r'; + pos += 4; + continue; + } else if (next == 't') { + raw_content += '\t'; + pos += 4; + continue; + } else if (next == ' ' || next == '\t') { + // ''\ before whitespace - preserve the whitespace by prepending a marker + // We use a special escape sequence that won't appear in normal text + raw_content += "\x1F\x1F"; // Unit separator pair as marker for preserved whitespace + raw_content += next; + pos += 4; + continue; + } + } + // Default: literal backslash + 
raw_content += '\\'; + pos += 3; + continue; + } else { + // Just closing '' + pos += 2; + break; + } + } + + // Check for interpolation + if (input[pos] == '$' && pos + 1 < input.size() && input[pos + 1] == '{') { + has_interp = true; + raw_content += input[pos]; + pos++; + if (input[pos] == '\n') { + line++; + } + continue; + } + + // Track newlines + if (input[pos] == '\n') { + line++; + raw_content += input[pos]; + pos++; + } else { + raw_content += input[pos]; + pos++; + } + } + + // Strip common indentation + std::string stripped = strip_indentation(raw_content); + + Token::Type type = has_interp ? Token::INDENTED_STRING_INTERP : Token::INDENTED_STRING; + tokens.push_back({type, stripped, start_line, col}); +} + +std::string Lexer::strip_indentation(const std::string& s) { + if (s.empty()) + return s; + + // Split into lines + std::vector lines; + std::string current_line; + for (char c : s) { + if (c == '\n') { + lines.push_back(current_line); + current_line.clear(); + } else { + current_line += c; + } + } + if (!current_line.empty() || (!s.empty() && s.back() == '\n')) { + lines.push_back(current_line); + } + + // Find minimum indentation (spaces/tabs at start of non-empty lines) + // \x1F\x1F marker indicates preserved whitespace (from ''\ escape) + size_t min_indent = std::string::npos; + for (const auto& line : lines) { + if (line.empty()) + continue; // Skip empty lines when calculating indentation + size_t indent = 0; + for (size_t i = 0; i < line.size(); i++) { + char c = line[i]; + // If we hit the preserved whitespace marker, stop counting indentation + if (c == '\x1F' && i + 1 < line.size() && line[i + 1] == '\x1F') { + break; + } + if (c == ' ' || c == '\t') + indent++; + else + break; + } + if (indent < min_indent) + min_indent = indent; + } + + if (min_indent == std::string::npos) + min_indent = 0; + + // Strip min_indent from all lines and remove \x1F\x1F markers + std::string result; + for (size_t i = 0; i < lines.size(); i++) { + const auto& 
line = lines[i]; + if (line.empty()) { + // Preserve empty lines + if (i + 1 < lines.size()) + result += '\n'; + } else { + // Strip indentation, being careful about \x1F\x1F markers + size_t skip = 0; + size_t pos = 0; + while (skip < min_indent && pos < line.size()) { + if (line[pos] == '\x1F' && pos + 1 < line.size() && line[pos + 1] == '\x1F') { + // Hit preserved whitespace marker - don't strip any more + break; + } + skip++; + pos++; + } + + // Add the rest of the line, removing \x1F\x1F markers + for (size_t j = pos; j < line.size(); j++) { + if (line[j] == '\x1F' && j + 1 < line.size() && line[j + 1] == '\x1F') { + j++; // Skip both marker bytes + continue; + } + result += line[j]; + } + + if (i + 1 < lines.size()) + result += '\n'; + } + } + + return result; +} + +void Lexer::tokenize_path() { + size_t start = pos; + while (pos < input.size() && !isspace(input[pos]) && input[pos] != '(' && input[pos] != ')' && + input[pos] != '{' && input[pos] != '}' && input[pos] != '[' && input[pos] != ']' && + input[pos] != ';') { + pos++; + } + std::string path = input.substr(start, pos - start); + tokens.push_back({Token::PATH, path, line, col}); + col += path.size(); +} + +void Lexer::tokenize_home_path() { + size_t start = pos; + pos++; // Skip ~ + if (pos < input.size() && input[pos] == '/') { + // Home-relative path ~/something + while (pos < input.size() && !isspace(input[pos]) && input[pos] != '(' && input[pos] != ')' && + input[pos] != '{' && input[pos] != '}' && input[pos] != '[' && input[pos] != ']' && + input[pos] != ';') { + pos++; + } + } + std::string path = input.substr(start, pos - start); + tokens.push_back({Token::PATH, path, line, col}); + col += path.size(); +} + +void Lexer::tokenize_int() { + size_t start = pos; + if (input[pos] == '-') + pos++; + while (pos < input.size() && isdigit(input[pos])) + pos++; + std::string num = input.substr(start, pos - start); + tokens.push_back({Token::INT, num, line, col}); + col += num.size(); +} + +void 
Lexer::tokenize_float() { + size_t start = pos; + if (input[pos] == '-') + pos++; + while (pos < input.size() && isdigit(input[pos])) + pos++; + if (pos < input.size() && input[pos] == '.') { + pos++; + while (pos < input.size() && isdigit(input[pos])) + pos++; + } + std::string num = input.substr(start, pos - start); + tokens.push_back({Token::FLOAT, num, line, col}); + col += num.size(); +} + +void Lexer::tokenize_uri() { + size_t start = pos; + while (pos < input.size() && !isspace(input[pos]) && input[pos] != ')' && input[pos] != ']' && + input[pos] != ';') { + pos++; + } + std::string uri = input.substr(start, pos - start); + tokens.push_back({Token::URI, uri, line, col}); + col += uri.size(); +} + +void Lexer::tokenize_ident() { + size_t start = pos; + // Note: Don't include '.' here - it's used for selection (a.b.c) + // URIs are handled separately by checking for '://' pattern + while (pos < input.size() && (isalnum(input[pos]) || input[pos] == '_' || input[pos] == '-')) + pos++; + std::string ident = input.substr(start, pos - start); + + // Check if it's a URI (contains ://) + size_t scheme_end = ident.find("://"); + if (scheme_end != std::string::npos && scheme_end > 0) { + tokens.push_back({Token::URI, ident, line, col}); + col += ident.size(); + return; + } + + Token::Type type = Token::IDENT; + if (ident == "let") + type = Token::LET; + else if (ident == "in") + type = Token::IN; + else if (ident == "rec") + type = Token::REC; + else if (ident == "if") + type = Token::IF; + else if (ident == "then") + type = Token::THEN; + else if (ident == "else") + type = Token::ELSE; + else if (ident == "assert") + type = Token::ASSERT; + else if (ident == "with") + type = Token::WITH; + else if (ident == "inherit") + type = Token::INHERIT; + else if (ident == "import") + type = Token::IMPORT; + else if (ident == "true") + type = Token::BOOL; + else if (ident == "false") + type = Token::BOOL; + + tokens.push_back({type, ident, line, col}); + col += ident.size(); +} 
+ +} // namespace nix_irc diff --git a/src/irc/lexer.h b/src/irc/lexer.h new file mode 100644 index 0000000..3804ed7 --- /dev/null +++ b/src/irc/lexer.h @@ -0,0 +1,94 @@ +#pragma once + +#include +#include + +namespace nix_irc { + +struct Token { + enum Type { + LPAREN, + RPAREN, + LBRACE, + RBRACE, + LBRACKET, + RBRACKET, + IDENT, + STRING, + STRING_INTERP, + INDENTED_STRING, + INDENTED_STRING_INTERP, + PATH, + LOOKUP_PATH, + INT, + FLOAT, + URI, + BOOL, + LET, + IN, + REC, + IF, + THEN, + ELSE, + ASSERT, + WITH, + INHERIT, + IMPORT, + DOT, + SEMICOLON, + COLON, + EQUALS, + AT, + COMMA, + QUESTION, + ELLIPSIS, + // Operators + PLUS, + MINUS, + STAR, + SLASH, + CONCAT, + MERGE, + EQEQ, + NE, + LT, + GT, + LE, + GE, + AND, + OR, + IMPL, + NOT, + EOF_ + } type; + std::string value; + size_t line; + size_t col; +}; + +class Lexer { +public: + explicit Lexer(std::string input); + std::vector tokenize(); + +private: + std::vector tokens; + std::string input; + size_t pos; + size_t line; + size_t col; + + void emit(const Token& t); + void skip_whitespace(); + void tokenize_string(); + void tokenize_indented_string(); + std::string strip_indentation(const std::string& s); + void tokenize_path(); + void tokenize_home_path(); + void tokenize_int(); + void tokenize_float(); + void tokenize_uri(); + void tokenize_ident(); +}; + +} // namespace nix_irc diff --git a/src/irc/main.cpp b/src/irc/main.cpp index c5c28bf..3c00d70 100644 --- a/src/irc/main.cpp +++ b/src/irc/main.cpp @@ -1,150 +1,297 @@ -#include -#include "parser.h" -#include "resolver.h" #include "ir_gen.h" +#include "parser.h" #include "serializer.h" +#include +#include +#include +#include +#include #include #include -#include namespace nix_irc { +namespace fs = std::filesystem; void print_usage(const char* prog) { - std::cout << "Usage: " << prog << " [options] [output.nixir]\n" - << "\nOptions:\n" - << " -I Add search path for imports\n" - << " --no-imports Disable import resolution\n" - << " --help Show this 
help\n"; + std::cout << "Usage: " << prog << " [options] [output.nixir]\n" + << "\nOptions:\n" + << " -I Add search path for imports\n" + << " --no-imports Disable import resolution\n" + << " --help Show this help\n"; +} + +static bool is_flake_reference(const std::string& input) { + return input.find('#') != std::string::npos; +} + +static std::string sanitize_output_stem(const std::string& input) { + std::string stem; + stem.reserve(input.size()); + + for (char ch : input) { + if (std::isalnum(static_cast(ch))) { + stem.push_back(ch); + } else if (stem.empty() || stem.back() != '-') { + stem.push_back('-'); + } + } + + while (!stem.empty() && stem.back() == '-') { + stem.pop_back(); + } + + return stem.empty() ? "bundle" : stem; +} + +static std::string default_output_path_for(const std::string& input) { + if (!is_flake_reference(input)) { + return input + "ir"; + } + + return sanitize_output_stem(input) + ".nixir"; +} + +static std::string normalize_local_flake_path(const std::string& raw_path) { + fs::path path = raw_path.empty() ? fs::current_path() : fs::path(raw_path); + fs::path absolute = path.is_absolute() ? path : fs::absolute(path); + fs::path normalized = absolute.lexically_normal(); + + if (!fs::exists(normalized)) { + throw std::runtime_error("Flake path does not exist: " + normalized.string()); + } + + if (fs::is_directory(normalized) && !fs::exists(normalized / "flake.nix")) { + throw std::runtime_error("Flake directory does not contain flake.nix: " + normalized.string()); + } + + return normalized.string(); +} + +static std::string normalize_flake_ref_source(const std::string& ref) { + if (ref.empty()) { + return normalize_local_flake_path("."); + } + + if (ref.rfind("path:", 0) == 0) { + return "path:" + normalize_local_flake_path(ref.substr(5)); + } + + if (ref[0] == '.' 
|| ref[0] == '/') { + return normalize_local_flake_path(ref); + } + + if (fs::exists(ref)) { + return normalize_local_flake_path(ref); + } + + return ref; +} + +static std::vector parse_flake_attr_path(const std::string& raw_attr_path) { + if (raw_attr_path.empty()) { + throw std::runtime_error("Flake reference is missing an attribute path after '#'"); + } + + std::vector segments; + std::string current; + bool in_quotes = false; + bool escaping = false; + + for (char ch : raw_attr_path) { + if (escaping) { + current.push_back(ch); + escaping = false; + continue; + } + + if (in_quotes) { + if (ch == '\\') { + escaping = true; + } else if (ch == '"') { + in_quotes = false; + } else { + current.push_back(ch); + } + continue; + } + + if (ch == '"') { + in_quotes = true; + } else if (ch == '.') { + if (current.empty()) { + throw std::runtime_error("Flake attribute path contains an empty segment"); + } + segments.push_back(current); + current.clear(); + } else { + current.push_back(ch); + } + } + + if (escaping || in_quotes) { + throw std::runtime_error("Unterminated quoted segment in flake attribute path"); + } + + if (current.empty()) { + throw std::runtime_error("Flake attribute path contains an empty segment"); + } + + segments.push_back(current); + return segments; +} + +static std::shared_ptr build_flake_ref_ast(const std::string& input) { + size_t hash_pos = input.find('#'); + if (hash_pos == std::string::npos) { + throw std::runtime_error("Not a flake reference: " + input); + } + + std::string flake_source = normalize_flake_ref_source(input.substr(0, hash_pos)); + auto attr_path = parse_flake_attr_path(input.substr(hash_pos + 1)); + + auto expr = std::make_shared(BuiltinCallNode( + "getFlake", + std::vector>{std::make_shared(ConstStringNode(flake_source))})); + + for (const auto& attr : attr_path) { + expr = std::make_shared(SelectNode(expr, std::make_shared(ConstStringNode(attr)))); + } + + return expr; } int run_compile(int argc, char** argv) { - std::string 
input_file; - std::string output_file; - std::vector search_paths; - bool resolve_imports = true; - - int i = 1; - while (i < argc) { - std::string arg = argv[i]; - if (arg == "-I") { - if (i + 1 >= argc) { - std::cerr << "Error: -I requires a path argument\n"; - return 1; - } - search_paths.push_back(argv[++i]); - } else if (arg == "--no-imports") { - resolve_imports = false; - } else if (arg == "--help" || arg == "-h") { - print_usage(argv[0]); - return 0; - } else if (arg[0] != '-') { - input_file = arg; - if (i + 1 < argc && argv[i + 1][0] != '-') { - output_file = argv[++i]; - } - } else { - std::cerr << "Unknown option: " << arg << "\n"; - print_usage(argv[0]); - return 1; - } - i++; - } - - if (input_file.empty()) { - std::cerr << "Error: No input file specified\n"; - print_usage(argv[0]); - return 1; - } - - if (output_file.empty()) { - output_file = input_file + "r"; - } - - try { - Parser parser; - Resolver resolver; - - for (const auto& path : search_paths) { - resolver.add_search_path(path); - } - - std::cout << "Parsing: " << input_file << "\n"; - auto ast = parser.parse_file(input_file); - - if (!ast) { - std::cerr << "Error: Failed to parse input\n"; - return 1; - } - - std::cout << "Resolving imports...\n"; - - IRGenerator ir_gen; - - std::cout << "Generating IR...\n"; - auto ir = ir_gen.generate(ast); + std::string input_file; + std::string output_file; + std::vector search_paths; + bool resolve_imports = true; - IRModule module; - module.version = IR_VERSION; - module.entry = ir; - - std::cout << "Serializing to: " << output_file << "\n"; - Serializer serializer; - serializer.serialize(module, output_file); - - std::cout << "Done!\n"; - return 0; - - } catch (const std::exception& e) { - std::cerr << "Error: " << e.what() << "\n"; + int i = 1; + while (i < argc) { + std::string arg = argv[i]; + if (arg == "-I") { + if (i + 1 >= argc) { + std::cerr << "Error: -I requires a path argument\n"; return 1; + } + search_paths.push_back(argv[++i]); + } 
else if (arg == "--no-imports") { + resolve_imports = false; + } else if (arg == "--help" || arg == "-h") { + print_usage(argv[0]); + return 0; + } else if (arg[0] != '-') { + input_file = arg; + if (i + 1 < argc && argv[i + 1][0] != '-') { + output_file = argv[++i]; + } + } else { + std::cerr << "Unknown option: " << arg << "\n"; + print_usage(argv[0]); + return 1; } + i++; + } + + if (input_file.empty()) { + std::cerr << "Error: No input file specified\n"; + print_usage(argv[0]); + return 1; + } + + if (output_file.empty()) { + output_file = default_output_path_for(input_file); + } + + try { + Parser parser; + (void) search_paths; + (void) resolve_imports; + + std::shared_ptr ast; + + if (is_flake_reference(input_file)) { + std::cout << "Compiling flake reference: " << input_file << "\n"; + ast = build_flake_ref_ast(input_file); + } else { + std::cout << "Parsing: " << input_file << "\n"; + ast = parser.parse_file(input_file); + } + + if (!ast) { + std::cerr << "Error: Failed to parse input\n"; + return 1; + } + + std::cout << "Resolving imports...\n"; + + IRGenerator ir_gen; + + std::cout << "Generating IR...\n"; + auto ir = ir_gen.generate(ast); + + IRModule module; + module.version = IR_VERSION; + module.entry = ir; + + std::cout << "Serializing to: " << output_file << "\n"; + Serializer serializer; + serializer.serialize(module, output_file); + + std::cout << "Done!\n"; + return 0; + + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << "\n"; + return 1; + } } void print_decompile_usage(const char* prog) { - std::cout << "Usage: " << prog << " decompile \n"; + std::cout << "Usage: " << prog << " decompile \n"; } int run_decompile(int argc, char** argv) { - if (argc < 3) { - print_decompile_usage(argv[0]); - return 1; - } - - std::string input_file = argv[2]; - - try { - Deserializer deserializer; - auto module = deserializer.deserialize(input_file); - - std::cout << "IR Version: " << module.version << "\n"; - std::cout << "Sources: " 
<< module.sources.size() << "\n"; - std::cout << "Imports: " << module.imports.size() << "\n"; - - return 0; - } catch (const std::exception& e) { - std::cerr << "Error: " << e.what() << "\n"; - return 1; - } + if (argc < 3) { + print_decompile_usage(argv[0]); + return 1; + } + + std::string input_file = argv[2]; + + try { + Deserializer deserializer; + auto module = deserializer.deserialize(input_file); + + std::cout << "IR Version: " << module.version << "\n"; + std::cout << "Sources: " << module.sources.size() << "\n"; + std::cout << "Imports: " << module.imports.size() << "\n"; + + return 0; + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << "\n"; + return 1; + } } -} +} // namespace nix_irc int main(int argc, char** argv) { - if (argc < 2) { - nix_irc::print_usage(argv[0]); - return 1; - } - - std::string cmd = argv[1]; - - if (cmd == "compile" || cmd == "c") { - return nix_irc::run_compile(argc - 1, argv + 1); - } else if (cmd == "decompile" || cmd == "d") { - return nix_irc::run_decompile(argc, argv); - } else if (cmd == "help" || cmd == "--help" || cmd == "-h") { - nix_irc::print_usage(argv[0]); - return 0; - } else { - return nix_irc::run_compile(argc, argv); - } + if (argc < 2) { + nix_irc::print_usage(argv[0]); + return 1; + } + + std::string cmd = argv[1]; + + if (cmd == "compile" || cmd == "c") { + return nix_irc::run_compile(argc - 1, argv + 1); + } else if (cmd == "decompile" || cmd == "d") { + return nix_irc::run_decompile(argc, argv); + } else if (cmd == "help" || cmd == "--help" || cmd == "-h") { + nix_irc::print_usage(argv[0]); + return 0; + } else { + return nix_irc::run_compile(argc, argv); + } } diff --git a/src/irc/parser.cpp b/src/irc/parser.cpp index 8a47e2a..7180c8e 100644 --- a/src/irc/parser.cpp +++ b/src/irc/parser.cpp @@ -1,950 +1,984 @@ #include "parser.h" -#include +#include "lexer.h" +#include #include #include +#include #include #include -#include #include -#include -#include namespace nix_irc { static 
std::string trim(const std::string& s) { - size_t start = s.find_first_not_of(" \t\n\r"); - if (start == std::string::npos) return ""; - size_t end = s.find_last_not_of(" \t\n\r"); - return s.substr(start, end - start + 1); + size_t start = s.find_first_not_of(" \t\n\r"); + if (start == std::string::npos) + return ""; + size_t end = s.find_last_not_of(" \t\n\r"); + return s.substr(start, end - start + 1); +} + +static const char* token_type_name(Token::Type type) { + switch (type) { + case Token::LPAREN: + return "LPAREN"; + case Token::RPAREN: + return "RPAREN"; + case Token::LBRACE: + return "LBRACE"; + case Token::RBRACE: + return "RBRACE"; + case Token::LBRACKET: + return "LBRACKET"; + case Token::RBRACKET: + return "RBRACKET"; + case Token::IDENT: + return "IDENT"; + case Token::STRING: + return "STRING"; + case Token::STRING_INTERP: + return "STRING_INTERP"; + case Token::INDENTED_STRING: + return "INDENTED_STRING"; + case Token::INDENTED_STRING_INTERP: + return "INDENTED_STRING_INTERP"; + case Token::PATH: + return "PATH"; + case Token::LOOKUP_PATH: + return "LOOKUP_PATH"; + case Token::INT: + return "INT"; + case Token::FLOAT: + return "FLOAT"; + case Token::URI: + return "URI"; + case Token::BOOL: + return "BOOL"; + case Token::LET: + return "LET"; + case Token::IN: + return "IN"; + case Token::REC: + return "REC"; + case Token::IF: + return "IF"; + case Token::THEN: + return "THEN"; + case Token::ELSE: + return "ELSE"; + case Token::ASSERT: + return "ASSERT"; + case Token::WITH: + return "WITH"; + case Token::INHERIT: + return "INHERIT"; + case Token::IMPORT: + return "IMPORT"; + case Token::DOT: + return "DOT"; + case Token::SEMICOLON: + return "SEMICOLON"; + case Token::COLON: + return "COLON"; + case Token::EQUALS: + return "EQUALS"; + case Token::AT: + return "AT"; + case Token::COMMA: + return "COMMA"; + case Token::QUESTION: + return "QUESTION"; + case Token::ELLIPSIS: + return "ELLIPSIS"; + case Token::PLUS: + return "PLUS"; + case Token::MINUS: + 
return "MINUS"; + case Token::STAR: + return "STAR"; + case Token::SLASH: + return "SLASH"; + case Token::CONCAT: + return "CONCAT"; + case Token::MERGE: + return "MERGE"; + case Token::EQEQ: + return "EQEQ"; + case Token::NE: + return "NE"; + case Token::LT: + return "LT"; + case Token::GT: + return "GT"; + case Token::LE: + return "LE"; + case Token::GE: + return "GE"; + case Token::AND: + return "AND"; + case Token::OR: + return "OR"; + case Token::IMPL: + return "IMPL"; + case Token::NOT: + return "NOT"; + case Token::EOF_: + return "EOF"; + } + return "UNKNOWN"; } static std::string read_file(const std::string& path) { - FILE* f = fopen(path.c_str(), "r"); - if (!f) { - throw std::runtime_error("Cannot open file: " + path); - } - fseek(f, 0, SEEK_END); - long size = ftell(f); - fseek(f, 0, SEEK_SET); - std::string content(size, '\0'); - if (fread(content.data(), 1, size, f) != static_cast(size)) { - fclose(f); - throw std::runtime_error("Failed to read file: " + path); - } - fclose(f); - return content; + FILE* f = fopen(path.c_str(), "r"); + if (!f) { + throw std::runtime_error("Cannot open file: " + path); + } + + // Ensure FILE* is always closed + auto file_closer = [](FILE* fp) { + if (fp) + fclose(fp); + }; + std::unique_ptr file_guard(f, file_closer); + + fseek(f, 0, SEEK_END); + long size = ftell(f); + fseek(f, 0, SEEK_SET); + std::string content(size, '\0'); + if (fread(content.data(), 1, size, f) != static_cast(size)) { + throw std::runtime_error("Failed to read file: " + path); + } + return content; } static std::pair run_command(const std::string& cmd) { - std::array buffer; - std::string result; - std::string error; + std::array buffer; + std::string result; - FILE* pipe = popen(cmd.c_str(), "r"); - if (!pipe) throw std::runtime_error("popen failed"); + FILE* pipe = popen(cmd.c_str(), "r"); + if (!pipe) + throw std::runtime_error("popen failed"); - while (fgets(buffer.data(), buffer.size(), pipe) != nullptr) { - result += buffer.data(); - } + while 
(fgets(buffer.data(), buffer.size(), pipe) != nullptr) { + result += buffer.data(); + } - int status = pclose(pipe); - if (status != 0) { - throw std::runtime_error("Command failed: " + cmd); - } - return {result, error}; + int status = pclose(pipe); + if (status != 0) { + throw std::runtime_error("Command failed: " + cmd); + } + return {result, ""}; } -struct Token { - enum Type { - LPAREN, RPAREN, LBRACE, RBRACE, LBRACKET, RBRACKET, - IDENT, STRING, STRING_INTERP, PATH, INT, BOOL, - LET, IN, REC, IF, THEN, ELSE, ASSERT, WITH, INHERIT, - DOT, SEMICOLON, COLON, EQUALS, AT, COMMA, QUESTION, ELLIPSIS, - // Operators - PLUS, MINUS, STAR, SLASH, CONCAT, - EQEQ, NE, LT, GT, LE, GE, - AND, OR, IMPL, NOT, - EOF_ - } type; - std::string value; - size_t line; - size_t col; -}; - -class Lexer { -public: - Lexer(const std::string& input) : input(input), pos(0), line(1), col(1) {} - - std::vector tokenize() { - #define TOKEN(t) Token{Token::t, "", line, col} - - while (pos < input.size()) { - skip_whitespace(); - if (pos >= input.size()) break; - - char c = input[pos]; - - if (c == '(') { emit(TOKEN(LPAREN)); } - else if (c == ')') { emit(TOKEN(RPAREN)); } - else if (c == '{') { emit(TOKEN(LBRACE)); } - else if (c == '}') { emit(TOKEN(RBRACE)); } - else if (c == '[') { emit(TOKEN(LBRACKET)); } - else if (c == ']') { emit(TOKEN(RBRACKET)); } - else if (c == ';') { emit(TOKEN(SEMICOLON)); } - else if (c == ':') { emit(TOKEN(COLON)); } - else if (c == '@') { emit(TOKEN(AT)); } - else if (c == ',') { emit(TOKEN(COMMA)); } - else if (c == '"') { tokenize_string(); } - // Two-char operators - else if (c == '=' && pos + 1 < input.size() && input[pos + 1] == '=') { - tokens.push_back(TOKEN(EQEQ)); - pos += 2; col += 2; - } - else if (c == '=') { emit(TOKEN(EQUALS)); } - else if (c == '!' 
&& pos + 1 < input.size() && input[pos + 1] == '=') { - tokens.push_back(TOKEN(NE)); - pos += 2; col += 2; - } - else if (c == '<' && pos + 1 < input.size() && input[pos + 1] == '=') { - tokens.push_back(TOKEN(LE)); - pos += 2; col += 2; - } - else if (c == '>' && pos + 1 < input.size() && input[pos + 1] == '=') { - tokens.push_back(TOKEN(GE)); - pos += 2; col += 2; - } - else if (c == '+' && pos + 1 < input.size() && input[pos + 1] == '+') { - tokens.push_back(TOKEN(CONCAT)); - pos += 2; col += 2; - } - else if (c == '&' && pos + 1 < input.size() && input[pos + 1] == '&') { - tokens.push_back(TOKEN(AND)); - pos += 2; col += 2; - } - else if (c == '|' && pos + 1 < input.size() && input[pos + 1] == '|') { - tokens.push_back(TOKEN(OR)); - pos += 2; col += 2; - } - else if (c == '-' && pos + 1 < input.size() && input[pos + 1] == '>') { - tokens.push_back(TOKEN(IMPL)); - pos += 2; col += 2; - } - // Single-char operators - else if (c == '+') { emit(TOKEN(PLUS)); } - else if (c == '*') { emit(TOKEN(STAR)); } - else if (c == '/') { - // Check if it's a path or division - if (pos + 1 < input.size() && (isalnum(input[pos + 1]) || input[pos + 1] == '.')) { - tokenize_path(); - } else { - emit(TOKEN(SLASH)); - } - } - else if (c == '<') { emit(TOKEN(LT)); } - else if (c == '>') { emit(TOKEN(GT)); } - else if (c == '!') { emit(TOKEN(NOT)); } - else if (c == '.') { - // Check for ellipsis (...) - if (pos + 2 < input.size() && input[pos + 1] == '.' 
&& input[pos + 2] == '.') { - tokens.push_back(TOKEN(ELLIPSIS)); - pos += 3; col += 3; - } else { - emit(TOKEN(DOT)); - } - } - else if (c == '?') { emit(TOKEN(QUESTION)); } - else if (c == '-') { - // Check if it's a negative number or minus operator - if (pos + 1 < input.size() && isdigit(input[pos + 1])) { - tokenize_int(); - } else { - emit(TOKEN(MINUS)); - } - } - else if (isdigit(c)) { tokenize_int(); } - else if (isalpha(c) || c == '_') { tokenize_ident(); } - else { pos++; col++; } - } - tokens.push_back({Token::EOF_, "", line, col}); - - #undef TOKEN - return tokens; - } - -private: - std::vector tokens; - const std::string& input; - size_t pos; - size_t line; - size_t col; - - void emit(Token t) { - tokens.push_back(t); - pos++; - col++; - } - - void skip_whitespace() { - while (pos < input.size()) { - char c = input[pos]; - if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { - if (c == '\n') { line++; col = 1; } - else { col++; } - pos++; - } else if (c == '#') { - while (pos < input.size() && input[pos] != '\n') pos++; - } else { - break; - } - } - } - - void tokenize_string() { - pos++; - std::string s; - bool has_interp = false; - - while (pos < input.size() && input[pos] != '"') { - if (input[pos] == '\\' && pos + 1 < input.size()) { - pos++; - switch (input[pos]) { - case 'n': s += '\n'; break; - case 't': s += '\t'; break; - case 'r': s += '\r'; break; - case '"': s += '"'; break; - case '\\': s += '\\'; break; - case '$': s += '$'; break; // Escaped $ - default: s += input[pos]; break; - } - pos++; - } else if (input[pos] == '$' && pos + 1 < input.size() && input[pos + 1] == '{') { - // Found interpolation marker - has_interp = true; - s += input[pos]; // Keep $ in raw string - pos++; - } else { - s += input[pos]; - pos++; - } - } - pos++; - - Token::Type type = has_interp ? 
Token::STRING_INTERP : Token::STRING; - tokens.push_back({type, s, line, col}); - col += s.size() + 2; - } - - void tokenize_path() { - size_t start = pos; - while (pos < input.size() && !isspace(input[pos]) && - input[pos] != '(' && input[pos] != ')' && - input[pos] != '{' && input[pos] != '}' && - input[pos] != '[' && input[pos] != ']') { - pos++; - } - std::string path = input.substr(start, pos - start); - tokens.push_back({Token::PATH, path, line, col}); - col += path.size(); - } - - void tokenize_int() { - size_t start = pos; - if (input[pos] == '-') pos++; - while (pos < input.size() && isdigit(input[pos])) pos++; - std::string num = input.substr(start, pos - start); - tokens.push_back({Token::INT, num, line, col}); - col += num.size(); - } - - void tokenize_ident() { - size_t start = pos; - while (pos < input.size() && (isalnum(input[pos]) || input[pos] == '_' || input[pos] == '-')) pos++; - std::string ident = input.substr(start, pos - start); - - Token::Type type = Token::IDENT; - if (ident == "let") type = Token::LET; - else if (ident == "in") type = Token::IN; - else if (ident == "rec") type = Token::REC; - else if (ident == "if") type = Token::IF; - else if (ident == "then") type = Token::THEN; - else if (ident == "else") type = Token::ELSE; - else if (ident == "assert") type = Token::ASSERT; - else if (ident == "with") type = Token::WITH; - else if (ident == "inherit") type = Token::INHERIT; - else if (ident == "true") type = Token::BOOL; - else if (ident == "false") type = Token::BOOL; - - tokens.push_back({type, ident, line, col}); - col += ident.size(); - } -}; - class Parser::Impl { public: - std::vector tokens; - size_t pos = 0; - std::string current_file; + std::vector tokens; + size_t pos = 0; + std::string current_file; - const Token& current() { - if (pos < tokens.size()) return tokens[pos]; - static Token eof{Token::EOF_, "", 0, 0}; - return eof; + const Token& current() { + if (pos < tokens.size()) + return tokens[pos]; + static Token 
eof{Token::EOF_, "", 0, 0}; + return eof; + } + + void advance() { pos++; } + + bool consume(Token::Type type) { + if (current().type == type) { + advance(); + return true; } + return false; + } - void advance() { pos++; } - - bool consume(Token::Type type) { - if (current().type == type) { - advance(); - return true; - } - return false; + bool expect(Token::Type type) { + if (current().type != type) { + throw std::runtime_error("Expected token " + std::string(token_type_name(type)) + + " but got " + token_type_name(current().type) + " at " + + std::to_string(current().line) + ":" + + std::to_string(current().col)); } + advance(); + return true; + } - bool expect(Token::Type type) { - if (current().type != type) { - std::cerr << "Expected token " << type << " but got " << current().type - << " at " << current().line << ":" << current().col << "\n"; - return false; - } - advance(); - return true; + bool is_right_associative(Token::Type type) { return type == Token::IMPL; } + + // Get operator precedence (higher = tighter binding) + int get_precedence(Token::Type type) { + switch (type) { + case Token::MERGE: + return 1; // low precedence - binds loosely, but must be > 0 to be recognized as binary op + case Token::OR: + return 1; + case Token::AND: + return 2; + case Token::IMPL: + return 3; + case Token::EQEQ: + case Token::NE: + return 4; + case Token::LT: + case Token::GT: + case Token::LE: + case Token::GE: + return 5; + case Token::CONCAT: + return 6; + case Token::PLUS: + case Token::MINUS: + return 7; + case Token::STAR: + case Token::SLASH: + return 8; + default: + return 0; } + } - // Get operator precedence (higher = tighter binding) - int get_precedence(Token::Type type) { - switch (type) { - case Token::OR: return 1; - case Token::AND: return 2; - case Token::IMPL: return 3; - case Token::EQEQ: case Token::NE: return 4; - case Token::LT: case Token::GT: case Token::LE: case Token::GE: return 5; - case Token::CONCAT: return 6; - case Token::PLUS: case 
Token::MINUS: return 7; - case Token::STAR: case Token::SLASH: return 8; - default: return 0; - } + // Convert token type to binary operator + BinaryOp token_to_binop(Token::Type type) { + switch (type) { + case Token::PLUS: + return BinaryOp::ADD; + case Token::MINUS: + return BinaryOp::SUB; + case Token::STAR: + return BinaryOp::MUL; + case Token::SLASH: + return BinaryOp::DIV; + case Token::CONCAT: + return BinaryOp::CONCAT; + case Token::MERGE: + return BinaryOp::MERGE; + case Token::EQEQ: + return BinaryOp::EQ; + case Token::NE: + return BinaryOp::NE; + case Token::LT: + return BinaryOp::LT; + case Token::GT: + return BinaryOp::GT; + case Token::LE: + return BinaryOp::LE; + case Token::GE: + return BinaryOp::GE; + case Token::AND: + return BinaryOp::AND; + case Token::OR: + return BinaryOp::OR; + case Token::IMPL: + return BinaryOp::IMPL; + default: + throw std::runtime_error("Invalid binary operator"); } + } - // Convert token type to binary operator - BinaryOp token_to_binop(Token::Type type) { - switch (type) { - case Token::PLUS: return BinaryOp::ADD; - case Token::MINUS: return BinaryOp::SUB; - case Token::STAR: return BinaryOp::MUL; - case Token::SLASH: return BinaryOp::DIV; - case Token::CONCAT: return BinaryOp::CONCAT; - case Token::EQEQ: return BinaryOp::EQ; - case Token::NE: return BinaryOp::NE; - case Token::LT: return BinaryOp::LT; - case Token::GT: return BinaryOp::GT; - case Token::LE: return BinaryOp::LE; - case Token::GE: return BinaryOp::GE; - case Token::AND: return BinaryOp::AND; - case Token::OR: return BinaryOp::OR; - case Token::IMPL: return BinaryOp::IMPL; - default: throw std::runtime_error("Invalid binary operator"); - } + std::shared_ptr parse_expr() { + // Try to parse lambda + auto lambda = try_parse_lambda(); + if (lambda) + return lambda; + + if (consume(Token::IF)) { + auto cond = parse_expr(); + expect(Token::THEN); + auto then = parse_expr(); + expect(Token::ELSE); + auto else_ = parse_expr(); + return 
std::make_shared(IfNode(cond, then, else_)); } - - std::shared_ptr parse_expr() { - // Try to parse lambda - auto lambda = try_parse_lambda(); - if (lambda) return lambda; - - if (consume(Token::IF)) { - auto cond = parse_expr(); - expect(Token::THEN); - auto then = parse_expr(); - expect(Token::ELSE); - auto else_ = parse_expr(); - return std::make_shared(IfNode(cond, then, else_)); - } - if (consume(Token::LET)) { - bool is_rec = consume(Token::REC); - std::vector>> bindings; - parse_bindings(bindings); - expect(Token::IN); - auto body = parse_expr(); - - if (is_rec) { - auto letrec = LetRecNode(body); - letrec.bindings = std::move(bindings); - return std::make_shared(std::move(letrec)); - } else { - auto let = LetNode(body); - let.bindings = std::move(bindings); - return std::make_shared(std::move(let)); - } - } - if (consume(Token::ASSERT)) { - auto cond = parse_expr(); - expect(Token::SEMICOLON); - auto body = parse_expr(); - return std::make_shared(AssertNode(cond, body)); - } - if (consume(Token::WITH)) { - auto attrs = parse_expr(); - expect(Token::SEMICOLON); - auto body = parse_expr(); - return std::make_shared(WithNode(attrs, body)); - } - - return parse_expr1(); - } - - std::shared_ptr parse_expr1() { - return parse_binary_op(0); - } - - // Precedence climbing for binary operators - std::shared_ptr parse_binary_op(int min_prec) { - auto left = parse_selection(); - - while (true) { - int prec = get_precedence(current().type); - if (prec == 0 || prec < min_prec) break; - - Token op_token = current(); - advance(); - - auto right = parse_binary_op(prec + 1); - left = std::make_shared(BinaryOpNode( - token_to_binop(op_token.type), - left, - right - )); - } - - return left; - } - - std::shared_ptr parse_selection() { - auto left = parse_expr2(); - - while (current().type == Token::DOT) { - advance(); - Token name = current(); - if (name.type == Token::IDENT) { - advance(); - auto attr = std::make_shared(ConstStringNode(name.value)); - auto result = 
std::make_shared(SelectNode(left, attr)); - - if (consume(Token::DOT)) { - Token name2 = current(); - if (name2.type == Token::IDENT) { - advance(); - auto attr2 = std::make_shared(ConstStringNode(name2.value)); - auto* curr = result->get_if(); - while (curr && consume(Token::DOT)) { - Token n = current(); - expect(Token::IDENT); - auto a = std::make_shared(ConstStringNode(n.value)); - curr->attr = std::make_shared(AppNode( - std::make_shared(AppNode(curr->attr, a)), - std::make_shared(ConstNullNode()) - )); - } - } - } - return result; - } else if (consume(Token::LBRACE)) { - auto result = std::make_shared(SelectNode(left, std::make_shared(ConstStringNode(name.value)))); - parse_expr_attrs(result); - expect(Token::RBRACE); - return result; - } - return left; - } - - return left; - } - - void parse_expr_attrs(std::shared_ptr&) { - // Extended selection syntax - } - - std::shared_ptr parse_expr2() { - std::shared_ptr left = parse_expr3(); - - while (true) { - if (current().type == Token::LBRACKET) { - advance(); - auto arg = parse_expr(); - expect(Token::RBRACKET); - left = std::make_shared(AppNode(left, arg)); - } else if (current().type == Token::STRING) { - Token s = current(); - advance(); - auto arg = std::make_shared(ConstStringNode(s.value)); - left = std::make_shared(AppNode(left, arg)); - } else { - break; - } - } - - return left; - } - - std::shared_ptr parse_expr3() { - // Handle unary operators - if (consume(Token::MINUS)) { - auto operand = parse_expr3(); - return std::make_shared(UnaryOpNode(UnaryOp::NEG, operand)); - } - - if (consume(Token::NOT)) { - auto operand = parse_expr3(); - return std::make_shared(UnaryOpNode(UnaryOp::NOT, operand)); - } - - if (consume(Token::LPAREN)) { - auto expr = parse_expr(); - expect(Token::RPAREN); - return expr; - } - - if (consume(Token::LBRACE)) { - return parse_attrs(); - } - - if (consume(Token::LBRACKET)) { - return parse_list(); - } - - Token t = current(); - - if (t.type == Token::IDENT) { - advance(); - 
return std::make_shared(VarNode(0, t.value)); - } - - if (t.type == Token::INT) { - advance(); - return std::make_shared(ConstIntNode(std::stoll(t.value))); - } - - if (t.type == Token::STRING) { - advance(); - return std::make_shared(ConstStringNode(t.value)); - } - - if (t.type == Token::STRING_INTERP) { - Token str_token = current(); - advance(); - return parse_string_interp(str_token.value); - } - - if (t.type == Token::PATH) { - advance(); - return std::make_shared(ConstPathNode(t.value)); - } - - if (t.type == Token::BOOL) { - advance(); - return std::make_shared(ConstBoolNode(t.value == "true")); - } - - std::cerr << "Unknown token: " << t.value << " (type " << t.type << ")\n"; - advance(); - return std::make_shared(ConstNullNode()); - } - - std::shared_ptr parse_attrs() { - auto attrs = AttrsetNode(false); + if (consume(Token::LET)) { + // Check for ancient let syntax: let { x = 1; body = x; } + if (current().type == Token::LBRACE) { + advance(); // consume { + std::vector>> bindings; + std::shared_ptr body_expr; while (current().type != Token::RBRACE && current().type != Token::EOF_) { - if (consume(Token::REC)) { - attrs.recursive = true; - continue; - } + if (current().type != Token::IDENT && current().type != Token::STRING && + current().type != Token::INDENTED_STRING) { + throw std::runtime_error("Expected identifier in ancient let"); + } - // Handle inherit keyword - if (consume(Token::INHERIT)) { - std::shared_ptr source; + std::string name = current().value; + advance(); + expect(Token::EQUALS); + auto value = parse_expr(); + expect(Token::SEMICOLON); - // Check for (expr) form - if (consume(Token::LPAREN)) { - source = parse_expr(); - expect(Token::RPAREN); - } - - // Parse identifier list - while (current().type == Token::IDENT) { - Token name = current(); - advance(); - - if (source) { - // inherit (expr) x → x = expr.x - auto select = std::make_shared(SelectNode( - source, - std::make_shared(ConstStringNode(name.value)) - )); - 
attrs.attrs.push_back({name.value, select}); - } else { - // inherit x → x = x - auto var = std::make_shared(VarNode(0, name.value)); - attrs.attrs.push_back({name.value, var}); - } - } - - expect(Token::SEMICOLON); - continue; - } - - if (current().type == Token::IDENT || current().type == Token::STRING) { - Token key = current(); - advance(); - std::string key_str = key.value; - - if (consume(Token::EQUALS)) { - auto value = parse_expr(); - attrs.attrs.push_back({key_str, value}); - } else if (consume(Token::AT)) { - auto pattern = parse_expr(); - auto value = parse_expr(); - attrs.attrs.push_back({key_str, value}); - } - } - - if (consume(Token::COMMA)) continue; - if (consume(Token::SEMICOLON)) continue; - - // If we get here and haven't handled the token, break - if (current().type != Token::RBRACE && current().type != Token::EOF_) { - break; - } + // Check if this is the special 'body' binding + if (name == "body") { + body_expr = value; + } else { + bindings.push_back({name, value}); + } } expect(Token::RBRACE); - return std::make_shared(std::move(attrs)); + + if (!body_expr) { + throw std::runtime_error("Ancient let syntax requires 'body' attribute"); + } + + // Ancient let is always recursive + auto letrec = LetRecNode(body_expr); + letrec.bindings = std::move(bindings); + return std::make_shared(std::move(letrec)); + } + + // Modern let syntax: let x = 1; in x + bool is_rec = consume(Token::REC); + std::vector>> bindings; + parse_bindings(bindings); + expect(Token::IN); + auto body = parse_expr(); + + if (is_rec) { + auto letrec = LetRecNode(body); + letrec.bindings = std::move(bindings); + return std::make_shared(std::move(letrec)); + } else { + auto let = LetNode(body); + let.bindings = std::move(bindings); + return std::make_shared(std::move(let)); + } + } + if (consume(Token::ASSERT)) { + auto cond = parse_expr(); + expect(Token::SEMICOLON); + auto body = parse_expr(); + return std::make_shared(AssertNode(cond, body)); + } + if (consume(Token::WITH)) 
{ + auto attrs = parse_expr(); + expect(Token::SEMICOLON); + auto body = parse_expr(); + return std::make_shared(WithNode(attrs, body)); } - std::shared_ptr parse_list() { - std::shared_ptr list = std::make_shared(ConstNullNode()); + return parse_expr1(); + } - if (consume(Token::RBRACKET)) { - return list; - } + std::shared_ptr parse_expr1() { return parse_binary_op(0); } - std::vector> elements; - while (current().type != Token::RBRACKET) { - elements.push_back(parse_expr()); - if (!consume(Token::COMMA)) break; - } - expect(Token::RBRACKET); + // Precedence climbing for binary operators + std::shared_ptr parse_binary_op(int min_prec) { + auto left = parse_selection(); - for (auto it = elements.rbegin(); it != elements.rend(); ++it) { - list = std::make_shared(AppNode( - std::make_shared(AppNode( - std::make_shared(VarNode(0, "__list")), - *it - )), - list - )); - } + while (true) { + int prec = get_precedence(current().type); + if (prec == 0 || prec < min_prec) + break; - return list; + Token op_token = current(); + advance(); + + int next_prec = is_right_associative(op_token.type) ? 
prec : prec + 1; + auto right = parse_binary_op(next_prec); + left = std::make_shared(BinaryOpNode(token_to_binop(op_token.type), left, right)); } - void parse_bindings(std::vector>>& bindings) { - while (current().type == Token::IDENT || current().type == Token::INHERIT) { - // Handle inherit keyword - if (consume(Token::INHERIT)) { - std::shared_ptr source; + return left; + } - // Check for (expr) form - if (consume(Token::LPAREN)) { - source = parse_expr(); - expect(Token::RPAREN); - } + std::shared_ptr parse_selection() { + auto left = parse_expr2(); - // Parse identifier list - while (current().type == Token::IDENT) { - Token name = current(); - advance(); - - if (source) { - // inherit (expr) x → x = expr.x - auto select = std::make_shared(SelectNode( - source, - std::make_shared(ConstStringNode(name.value)) - )); - bindings.push_back({name.value, select}); - } else { - // inherit x → x = x - auto var = std::make_shared(VarNode(0, name.value)); - bindings.push_back({name.value, var}); - } - } - - expect(Token::SEMICOLON); - continue; - } - - if (current().type != Token::IDENT) break; - Token key = current(); - advance(); - - if (consume(Token::AT)) { - auto pattern = parse_expr(); - auto value = parse_expr(); - bindings.push_back({key.value, value}); - } else { - expect(Token::EQUALS); - auto value = parse_expr(); - bindings.push_back({key.value, value}); - } - - if (!consume(Token::SEMICOLON)) break; - } + while (current().type == Token::DOT) { + advance(); + Token name = current(); + if (name.type == Token::IDENT) { + advance(); + auto attr = std::make_shared(ConstStringNode(name.value)); + left = std::make_shared(SelectNode(left, attr)); + // Continue loop to handle multi-dot selections (a.b.c) + continue; + } + // If we get here, the token after DOT was not IDENT + // This is a parse error, but we'll just return what we have + break; } - // Try to parse lambda, return nullptr if not a lambda - std::shared_ptr try_parse_lambda() { - size_t saved_pos = pos; 
+ // Check for 'or' default value: a.b or default + // This is checked after all selections, so works for any selection depth + // 'or' is contextual - only special after a selection expression + if (left->get_if() && current().type == Token::IDENT && current().value == "or") { + advance(); + // Parse default as a primary expression + auto default_expr = parse_expr3(); + // Update the SelectNode with the default expression + auto* select = left->get_if(); + select->default_expr = default_expr; + } - // Check for named pattern: arg@{ ... }: - std::optional named_arg; - if (current().type == Token::IDENT) { - Token name = current(); - advance(); - if (consume(Token::AT)) { - named_arg = name.value; - } else if (consume(Token::COLON)) { - // Simple lambda: x: body - auto body = parse_expr(); - auto lambda = LambdaNode(1, body); - lambda.param_name = name.value; - return std::make_shared(std::move(lambda)); - } else { - // Not a lambda, restore position - pos = saved_pos; - return nullptr; - } + return left; + } + + std::shared_ptr parse_expr2() { + std::shared_ptr left = parse_expr3(); + + while (true) { + if (current().type == Token::STRING) { + Token s = current(); + advance(); + auto arg = std::make_shared(ConstStringNode(s.value)); + left = std::make_shared(AppNode(left, arg)); + } else if (current().type == Token::LPAREN) { + // Function application with parenthesized argument: func (expr) + advance(); + auto arg = parse_expr(); + expect(Token::RPAREN); + left = std::make_shared(AppNode(left, arg)); + } else if (current().type == Token::IDENT || current().type == Token::INT || + current().type == Token::FLOAT || current().type == Token::BOOL || + current().type == Token::PATH || current().type == Token::LOOKUP_PATH || + current().type == Token::URI || current().type == Token::LBRACKET || + current().type == Token::LBRACE) { + // Juxtaposition application: f x + // Parse the argument as a primary expression (which handles lists, attrsets, etc.) 
+ auto arg = parse_expr3(); + left = std::make_shared(AppNode(left, arg)); + } else { + break; + } + } + + return left; + } + + std::shared_ptr parse_expr3() { + // Handle import expression + if (consume(Token::IMPORT)) { + auto path_expr = parse_expr3(); + return std::make_shared(ImportNode(path_expr)); + } + + // Handle unary operators + if (consume(Token::MINUS)) { + auto operand = parse_expr3(); + return std::make_shared(UnaryOpNode(UnaryOp::NEG, operand)); + } + + if (consume(Token::NOT)) { + auto operand = parse_expr3(); + return std::make_shared(UnaryOpNode(UnaryOp::NOT, operand)); + } + + if (consume(Token::LPAREN)) { + auto expr = parse_expr(); + expect(Token::RPAREN); + return expr; + } + + // Handle rec { ... } syntax + if (consume(Token::REC)) { + expect(Token::LBRACE); + auto attrs = parse_attrs(); + if (auto* attrset = attrs->get_if()) { + attrset->recursive = true; + } + return attrs; + } + + if (consume(Token::LBRACE)) { + return parse_attrs(); + } + + if (consume(Token::LBRACKET)) { + return parse_list(); + } + + Token t = current(); + + if (t.type == Token::IDENT) { + advance(); + return std::make_shared(VarNode(0, t.value)); + } + + if (t.type == Token::INT) { + advance(); + return std::make_shared(ConstIntNode(std::stoll(t.value))); + } + + if (t.type == Token::FLOAT) { + advance(); + return std::make_shared(ConstFloatNode(std::stod(t.value))); + } + + if (t.type == Token::URI) { + advance(); + return std::make_shared(ConstURINode(t.value)); + } + + if (t.type == Token::STRING) { + advance(); + return std::make_shared(ConstStringNode(t.value)); + } + + if (t.type == Token::STRING_INTERP) { + Token str_token = current(); + advance(); + return parse_string_interp(str_token.value); + } + + if (t.type == Token::INDENTED_STRING) { + advance(); + return std::make_shared(ConstStringNode(t.value)); + } + + if (t.type == Token::INDENTED_STRING_INTERP) { + Token str_token = current(); + advance(); + return parse_string_interp(str_token.value); + } + + if 
(t.type == Token::PATH) { + advance(); + return std::make_shared(ConstPathNode(t.value)); + } + + if (t.type == Token::LOOKUP_PATH) { + advance(); + return std::make_shared(ConstLookupPathNode(t.value)); + } + + if (t.type == Token::BOOL) { + advance(); + return std::make_shared(ConstBoolNode(t.value == "true")); + } + + throw std::runtime_error("Unknown token: " + t.value + " (type " + std::to_string(t.type) + + ")"); + } + + std::shared_ptr parse_attrs() { + auto attrs = AttrsetNode(false); + + while (current().type != Token::RBRACE && current().type != Token::EOF_) { + if (consume(Token::REC)) { + attrs.recursive = true; + continue; + } + + // Handle inherit keyword + if (consume(Token::INHERIT)) { + std::shared_ptr source; + + // Check for (expr) form + if (consume(Token::LPAREN)) { + source = parse_expr(); + expect(Token::RPAREN); } - // Check for pattern: { ... }: - if (current().type == Token::LBRACE) { - advance(); + // Parse identifier list + while (current().type == Token::IDENT) { + Token name = current(); + advance(); - // Parse pattern fields - struct Field { - std::string name; - std::optional> default_val; - }; - std::vector fields; - bool has_ellipsis = false; - - while (current().type != Token::RBRACE && current().type != Token::EOF_) { - if (consume(Token::ELLIPSIS)) { - has_ellipsis = true; - if (consume(Token::COMMA)) continue; - break; - } - - if (current().type == Token::IDENT) { - Token field_name = current(); - advance(); - - Field field; - field.name = field_name.value; - - // Check for default value - if (consume(Token::QUESTION)) { - field.default_val = parse_expr(); - } - - fields.push_back(field); - - if (consume(Token::COMMA)) continue; - break; - } else { - break; - } - } - - if (!consume(Token::RBRACE)) { - // Not a lambda pattern, restore - pos = saved_pos; - return nullptr; - } - - if (!consume(Token::COLON)) { - // Not a lambda, restore - pos = saved_pos; - return nullptr; - } - - // Parse body - auto body = parse_expr(); - - // 
Desugar pattern to lambda with let bindings - // { a, b ? x }: body → arg: let a = arg.a; b = if arg ? a then arg.a else x; in body - - std::string arg_name = named_arg.value_or("_arg"); - auto arg_var = std::make_shared(VarNode(0, arg_name)); - - std::vector>> bindings; - - for (const auto& field : fields) { - // Create arg.field selection - auto select = std::make_shared(SelectNode( - arg_var, - std::make_shared(ConstStringNode(field.name)) - )); - - if (field.default_val) { - // if arg ? field then arg.field else default - auto has_attr = std::make_shared(HasAttrNode( - arg_var, - std::make_shared(ConstStringNode(field.name)) - )); - auto if_node = std::make_shared(IfNode( - has_attr, - select, - *field.default_val - )); - bindings.push_back({field.name, if_node}); - } else { - bindings.push_back({field.name, select}); - } - } - - // If named pattern, also bind the argument name - if (named_arg) { - bindings.push_back({*named_arg, arg_var}); - } - - // Create let expression - auto let = LetNode(body); - let.bindings = std::move(bindings); - auto let_node = std::make_shared(std::move(let)); - - // Create lambda - auto lambda = LambdaNode(1, let_node); - lambda.param_name = arg_name; - lambda.strict_pattern = !has_ellipsis; - return std::make_shared(std::move(lambda)); + if (source) { + // inherit (expr) x → x = expr.x + auto select = std::make_shared( + SelectNode(source, std::make_shared(ConstStringNode(name.value)))); + attrs.attrs.push_back(AttrBinding(name.value, select)); + } else { + // inherit x → x = x + auto var = std::make_shared(VarNode(0, name.value)); + attrs.attrs.push_back(AttrBinding(name.value, var)); + } } - // Not a lambda + expect(Token::SEMICOLON); + continue; + } + + // Check for dynamic attribute name: ${expr} = value + if (current().type == Token::STRING_INTERP || + current().type == Token::INDENTED_STRING_INTERP) { + Token str_token = current(); + advance(); + auto name_expr = parse_string_interp(str_token.value); + + if 
(consume(Token::EQUALS)) { + auto value = parse_expr(); + // Dynamic attribute - name is evaluated at runtime + attrs.attrs.push_back(AttrBinding(name_expr, value)); + } + } else if (current().type == Token::IDENT || current().type == Token::STRING || + current().type == Token::INDENTED_STRING) { + // Parse attribute path: a.b.c = value + std::vector path; + path.push_back(current().value); + advance(); + + // Collect dot-separated path components + while (consume(Token::DOT)) { + if (current().type == Token::IDENT || current().type == Token::STRING || + current().type == Token::INDENTED_STRING) { + path.push_back(current().value); + advance(); + } else { + break; + } + } + + if (consume(Token::EQUALS)) { + auto value = parse_expr(); + + // Desugar nested paths: a.b.c = v becomes a = { b = { c = v; }; } + if (path.size() == 1) { + // Simple case: just one key + attrs.attrs.push_back(AttrBinding(path[0], value)); + } else { + // Nested case: build nested attrsets from right to left + auto nested = value; + for (int i = path.size() - 1; i > 0; i--) { + auto inner_attrs = AttrsetNode(false); + inner_attrs.attrs.push_back(AttrBinding(path[i], nested)); + nested = std::make_shared(std::move(inner_attrs)); + } + attrs.attrs.push_back(AttrBinding(path[0], nested)); + } + } else if (consume(Token::AT)) { + // @ pattern - not affected by nested paths + auto pattern = parse_expr(); + auto value = parse_expr(); + attrs.attrs.push_back(AttrBinding(path[0], value)); + } + } + + if (consume(Token::COMMA)) + continue; + if (consume(Token::SEMICOLON)) + continue; + + // If we get here and haven't handled the token, break + if (current().type != Token::RBRACE && current().type != Token::EOF_) { + break; + } + } + + expect(Token::RBRACE); + return std::make_shared(std::move(attrs)); + } + + // Parse a list element: supports selections but NOT juxtaposition application + // This prevents [1 2 3] from being parsed as ((1 2) 3) + std::shared_ptr parse_list_element() { + auto left = 
parse_expr3(); + + // Handle selections (a.b.c) + while (current().type == Token::DOT) { + advance(); + Token name = current(); + if (name.type == Token::IDENT) { + advance(); + auto attr = std::make_shared(ConstStringNode(name.value)); + left = std::make_shared(SelectNode(left, attr)); + continue; + } + break; + } + + // Check for 'or' default value + if (left->get_if() && current().type == Token::IDENT && current().value == "or") { + advance(); + auto default_expr = parse_expr3(); + auto* select = left->get_if(); + select->default_expr = default_expr; + } + + return left; + } + + std::shared_ptr parse_list() { + std::vector> elements; + + if (consume(Token::RBRACKET)) { + return std::make_shared(ListNode(elements)); + } + + while (current().type != Token::RBRACKET && current().type != Token::EOF_) { + elements.push_back(parse_list_element()); + if (current().type == Token::RBRACKET) { + break; + } + } + + expect(Token::RBRACKET); + return std::make_shared(ListNode(elements)); + } + + void parse_bindings(std::vector>>& bindings) { + while (current().type == Token::IDENT || current().type == Token::INHERIT) { + // Handle inherit keyword + if (consume(Token::INHERIT)) { + std::shared_ptr source; + + // Check for (expr) form + if (consume(Token::LPAREN)) { + source = parse_expr(); + expect(Token::RPAREN); + } + + // Parse identifier list + while (current().type == Token::IDENT) { + Token name = current(); + advance(); + + if (source) { + // inherit (expr) x → x = expr.x + auto select = std::make_shared( + SelectNode(source, std::make_shared(ConstStringNode(name.value)))); + bindings.push_back({name.value, select}); + } else { + // inherit x → x = x + auto var = std::make_shared(VarNode(0, name.value)); + bindings.push_back({name.value, var}); + } + } + + expect(Token::SEMICOLON); + continue; + } + + if (current().type != Token::IDENT) + break; + Token key = current(); + advance(); + + if (consume(Token::AT)) { + auto pattern = parse_expr(); + auto value = 
parse_expr(); + bindings.push_back({key.value, value}); + } else { + expect(Token::EQUALS); + auto value = parse_expr(); + bindings.push_back({key.value, value}); + } + + if (!consume(Token::SEMICOLON)) + break; + } + } + + // Try to parse lambda, return nullptr if not a lambda + std::shared_ptr try_parse_lambda() { + size_t saved_pos = pos; + + // Check for named pattern: arg@{ ... }: + std::optional named_arg; + if (current().type == Token::IDENT) { + Token name = current(); + advance(); + if (consume(Token::AT)) { + named_arg = name.value; + } else if (consume(Token::COLON)) { + // Simple lambda: x: body + auto body = parse_expr(); + auto lambda = LambdaNode(1, body); + lambda.param_name = name.value; + return std::make_shared(std::move(lambda)); + } else { + // Not a lambda, restore position pos = saved_pos; return nullptr; + } } - std::shared_ptr parse_string_interp(const std::string& raw) { - std::vector> parts; - size_t i = 0; - std::string current_str; + // Check for pattern: { ... 
}: + if (current().type == Token::LBRACE) { + advance(); - while (i < raw.size()) { - if (raw[i] == '$' && i + 1 < raw.size() && raw[i + 1] == '{') { - // Save current string part if any - if (!current_str.empty()) { - parts.push_back(std::make_shared(ConstStringNode(current_str))); - current_str.clear(); - } + // Parse pattern fields + struct Field { + std::string name; + std::optional> default_val; + }; + std::vector fields; + bool has_ellipsis = false; - // Find matching } - i += 2; // Skip ${ - int depth = 1; - size_t expr_start = i; - bool in_string = false; - char string_quote = 0; - - while (i < raw.size() && depth > 0) { - if (!in_string) { - if (raw[i] == '"' || raw[i] == '\'') { - in_string = true; - string_quote = raw[i]; - } else if (raw[i] == '{') { - depth++; - } else if (raw[i] == '}') { - depth--; - } - } else { - if (raw[i] == string_quote && (i == 0 || raw[i-1] != '\\')) { - in_string = false; - } else if (raw[i] == '\\') { - i++; - } - } - if (depth > 0) i++; - } - - if (depth > 0) { - throw std::runtime_error("unterminated ${ in string interpolation"); - } - - // Parse the expression - std::string expr_str = raw.substr(expr_start, i - expr_start); - - // Tokenize and parse the expression - Lexer lexer(expr_str); - auto expr_tokens = lexer.tokenize(); - - // Save current state - auto saved_tokens = tokens; - auto saved_pos = pos; - - // Parse expression - tokens = expr_tokens; - pos = 0; - auto expr = parse_expr(); - - // Restore state - tokens = saved_tokens; - pos = saved_pos; - - // Convert to string using toString builtin - auto to_string = std::make_shared(VarNode(0, "toString")); - auto str_expr = std::make_shared(AppNode(to_string, expr)); - parts.push_back(str_expr); - - i++; // Skip } - } else { - current_str += raw[i]; - i++; - } + while (current().type != Token::RBRACE && current().type != Token::EOF_) { + if (consume(Token::ELLIPSIS)) { + has_ellipsis = true; + if (consume(Token::COMMA)) + continue; + break; } - // Add remaining 
string part + if (current().type == Token::IDENT) { + Token field_name = current(); + advance(); + + Field field; + field.name = field_name.value; + + // Check for default value + if (consume(Token::QUESTION)) { + field.default_val = parse_expr(); + } + + fields.push_back(field); + + if (consume(Token::COMMA)) + continue; + break; + } else { + break; + } + } + + if (!consume(Token::RBRACE)) { + // Not a lambda pattern, restore + pos = saved_pos; + return nullptr; + } + + if (!consume(Token::COLON)) { + // Not a lambda, restore + pos = saved_pos; + return nullptr; + } + + // Parse body + auto body = parse_expr(); + + // Create LambdaPatternNode instead of desugaring + auto pattern = LambdaPatternNode(body); + pattern.allow_extra = has_ellipsis; + pattern.at_binding = named_arg; + + // Separate required and optional fields + for (const auto& field : fields) { + PatternField pf(field.name, field.default_val); + if (field.default_val) { + pattern.optional_fields.push_back(std::move(pf)); + } else { + pattern.required_fields.push_back(std::move(pf)); + } + } + + return std::make_shared(std::move(pattern)); + } + + // Not a lambda + pos = saved_pos; + return nullptr; + } + + std::shared_ptr parse_string_interp(const std::string& raw) { + std::vector parts; + size_t i = 0; + std::string current_str; + + while (i < raw.size()) { + if (raw[i] == '$' && i + 1 < raw.size() && raw[i + 1] == '{') { + // Save current string part if any if (!current_str.empty()) { - parts.push_back(std::make_shared(ConstStringNode(current_str))); + parts.push_back(StringPart::make_literal(current_str)); + current_str.clear(); } - // Build concatenation tree - if (parts.empty()) { - return std::make_shared(ConstStringNode("")); + // Find matching } + i += 2; // Skip ${ + int depth = 1; + size_t expr_start = i; + bool in_string = false; + char string_quote = 0; + + while (i < raw.size() && depth > 0) { + if (!in_string) { + if (raw[i] == '"' || raw[i] == '\'') { + in_string = true; + string_quote = 
raw[i]; + } else if (raw[i] == '{') { + depth++; + } else if (raw[i] == '}') { + depth--; + } + } else { + if (raw[i] == string_quote && (i == 0 || raw[i - 1] != '\\')) { + in_string = false; + } else if (raw[i] == '\\') { + i++; + } + } + if (depth > 0) + i++; } - auto result = parts[0]; - for (size_t j = 1; j < parts.size(); j++) { - // Use ADD (+) for string concatenation; CONCAT (++) is Nix list concatenation - result = std::make_shared(BinaryOpNode(BinaryOp::ADD, result, parts[j])); + if (depth > 0) { + throw std::runtime_error("unterminated ${ in string interpolation"); } - return result; + // Parse the expression + std::string expr_str = raw.substr(expr_start, i - expr_start); + + // Tokenize and parse the expression + Lexer lexer(expr_str); + auto expr_tokens = lexer.tokenize(); + + // Save current state + auto saved_tokens = tokens; + auto saved_pos = pos; + + // Parse expression + tokens = expr_tokens; + pos = 0; + auto expr = parse_expr(); + + // Restore state + tokens = saved_tokens; + pos = saved_pos; + + // Add expression part (will be coerced to string during evaluation) + parts.push_back(StringPart::make_expr(expr)); + + i++; // Skip } + } else { + current_str += raw[i]; + i++; + } } + + // Add remaining string part + if (!current_str.empty()) { + parts.push_back(StringPart::make_literal(current_str)); + } + + // Return StringInterpolationNode + if (parts.empty()) { + return std::make_shared(ConstStringNode("")); + } + + // If only one literal part, return it directly as ConstStringNode + if (parts.size() == 1 && parts[0].type == StringPart::Type::LITERAL) { + return std::make_shared(ConstStringNode(parts[0].literal)); + } + + // Otherwise return StringInterpolationNode + return std::make_shared(StringInterpolationNode(std::move(parts))); + } }; Parser::Parser() : pImpl(std::make_unique()) {} Parser::~Parser() = default; std::shared_ptr Parser::parse(const std::string& source, const std::string& path) { - pImpl->current_file = path; + 
pImpl->current_file = path; - Lexer lexer(source); - pImpl->tokens = lexer.tokenize(); - pImpl->pos = 0; + Lexer lexer(source); + pImpl->tokens = lexer.tokenize(); + pImpl->pos = 0; - return pImpl->parse_expr(); + return pImpl->parse_expr(); } std::shared_ptr Parser::parse_file(const std::string& path) { - std::string content = read_file(path); - return parse(content, path); + std::string content = read_file(path); + return parse(content, path); } -} +} // namespace nix_irc diff --git a/src/irc/parser.h b/src/irc/parser.h index 7bb97c8..a5918dc 100644 --- a/src/irc/parser.h +++ b/src/irc/parser.h @@ -2,24 +2,24 @@ #define NIX_IRC_PARSER_H #include "types.h" -#include #include +#include namespace nix_irc { class Parser { public: - Parser(); - ~Parser(); - - std::shared_ptr parse(const std::string& source, const std::string& path = ""); - std::shared_ptr parse_file(const std::string& path); - + Parser(); + ~Parser(); + + std::shared_ptr parse(const std::string& source, const std::string& path = ""); + std::shared_ptr parse_file(const std::string& path); + private: - struct Impl; - std::unique_ptr pImpl; + struct Impl; + std::unique_ptr pImpl; }; -} +} // namespace nix_irc #endif \ No newline at end of file diff --git a/src/irc/resolver.cpp b/src/irc/resolver.cpp index f57cf64..37dffcc 100644 --- a/src/irc/resolver.cpp +++ b/src/irc/resolver.cpp @@ -1,111 +1,114 @@ #include "resolver.h" #include "parser.h" -#include -#include -#include #include +#include +#include #include +#include namespace nix_irc { namespace fs = std::filesystem; struct Resolver::Impl { - ResolverConfig config; - std::vector> resolved_imports; - std::unordered_set visited; - Parser parser; - - Impl(const ResolverConfig& cfg) : config(cfg) {} - - std::string resolve_path(const std::string& path, const std::string& from_file) { - fs::path p(path); - - if (p.is_absolute()) { - if (fs::exists(p)) return path; - return ""; - } - - fs::path from_dir = fs::path(from_file).parent_path(); - fs::path 
candidate = from_dir / p; - if (fs::exists(candidate)) return candidate.string(); - - for (const auto& search : config.search_paths) { - candidate = fs::path(search) / p; - if (fs::exists(candidate)) return candidate.string(); - } - - return ""; + ResolverConfig config; + std::vector> resolved_imports; + std::unordered_set visited; + Parser parser; + + Impl(const ResolverConfig& cfg) : config(cfg) {} + + std::string resolve_path(const std::string& path, const std::string& from_file) { + fs::path p(path); + + if (p.is_absolute()) { + if (fs::exists(p)) + return path; + return ""; } - - ImportResult do_resolve(const std::string& path, const std::string& from_file) { - std::string resolved = resolve_path(path, from_file); - - if (resolved.empty()) { - return {false, "", "Cannot find file: " + path, nullptr}; - } - - if (visited.count(resolved)) { - return {true, resolved, "", nullptr}; - } - visited.insert(resolved); - - try { - auto ast = parser.parse_file(resolved); - return {true, resolved, "", ast}; - } catch (const std::exception& e) { - return {false, "", e.what(), nullptr}; - } + + fs::path from_dir = fs::path(from_file).parent_path(); + fs::path candidate = from_dir / p; + if (fs::exists(candidate)) + return candidate.string(); + + for (const auto& search : config.search_paths) { + candidate = fs::path(search) / p; + if (fs::exists(candidate)) + return candidate.string(); } + + return ""; + } + + ImportResult do_resolve(const std::string& path, const std::string& from_file) { + std::string resolved = resolve_path(path, from_file); + + if (resolved.empty()) { + return {false, "", "Cannot find file: " + path, nullptr}; + } + + if (visited.count(resolved)) { + return {true, resolved, "", nullptr}; + } + visited.insert(resolved); + + try { + auto ast = parser.parse_file(resolved); + return {true, resolved, "", ast}; + } catch (const std::exception& e) { + return {false, "", e.what(), nullptr}; + } + } }; Resolver::Resolver(const ResolverConfig& config) : 
pImpl(std::make_unique(config)) {} Resolver::~Resolver() = default; void Resolver::add_search_path(const std::string& path) { - pImpl->config.search_paths.push_back(path); + pImpl->config.search_paths.push_back(path); } void Resolver::set_search_paths(const std::vector& paths) { - pImpl->config.search_paths = paths; + pImpl->config.search_paths = paths; } ImportResult Resolver::resolve_import(const std::string& path, const std::string& from_file) { - auto result = pImpl->do_resolve(path, from_file); - if (result.success && result.ast) { - pImpl->resolved_imports.push_back({path, result.path}); - } - return result; + auto result = pImpl->do_resolve(path, from_file); + if (result.success && result.ast) { + pImpl->resolved_imports.push_back({path, result.path}); + } + return result; } ImportResult Resolver::resolve_import(const Node& import_node, const std::string& from_file) { - const ConstPathNode* path_node = import_node.get_if(); - if (!path_node) { - return {false, "", "Dynamic import not supported", nullptr}; - } - return resolve_import(path_node->value, from_file); + const ConstPathNode* path_node = import_node.get_if(); + if (!path_node) { + return {false, "", "Dynamic import not supported", nullptr}; + } + return resolve_import(path_node->value, from_file); } std::vector Resolver::get_resolved_files() const { - std::vector files; - for (const auto& [orig, resolved] : pImpl->resolved_imports) { - (void)orig; - files.push_back(resolved); - } - return files; + std::vector files; + for (const auto& [orig, resolved] : pImpl->resolved_imports) { + (void) orig; + files.push_back(resolved); + } + return files; } std::vector> Resolver::get_imports() const { - return pImpl->resolved_imports; + return pImpl->resolved_imports; } bool is_static_import(const Node& node) { - return node.holds(); + return node.holds(); } std::string normalize_path(const std::string& path) { - fs::path p(path); - return fs::absolute(p).string(); + fs::path p(path); + return 
fs::absolute(p).string(); } -} +} // namespace nix_irc diff --git a/src/irc/resolver.h b/src/irc/resolver.h index 39f8d40..89167d7 100644 --- a/src/irc/resolver.h +++ b/src/irc/resolver.h @@ -2,47 +2,47 @@ #define NIX_IRC_RESOLVER_H #include "types.h" -#include -#include -#include #include +#include +#include +#include namespace nix_irc { struct ImportResult { - bool success; - std::string path; - std::string error; - std::shared_ptr ast; + bool success; + std::string path; + std::string error; + std::shared_ptr ast; }; struct ResolverConfig { - std::vector search_paths; - bool resolve_imports = true; + std::vector search_paths; + bool resolve_imports = true; }; class Resolver { public: - Resolver(const ResolverConfig& config = {}); - ~Resolver(); - - void add_search_path(const std::string& path); - void set_search_paths(const std::vector& paths); - - ImportResult resolve_import(const std::string& path, const std::string& from_file); - ImportResult resolve_import(const Node& import_node, const std::string& from_file); - - std::vector get_resolved_files() const; - std::vector> get_imports() const; - + Resolver(const ResolverConfig& config = {}); + ~Resolver(); + + void add_search_path(const std::string& path); + void set_search_paths(const std::vector& paths); + + ImportResult resolve_import(const std::string& path, const std::string& from_file); + ImportResult resolve_import(const Node& import_node, const std::string& from_file); + + std::vector get_resolved_files() const; + std::vector> get_imports() const; + private: - struct Impl; - std::unique_ptr pImpl; + struct Impl; + std::unique_ptr pImpl; }; bool is_static_import(const Node& node); std::string normalize_path(const std::string& path); -} +} // namespace nix_irc #endif \ No newline at end of file diff --git a/src/irc/serializer.cpp b/src/irc/serializer.cpp index 8819789..a2c894a 100644 --- a/src/irc/serializer.cpp +++ b/src/irc/serializer.cpp @@ -1,392 +1,632 @@ #include "serializer.h" #include -#include 
#include namespace nix_irc { struct Serializer::Impl { - std::vector buffer; + std::vector buffer; - void write_u32(uint32_t val) { - buffer.push_back((val >> 0) & 0xFF); - buffer.push_back((val >> 8) & 0xFF); - buffer.push_back((val >> 16) & 0xFF); - buffer.push_back((val >> 24) & 0xFF); + void write_u32(uint32_t val) { + buffer.push_back((val >> 0) & 0xFF); + buffer.push_back((val >> 8) & 0xFF); + buffer.push_back((val >> 16) & 0xFF); + buffer.push_back((val >> 24) & 0xFF); + } + + void write_u64(uint64_t val) { + for (int i = 0; i < 8; i++) { + buffer.push_back((val >> (i * 8)) & 0xFF); } + } - void write_u64(uint64_t val) { - for (int i = 0; i < 8; i++) { - buffer.push_back((val >> (i * 8)) & 0xFF); + void write_u8(uint8_t val) { buffer.push_back(val); } + + void write_string(const std::string& str) { + write_u32(str.size()); + buffer.insert(buffer.end(), str.begin(), str.end()); + } + + NodeType get_node_type(const Node& node) { + if (node.holds()) + return NodeType::CONST_INT; + if (node.holds()) + return NodeType::CONST_FLOAT; + if (node.holds()) + return NodeType::CONST_STRING; + if (node.holds()) + return NodeType::CONST_PATH; + if (node.holds()) + return NodeType::CONST_BOOL; + if (node.holds()) + return NodeType::CONST_NULL; + if (node.holds()) + return NodeType::CONST_URI; + if (node.holds()) + return NodeType::CONST_LOOKUP_PATH; + if (node.holds()) + return NodeType::VAR; + if (node.holds()) + return NodeType::LAMBDA; + if (node.holds()) + return NodeType::APP; + if (node.holds()) + return NodeType::BINARY_OP; + if (node.holds()) + return NodeType::UNARY_OP; + if (node.holds()) + return NodeType::IMPORT; + if (node.holds()) + return NodeType::ATTRSET; + if (node.holds()) + return NodeType::SELECT; + if (node.holds()) + return NodeType::HAS_ATTR; + if (node.holds()) + return NodeType::WITH; + if (node.holds()) + return NodeType::LIST; + if (node.holds()) + return NodeType::IF; + if (node.holds()) + return NodeType::LET; + if (node.holds()) + return 
NodeType::LETREC; + if (node.holds()) + return NodeType::ASSERT; + if (node.holds()) + return NodeType::LAMBDA_PATTERN; + if (node.holds()) + return NodeType::STRING_INTERPOLATION; + if (node.holds()) + return NodeType::BUILTIN_CALL; + return NodeType::ERROR; + } + + uint32_t get_node_line(const Node& node) { + return std::visit([](const auto& n) { return n.line; }, node.data); + } + + void write_node(const Node& node) { + write_u8(static_cast(get_node_type(node))); + write_u32(get_node_line(node)); + + if (auto* n = node.get_if()) { + write_u64(static_cast(n->value)); + } else if (auto* n = node.get_if()) { + double val = n->value; + uint64_t bits = 0; + std::memcpy(&bits, &val, sizeof(bits)); + write_u64(bits); + } else if (auto* n = node.get_if()) { + write_string(n->value); + } else if (auto* n = node.get_if()) { + write_string(n->value); + } else if (auto* n = node.get_if()) { + write_u8(n->value ? 1 : 0); + } else if (auto* n = node.get_if()) { + // No data for null + } else if (auto* n = node.get_if()) { + write_string(n->value); + } else if (auto* n = node.get_if()) { + write_string(n->value); + } else if (auto* n = node.get_if()) { + write_u32(n->index); + } else if (auto* n = node.get_if()) { + write_u32(n->arity); + if (n->body) + write_node(*n->body); + } else if (auto* n = node.get_if()) { + if (n->func) + write_node(*n->func); + if (n->arg) + write_node(*n->arg); + } else if (auto* n = node.get_if()) { + write_u8(static_cast(n->op)); + if (n->left) + write_node(*n->left); + if (n->right) + write_node(*n->right); + } else if (auto* n = node.get_if()) { + write_u8(static_cast(n->op)); + if (n->operand) + write_node(*n->operand); + } else if (auto* n = node.get_if()) { + if (n->path) + write_node(*n->path); + } else if (auto* n = node.get_if()) { + write_u8(n->recursive ? 
1 : 0); + write_u32(n->attrs.size()); + for (const auto& binding : n->attrs) { + if (binding.is_dynamic()) { + write_u8(1); // Dynamic flag + write_node(*binding.dynamic_name); + } else { + write_u8(0); // Static flag + write_string(binding.static_name.value()); } - } + if (binding.value) + write_node(*binding.value); + } + } else if (auto* n = node.get_if()) { + if (n->expr) + write_node(*n->expr); + if (n->attr) + write_node(*n->attr); + if (n->default_expr && *n->default_expr) { + write_u8(1); + write_node(**n->default_expr); + } else { + write_u8(0); + } + } else if (auto* n = node.get_if()) { + if (n->expr) + write_node(*n->expr); + if (n->attr) + write_node(*n->attr); + } else if (auto* n = node.get_if()) { + if (n->attrs) + write_node(*n->attrs); + if (n->body) + write_node(*n->body); + } else if (auto* n = node.get_if()) { + write_u32(n->elements.size()); + for (const auto& elem : n->elements) { + if (elem) + write_node(*elem); + } + } else if (auto* n = node.get_if()) { + if (n->cond) + write_node(*n->cond); + if (n->then_branch) + write_node(*n->then_branch); + if (n->else_branch) + write_node(*n->else_branch); + } else if (auto* n = node.get_if()) { + write_u32(n->bindings.size()); + for (const auto& [key, val] : n->bindings) { + write_string(key); + if (val) + write_node(*val); + } + if (n->body) + write_node(*n->body); + } else if (auto* n = node.get_if()) { + write_u32(n->bindings.size()); + for (const auto& [key, val] : n->bindings) { + write_string(key); + if (val) + write_node(*val); + } + if (n->body) + write_node(*n->body); + } else if (auto* n = node.get_if()) { + if (n->cond) + write_node(*n->cond); + if (n->body) + write_node(*n->body); + } else if (auto* n = node.get_if()) { + // Required fields + write_u32(n->required_fields.size()); + for (const auto& field : n->required_fields) { + write_string(field.name); + write_u8(0); // No default + } - void write_u8(uint8_t val) { - buffer.push_back(val); - } - - void write_string(const std::string& 
str) { - write_u32(str.size()); - buffer.insert(buffer.end(), str.begin(), str.end()); - } - - NodeType get_node_type(const Node& node) { - if (node.holds()) return NodeType::CONST_INT; - if (node.holds()) return NodeType::CONST_STRING; - if (node.holds()) return NodeType::CONST_PATH; - if (node.holds()) return NodeType::CONST_BOOL; - if (node.holds()) return NodeType::CONST_NULL; - if (node.holds()) return NodeType::VAR; - if (node.holds()) return NodeType::LAMBDA; - if (node.holds()) return NodeType::APP; - if (node.holds()) return NodeType::BINARY_OP; - if (node.holds()) return NodeType::UNARY_OP; - if (node.holds()) return NodeType::ATTRSET; - if (node.holds()) return NodeType::SELECT; - if (node.holds()) return NodeType::HAS_ATTR; - if (node.holds()) return NodeType::WITH; - if (node.holds()) return NodeType::IF; - if (node.holds()) return NodeType::LET; - if (node.holds()) return NodeType::LETREC; - if (node.holds()) return NodeType::ASSERT; - return NodeType::ERROR; - } - - uint32_t get_node_line(const Node& node) { - return std::visit([](const auto& n) { return n.line; }, node.data); - } - - void write_node(const Node& node) { - write_u8(static_cast(get_node_type(node))); - write_u32(get_node_line(node)); - - if (auto* n = node.get_if()) { - write_u64(static_cast(n->value)); - } else if (auto* n = node.get_if()) { - write_string(n->value); - } else if (auto* n = node.get_if()) { - write_string(n->value); - } else if (auto* n = node.get_if()) { - write_u8(n->value ? 
1 : 0); - } else if (auto* n = node.get_if()) { - // No data for null - } else if (auto* n = node.get_if()) { - write_u32(n->index); - } else if (auto* n = node.get_if()) { - write_u32(n->arity); - if (n->body) write_node(*n->body); - } else if (auto* n = node.get_if()) { - if (n->func) write_node(*n->func); - if (n->arg) write_node(*n->arg); - } else if (auto* n = node.get_if()) { - write_u8(static_cast(n->op)); - if (n->left) write_node(*n->left); - if (n->right) write_node(*n->right); - } else if (auto* n = node.get_if()) { - write_u8(static_cast(n->op)); - if (n->operand) write_node(*n->operand); - } else if (auto* n = node.get_if()) { - write_u8(n->recursive ? 1 : 0); - write_u32(n->attrs.size()); - for (const auto& [key, val] : n->attrs) { - write_string(key); - if (val) write_node(*val); - } - } else if (auto* n = node.get_if()) { - if (n->expr) write_node(*n->expr); - if (n->attr) write_node(*n->attr); - if (n->default_expr && *n->default_expr) { - write_u8(1); - write_node(**n->default_expr); - } else { - write_u8(0); - } - } else if (auto* n = node.get_if()) { - if (n->expr) write_node(*n->expr); - if (n->attr) write_node(*n->attr); - } else if (auto* n = node.get_if()) { - if (n->attrs) write_node(*n->attrs); - if (n->body) write_node(*n->body); - } else if (auto* n = node.get_if()) { - if (n->cond) write_node(*n->cond); - if (n->then_branch) write_node(*n->then_branch); - if (n->else_branch) write_node(*n->else_branch); - } else if (auto* n = node.get_if()) { - write_u32(n->bindings.size()); - for (const auto& [key, val] : n->bindings) { - write_string(key); - if (val) write_node(*val); - } - if (n->body) write_node(*n->body); - } else if (auto* n = node.get_if()) { - write_u32(n->bindings.size()); - for (const auto& [key, val] : n->bindings) { - write_string(key); - if (val) write_node(*val); - } - if (n->body) write_node(*n->body); - } else if (auto* n = node.get_if()) { - if (n->cond) write_node(*n->cond); - if (n->body) write_node(*n->body); + // 
Optional fields + write_u32(n->optional_fields.size()); + for (const auto& field : n->optional_fields) { + write_string(field.name); + if (field.default_value && *field.default_value) { + write_u8(1); + write_node(**field.default_value); + } else { + write_u8(0); } + } + + // At-binding + if (n->at_binding) { + write_u8(1); + write_string(*n->at_binding); + } else { + write_u8(0); + } + + // Allow extra + write_u8(n->allow_extra ? 1 : 0); + + // Body + if (n->body) + write_node(*n->body); + } else if (auto* n = node.get_if()) { + write_u32(n->parts.size()); + + for (const auto& part : n->parts) { + write_u8(static_cast(part.type)); + + if (part.type == StringPart::Type::LITERAL) { + write_string(part.literal); + } else { // EXPR + if (part.expr) + write_node(*part.expr); + } + } + } else if (auto* n = node.get_if()) { + write_string(n->builtin_name); + write_u32(n->args.size()); + for (const auto& arg : n->args) { + if (arg) + write_node(*arg); + } } + } }; Serializer::Serializer() : pImpl(std::make_unique()) {} Serializer::~Serializer() = default; void Serializer::serialize(const IRModule& module, const std::string& path) { - auto bytes = serialize_to_bytes(module); - std::ofstream out(path, std::ios::binary); - out.write(reinterpret_cast(bytes.data()), bytes.size()); + auto bytes = serialize_to_bytes(module); + std::ofstream out(path, std::ios::binary); + out.write(reinterpret_cast(bytes.data()), bytes.size()); } std::vector Serializer::serialize_to_bytes(const IRModule& module) { - pImpl->buffer.clear(); + pImpl->buffer.clear(); - pImpl->write_u32(IR_MAGIC); - pImpl->write_u32(IR_VERSION); + pImpl->write_u32(IR_MAGIC); + pImpl->write_u32(IR_VERSION); - pImpl->write_u32(module.sources.size()); - for (const auto& src : module.sources) { - pImpl->write_string(src.path); - pImpl->write_string(src.content); - } + pImpl->write_u32(module.sources.size()); + for (const auto& src : module.sources) { + pImpl->write_string(src.path); + pImpl->write_string(src.content); + } 
- pImpl->write_u32(module.imports.size()); - for (const auto& [from, to] : module.imports) { - pImpl->write_string(from); - pImpl->write_string(to); - } + pImpl->write_u32(module.imports.size()); + for (const auto& [from, to] : module.imports) { + pImpl->write_string(from); + pImpl->write_string(to); + } - pImpl->write_u32(module.string_table.size()); - for (const auto& [str, id] : module.string_table) { - pImpl->write_string(str); - pImpl->write_u32(id); - } + pImpl->write_u32(module.string_table.size()); + for (const auto& [str, id] : module.string_table) { + pImpl->write_string(str); + pImpl->write_u32(id); + } - if (module.entry && module.entry != nullptr) { - pImpl->write_u8(1); - pImpl->write_node(*module.entry); - } else { - pImpl->write_u8(0); - } + if (module.entry && module.entry != nullptr) { + pImpl->write_u8(1); + pImpl->write_node(*module.entry); + } else { + pImpl->write_u8(0); + } - return pImpl->buffer; + return pImpl->buffer; } struct Deserializer::Impl { - std::vector buffer; - size_t pos = 0; + std::vector buffer; + size_t pos = 0; - uint32_t read_u32() { - uint32_t val = 0; - val |= buffer[pos + 0]; - val |= (uint32_t)buffer[pos + 1] << 8; - val |= (uint32_t)buffer[pos + 2] << 16; - val |= (uint32_t)buffer[pos + 3] << 24; - pos += 4; - return val; + uint32_t read_u32() { + uint32_t val = 0; + val |= buffer[pos + 0]; + val |= (uint32_t) buffer[pos + 1] << 8; + val |= (uint32_t) buffer[pos + 2] << 16; + val |= (uint32_t) buffer[pos + 3] << 24; + pos += 4; + return val; + } + + uint64_t read_u64() { + uint64_t val = 0; + for (int i = 0; i < 8; i++) { + val |= (uint64_t) buffer[pos + i] << (i * 8); } + pos += 8; + return val; + } - uint64_t read_u64() { - uint64_t val = 0; - for (int i = 0; i < 8; i++) { - val |= (uint64_t)buffer[pos + i] << (i * 8); + uint8_t read_u8() { return buffer[pos++]; } + + std::string read_string() { + uint32_t len = read_u32(); + std::string str(reinterpret_cast(&buffer[pos]), len); + pos += len; + return str; + } + + 
std::shared_ptr read_node() { + NodeType type = static_cast(read_u8()); + uint32_t line = read_u32(); + + switch (type) { + case NodeType::CONST_INT: { + int64_t val = static_cast(read_u64()); + return std::make_shared(ConstIntNode(val, line)); + } + case NodeType::CONST_FLOAT: { + uint64_t bits = read_u64(); + double val = 0.0; + std::memcpy(&val, &bits, sizeof(val)); + return std::make_shared(ConstFloatNode(val, line)); + } + case NodeType::CONST_STRING: { + std::string val = read_string(); + return std::make_shared(ConstStringNode(val, line)); + } + case NodeType::CONST_PATH: { + std::string val = read_string(); + return std::make_shared(ConstPathNode(val, line)); + } + case NodeType::CONST_BOOL: { + bool val = read_u8() != 0; + return std::make_shared(ConstBoolNode(val, line)); + } + case NodeType::CONST_NULL: + return std::make_shared(ConstNullNode(line)); + case NodeType::CONST_URI: { + std::string val = read_string(); + return std::make_shared(ConstURINode(val, line)); + } + case NodeType::CONST_LOOKUP_PATH: { + std::string val = read_string(); + return std::make_shared(ConstLookupPathNode(val, line)); + } + case NodeType::BUILTIN_CALL: { + std::string builtin_name = read_string(); + uint32_t num_args = read_u32(); + std::vector> args; + args.reserve(num_args); + for (uint32_t i = 0; i < num_args; i++) { + args.push_back(read_node()); + } + return std::make_shared( + BuiltinCallNode(std::move(builtin_name), std::move(args), line)); + } + case NodeType::VAR: { + uint32_t index = read_u32(); + return std::make_shared(VarNode(index, "", line)); + } + case NodeType::LAMBDA: { + uint32_t arity = read_u32(); + auto body = read_node(); + return std::make_shared(LambdaNode(arity, body, line)); + } + case NodeType::APP: { + auto func = read_node(); + auto arg = read_node(); + return std::make_shared(AppNode(func, arg, line)); + } + case NodeType::BINARY_OP: { + BinaryOp op = static_cast(read_u8()); + auto left = read_node(); + auto right = read_node(); + return 
std::make_shared(BinaryOpNode(op, left, right, line)); + } + case NodeType::UNARY_OP: { + UnaryOp op = static_cast(read_u8()); + auto operand = read_node(); + return std::make_shared(UnaryOpNode(op, operand, line)); + } + case NodeType::IMPORT: { + auto path = read_node(); + return std::make_shared(ImportNode(path, line)); + } + case NodeType::ATTRSET: { + bool recursive = read_u8() != 0; + uint32_t num_attrs = read_u32(); + AttrsetNode attrs(recursive, line); + for (uint32_t i = 0; i < num_attrs; i++) { + uint8_t is_dynamic = read_u8(); + if (is_dynamic) { + auto key_expr = read_node(); + auto val = read_node(); + attrs.attrs.push_back(AttrBinding(key_expr, val)); + } else { + std::string key = read_string(); + auto val = read_node(); + attrs.attrs.push_back(AttrBinding(key, val)); } - pos += 8; - return val; + } + return std::make_shared(std::move(attrs)); } - - uint8_t read_u8() { - return buffer[pos++]; + case NodeType::SELECT: { + auto expr = read_node(); + auto attr = read_node(); + uint8_t has_default = read_u8(); + std::optional> default_expr; + if (has_default) { + default_expr = read_node(); + } + SelectNode select_node(expr, attr, line); + select_node.default_expr = default_expr; + return std::make_shared(std::move(select_node)); } - - std::string read_string() { - uint32_t len = read_u32(); - std::string str(reinterpret_cast(&buffer[pos]), len); - pos += len; - return str; + case NodeType::HAS_ATTR: { + auto expr = read_node(); + auto attr = read_node(); + return std::make_shared(HasAttrNode(expr, attr, line)); } + case NodeType::WITH: { + auto attrs = read_node(); + auto body = read_node(); + return std::make_shared(WithNode(attrs, body, line)); + } + case NodeType::LIST: { + uint32_t num_elements = read_u32(); + std::vector> elements; + elements.reserve(num_elements); + for (uint32_t i = 0; i < num_elements; i++) { + elements.push_back(read_node()); + } + return std::make_shared(ListNode(std::move(elements), line)); + } + case NodeType::IF: { + auto 
cond = read_node(); + auto then_branch = read_node(); + auto else_branch = read_node(); + return std::make_shared(IfNode(cond, then_branch, else_branch, line)); + } + case NodeType::LET: { + uint32_t num_bindings = read_u32(); + std::vector>> bindings; + for (uint32_t i = 0; i < num_bindings; i++) { + std::string key = read_string(); + auto val = read_node(); + bindings.push_back({key, val}); + } + auto body = read_node(); + LetNode let(body, line); + let.bindings = std::move(bindings); + return std::make_shared(std::move(let)); + } + case NodeType::LETREC: { + uint32_t num_bindings = read_u32(); + std::vector>> bindings; + for (uint32_t i = 0; i < num_bindings; i++) { + std::string key = read_string(); + auto val = read_node(); + bindings.push_back({key, val}); + } + auto body = read_node(); + LetRecNode letrec(body, line); + letrec.bindings = std::move(bindings); + return std::make_shared(std::move(letrec)); + } + case NodeType::ASSERT: { + auto cond = read_node(); + auto body = read_node(); + return std::make_shared(AssertNode(cond, body, line)); + } + case NodeType::LAMBDA_PATTERN: { + // Read required fields + uint32_t num_required = read_u32(); + std::vector required_fields; + required_fields.reserve(num_required); + for (uint32_t i = 0; i < num_required; i++) { + std::string name = read_string(); + read_u8(); // Discard has_default (always 0) + required_fields.emplace_back(name, std::nullopt); + } - std::shared_ptr read_node() { - NodeType type = static_cast(read_u8()); - uint32_t line = read_u32(); - - switch (type) { - case NodeType::CONST_INT: { - int64_t val = static_cast(read_u64()); - return std::make_shared(ConstIntNode(val, line)); - } - case NodeType::CONST_STRING: { - std::string val = read_string(); - return std::make_shared(ConstStringNode(val, line)); - } - case NodeType::CONST_PATH: { - std::string val = read_string(); - return std::make_shared(ConstPathNode(val, line)); - } - case NodeType::CONST_BOOL: { - bool val = read_u8() != 0; - return 
std::make_shared(ConstBoolNode(val, line)); - } - case NodeType::CONST_NULL: - return std::make_shared(ConstNullNode(line)); - case NodeType::VAR: { - uint32_t index = read_u32(); - return std::make_shared(VarNode(index, "", line)); - } - case NodeType::LAMBDA: { - uint32_t arity = read_u32(); - auto body = read_node(); - return std::make_shared(LambdaNode(arity, body, line)); - } - case NodeType::APP: { - auto func = read_node(); - auto arg = read_node(); - return std::make_shared(AppNode(func, arg, line)); - } - case NodeType::BINARY_OP: { - BinaryOp op = static_cast(read_u8()); - auto left = read_node(); - auto right = read_node(); - return std::make_shared(BinaryOpNode(op, left, right, line)); - } - case NodeType::UNARY_OP: { - UnaryOp op = static_cast(read_u8()); - auto operand = read_node(); - return std::make_shared(UnaryOpNode(op, operand, line)); - } - case NodeType::ATTRSET: { - bool recursive = read_u8() != 0; - uint32_t num_attrs = read_u32(); - AttrsetNode attrs(recursive, line); - for (uint32_t i = 0; i < num_attrs; i++) { - std::string key = read_string(); - auto val = read_node(); - attrs.attrs.push_back({key, val}); - } - return std::make_shared(std::move(attrs)); - } - case NodeType::SELECT: { - auto expr = read_node(); - auto attr = read_node(); - uint8_t has_default = read_u8(); - std::optional> default_expr; - if (has_default) { - default_expr = read_node(); - } - SelectNode select_node(expr, attr, line); - select_node.default_expr = default_expr; - return std::make_shared(std::move(select_node)); - } - case NodeType::HAS_ATTR: { - auto expr = read_node(); - auto attr = read_node(); - return std::make_shared(HasAttrNode(expr, attr, line)); - } - case NodeType::WITH: { - auto attrs = read_node(); - auto body = read_node(); - return std::make_shared(WithNode(attrs, body, line)); - } - case NodeType::IF: { - auto cond = read_node(); - auto then_branch = read_node(); - auto else_branch = read_node(); - return std::make_shared(IfNode(cond, 
then_branch, else_branch, line)); - } - case NodeType::LET: { - uint32_t num_bindings = read_u32(); - std::vector>> bindings; - for (uint32_t i = 0; i < num_bindings; i++) { - std::string key = read_string(); - auto val = read_node(); - bindings.push_back({key, val}); - } - auto body = read_node(); - LetNode let(body, line); - let.bindings = std::move(bindings); - return std::make_shared(std::move(let)); - } - case NodeType::LETREC: { - uint32_t num_bindings = read_u32(); - std::vector>> bindings; - for (uint32_t i = 0; i < num_bindings; i++) { - std::string key = read_string(); - auto val = read_node(); - bindings.push_back({key, val}); - } - auto body = read_node(); - LetRecNode letrec(body, line); - letrec.bindings = std::move(bindings); - return std::make_shared(std::move(letrec)); - } - case NodeType::ASSERT: { - auto cond = read_node(); - auto body = read_node(); - return std::make_shared(AssertNode(cond, body, line)); - } - default: - throw std::runtime_error("Unknown node type in IR"); + // Read optional fields + uint32_t num_optional = read_u32(); + std::vector optional_fields; + optional_fields.reserve(num_optional); + for (uint32_t i = 0; i < num_optional; i++) { + std::string name = read_string(); + uint8_t has_default = read_u8(); + std::optional> default_val; + if (has_default) { + default_val = read_node(); } + optional_fields.emplace_back(name, default_val); + } + + // Read at-binding + std::optional at_binding; + if (read_u8()) { + at_binding = read_string(); + } + + // Read allow_extra + bool allow_extra = read_u8() != 0; + + // Read body + auto body = read_node(); + + // Construct node + LambdaPatternNode lambda_pattern(body, line); + lambda_pattern.required_fields = std::move(required_fields); + lambda_pattern.optional_fields = std::move(optional_fields); + lambda_pattern.at_binding = at_binding; + lambda_pattern.allow_extra = allow_extra; + + return std::make_shared(std::move(lambda_pattern)); } + case NodeType::STRING_INTERPOLATION: { + 
uint32_t num_parts = read_u32(); + std::vector parts; + parts.reserve(num_parts); + + for (uint32_t i = 0; i < num_parts; i++) { + uint8_t type_byte = read_u8(); + StringPart::Type type = static_cast(type_byte); + + if (type == StringPart::Type::LITERAL) { + std::string literal = read_string(); + parts.push_back(StringPart::make_literal(std::move(literal))); + } else { // EXPR + auto expr = read_node(); + parts.push_back(StringPart::make_expr(expr)); + } + } + + return std::make_shared(StringInterpolationNode(std::move(parts), line)); + } + default: + throw std::runtime_error("Unknown node type in IR"); + } + } }; Deserializer::Deserializer() : pImpl(std::make_unique()) {} Deserializer::~Deserializer() = default; IRModule Deserializer::deserialize(const std::string& path) { - std::ifstream in(path, std::ios::binary | std::ios::ate); - size_t size = in.tellg(); - in.seekg(0); - pImpl->buffer.resize(size); - in.read(reinterpret_cast(pImpl->buffer.data()), size); - pImpl->pos = 0; - return deserialize(pImpl->buffer); + std::ifstream in(path, std::ios::binary | std::ios::ate); + size_t size = in.tellg(); + in.seekg(0); + pImpl->buffer.resize(size); + in.read(reinterpret_cast(pImpl->buffer.data()), size); + pImpl->pos = 0; + return deserialize(pImpl->buffer); } IRModule Deserializer::deserialize(const std::vector& data) { - pImpl->buffer = data; - pImpl->pos = 0; + pImpl->buffer = data; + pImpl->pos = 0; - IRModule module; + IRModule module; - uint32_t magic = pImpl->read_u32(); - if (magic != IR_MAGIC) { - throw std::runtime_error("Invalid IR file"); - } + uint32_t magic = pImpl->read_u32(); + if (magic != IR_MAGIC) { + throw std::runtime_error("Invalid IR file"); + } - uint32_t version = pImpl->read_u32(); - if (version != IR_VERSION) { - throw std::runtime_error("Unsupported IR version"); - } + uint32_t version = pImpl->read_u32(); + if (version != IR_VERSION) { + throw std::runtime_error("Unsupported IR version"); + } - uint32_t num_sources = pImpl->read_u32(); - 
for (uint32_t i = 0; i < num_sources; i++) { - SourceFile src; - src.path = pImpl->read_string(); - src.content = pImpl->read_string(); - module.sources.push_back(src); - } + uint32_t num_sources = pImpl->read_u32(); + for (uint32_t i = 0; i < num_sources; i++) { + SourceFile src; + src.path = pImpl->read_string(); + src.content = pImpl->read_string(); + module.sources.push_back(src); + } - uint32_t num_imports = pImpl->read_u32(); - for (uint32_t i = 0; i < num_imports; i++) { - module.imports.push_back({pImpl->read_string(), pImpl->read_string()}); - } + uint32_t num_imports = pImpl->read_u32(); + for (uint32_t i = 0; i < num_imports; i++) { + module.imports.push_back({pImpl->read_string(), pImpl->read_string()}); + } - uint32_t num_strings = pImpl->read_u32(); - for (uint32_t i = 0; i < num_strings; i++) { - std::string str = pImpl->read_string(); - uint32_t id = pImpl->read_u32(); - module.string_table[str] = id; - } + uint32_t num_strings = pImpl->read_u32(); + for (uint32_t i = 0; i < num_strings; i++) { + std::string str = pImpl->read_string(); + uint32_t id = pImpl->read_u32(); + module.string_table[str] = id; + } - if (pImpl->read_u8()) { - module.entry = pImpl->read_node(); - } + if (pImpl->read_u8()) { + module.entry = pImpl->read_node(); + } - return module; + return module; } -} +} // namespace nix_irc diff --git a/src/irc/serializer.h b/src/irc/serializer.h index 67e72b5..a6785ab 100644 --- a/src/irc/serializer.h +++ b/src/irc/serializer.h @@ -2,38 +2,38 @@ #define NIX_IRC_SERIALIZER_H #include "types.h" +#include #include #include -#include namespace nix_irc { class Serializer { public: - Serializer(); - ~Serializer(); - - void serialize(const IRModule& module, const std::string& path); - std::vector serialize_to_bytes(const IRModule& module); - + Serializer(); + ~Serializer(); + + void serialize(const IRModule& module, const std::string& path); + std::vector serialize_to_bytes(const IRModule& module); + private: - struct Impl; - std::unique_ptr 
pImpl; + struct Impl; + std::unique_ptr pImpl; }; class Deserializer { public: - Deserializer(); - ~Deserializer(); - - IRModule deserialize(const std::string& path); - IRModule deserialize(const std::vector& data); - + Deserializer(); + ~Deserializer(); + + IRModule deserialize(const std::string& path); + IRModule deserialize(const std::vector& data); + private: - struct Impl; - std::unique_ptr pImpl; + struct Impl; + std::unique_ptr pImpl; }; -} +} // namespace nix_irc #endif \ No newline at end of file diff --git a/src/irc/types.cpp b/src/irc/types.cpp new file mode 100644 index 0000000..82960b9 --- /dev/null +++ b/src/irc/types.cpp @@ -0,0 +1,62 @@ +#include "types.h" + +namespace nix_irc { + +// LambdaNode constructor +LambdaNode::LambdaNode(uint32_t a, std::shared_ptr b, uint32_t l) + : arity(a), body(std::move(b)), line(l) {} + +// AppNode constructor +AppNode::AppNode(std::shared_ptr f, std::shared_ptr a, uint32_t l) + : func(std::move(f)), arg(std::move(a)), line(l) {} + +// BinaryOpNode constructor +BinaryOpNode::BinaryOpNode(BinaryOp o, std::shared_ptr l, std::shared_ptr r, + uint32_t ln) + : op(o), left(std::move(l)), right(std::move(r)), line(ln) {} + +// UnaryOpNode constructor +UnaryOpNode::UnaryOpNode(UnaryOp o, std::shared_ptr operand_ptr, uint32_t l) + : op(o), operand(std::move(operand_ptr)), line(l) {} + +// SelectNode constructor +SelectNode::SelectNode(std::shared_ptr e, std::shared_ptr a, uint32_t l) + : expr(std::move(e)), attr(std::move(a)), line(l) {} + +// HasAttrNode constructor +HasAttrNode::HasAttrNode(std::shared_ptr e, std::shared_ptr a, uint32_t l) + : expr(std::move(e)), attr(std::move(a)), line(l) {} + +// WithNode constructor +WithNode::WithNode(std::shared_ptr a, std::shared_ptr b, uint32_t l) + : attrs(std::move(a)), body(std::move(b)), line(l) {} + +// IfNode constructor +IfNode::IfNode(std::shared_ptr c, std::shared_ptr t, std::shared_ptr e, + uint32_t l) + : cond(std::move(c)), then_branch(std::move(t)), 
else_branch(std::move(e)), line(l) {} + +// LetNode constructor +LetNode::LetNode(std::shared_ptr b, uint32_t l) : body(std::move(b)), line(l) {} + +// LetRecNode constructor +LetRecNode::LetRecNode(std::shared_ptr b, uint32_t l) : body(std::move(b)), line(l) {} + +// AssertNode constructor +AssertNode::AssertNode(std::shared_ptr c, std::shared_ptr b, uint32_t l) + : cond(std::move(c)), body(std::move(b)), line(l) {} + +// ImportNode constructor +ImportNode::ImportNode(std::shared_ptr p, uint32_t l) : path(std::move(p)), line(l) {} + +// ThunkNode constructor +ThunkNode::ThunkNode(std::shared_ptr e, uint32_t l) : expr(std::move(e)), line(l) {} + +// ForceNode constructor +ForceNode::ForceNode(std::shared_ptr e, uint32_t l) : expr(std::move(e)), line(l) {} + +// LambdaPatternNode constructor +LambdaPatternNode::LambdaPatternNode(std::shared_ptr b, uint32_t l) + : allow_extra(false), body(std::move(b)), line(l) {} + +} // namespace nix_irc diff --git a/src/irc/types.h b/src/irc/types.h index d10acf1..328bf82 100644 --- a/src/irc/types.h +++ b/src/irc/types.h @@ -2,289 +2,369 @@ #define NIX_IRC_TYPES_H #include -#include -#include -#include -#include #include +#include +#include +#include +#include #include -#include -#include +#include namespace nix_irc { constexpr uint32_t IR_MAGIC = 0x4E495258; -constexpr uint32_t IR_VERSION = 2; +constexpr uint32_t IR_VERSION = 3; enum class NodeType : uint8_t { - CONST_INT = 0x01, - CONST_STRING = 0x02, - CONST_PATH = 0x03, - CONST_BOOL = 0x04, - CONST_NULL = 0x05, - VAR = 0x10, - LAMBDA = 0x20, - APP = 0x21, - BINARY_OP = 0x22, - UNARY_OP = 0x23, - ATTRSET = 0x30, - SELECT = 0x31, - HAS_ATTR = 0x34, - WITH = 0x32, - IF = 0x40, - LET = 0x50, - LETREC = 0x51, - ASSERT = 0x52, - THUNK = 0x60, - FORCE = 0x61, - ERROR = 0xFF + CONST_INT = 0x01, + CONST_FLOAT = 0x06, + CONST_STRING = 0x02, + CONST_PATH = 0x03, + CONST_BOOL = 0x04, + CONST_NULL = 0x05, + CONST_URI = 0x07, + CONST_LOOKUP_PATH = 0x08, + VAR = 0x10, + LAMBDA = 0x20, + APP 
= 0x21, + BINARY_OP = 0x22, + UNARY_OP = 0x23, + IMPORT = 0x24, + ATTRSET = 0x30, + SELECT = 0x31, + HAS_ATTR = 0x34, + WITH = 0x32, + LIST = 0x33, + IF = 0x40, + LET = 0x50, + LETREC = 0x51, + ASSERT = 0x52, + THUNK = 0x60, + FORCE = 0x61, + LAMBDA_PATTERN = 0x70, + INHERIT = 0x71, + INHERIT_FROM = 0x72, + STRING_INTERPOLATION = 0x73, + BUILTIN_CALL = 0x74, + ERROR = 0xFF }; enum class BinaryOp : uint8_t { - ADD, SUB, MUL, DIV, CONCAT, - EQ, NE, LT, GT, LE, GE, - AND, OR, IMPL + ADD, + SUB, + MUL, + DIV, + CONCAT, + EQ, + NE, + LT, + GT, + LE, + GE, + AND, + OR, + IMPL, + MERGE }; -enum class UnaryOp : uint8_t { - NEG, NOT -}; +enum class UnaryOp : uint8_t { NEG, NOT }; // Forward declare Node for use in shared_ptr class Node; struct ConstIntNode { - int64_t value; - uint32_t line = 0; - ConstIntNode(int64_t v = 0, uint32_t l = 0) : value(v), line(l) {} + int64_t value; + uint32_t line = 0; + ConstIntNode(int64_t v = 0, uint32_t l = 0) : value(v), line(l) {} }; struct ConstStringNode { - std::string value; - uint32_t line = 0; - ConstStringNode(std::string v = "", uint32_t l = 0) : value(std::move(v)), line(l) {} + std::string value; + uint32_t line = 0; + ConstStringNode(std::string v = "", uint32_t l = 0) : value(std::move(v)), line(l) {} }; struct ConstPathNode { - std::string value; - uint32_t line = 0; - ConstPathNode(std::string v = "", uint32_t l = 0) : value(std::move(v)), line(l) {} + std::string value; + uint32_t line = 0; + ConstPathNode(std::string v = "", uint32_t l = 0) : value(std::move(v)), line(l) {} }; struct ConstBoolNode { - bool value; - uint32_t line = 0; - ConstBoolNode(bool v = false, uint32_t l = 0) : value(v), line(l) {} + bool value; + uint32_t line = 0; + ConstBoolNode(bool v = false, uint32_t l = 0) : value(v), line(l) {} }; struct ConstNullNode { - uint32_t line = 0; - ConstNullNode(uint32_t l = 0) : line(l) {} + uint32_t line = 0; + ConstNullNode(uint32_t l = 0) : line(l) {} +}; + +struct ConstFloatNode { + double value; + uint32_t 
line = 0; + ConstFloatNode(double v = 0.0, uint32_t l = 0) : value(v), line(l) {} +}; + +struct ConstURINode { + std::string value; + uint32_t line = 0; + ConstURINode(std::string v = "", uint32_t l = 0) : value(std::move(v)), line(l) {} +}; + +struct ConstLookupPathNode { + std::string value; // e.g., "nixpkgs" or "nixpkgs/lib" + uint32_t line = 0; + ConstLookupPathNode(std::string v = "", uint32_t l = 0) : value(std::move(v)), line(l) {} }; struct VarNode { - uint32_t index = 0; - std::optional name; - uint32_t line = 0; - VarNode(uint32_t idx = 0, std::string n = "", uint32_t l = 0) - : index(idx), name(n.empty() ? std::nullopt : std::optional(n)), line(l) {} + uint32_t index = 0; + std::optional name; + uint32_t line = 0; + VarNode(uint32_t idx = 0, std::string n = "", uint32_t l = 0) + : index(idx), name(n.empty() ? std::nullopt : std::optional(n)), line(l) {} }; struct LambdaNode { - uint32_t arity = 1; - std::shared_ptr body; - std::optional param_name; - bool strict_pattern = true; - uint32_t line = 0; - LambdaNode(uint32_t a, std::shared_ptr b, uint32_t l = 0); + uint32_t arity = 1; + std::shared_ptr body; + std::optional param_name; + bool strict_pattern = true; + uint32_t line = 0; + LambdaNode(uint32_t a, std::shared_ptr b, uint32_t l = 0); +}; + +struct PatternField { + std::string name; + std::optional> default_value; + + PatternField(std::string n, std::optional> def = std::nullopt) + : name(std::move(n)), default_value(std::move(def)) {} +}; + +struct LambdaPatternNode { + std::vector required_fields; + std::vector optional_fields; + std::optional at_binding; + bool allow_extra; + std::shared_ptr body; + uint32_t line = 0; + + LambdaPatternNode(std::shared_ptr b, uint32_t l = 0); }; struct AppNode { - std::shared_ptr func; - std::shared_ptr arg; - uint32_t line = 0; - AppNode(std::shared_ptr f, std::shared_ptr a, uint32_t l = 0); + std::shared_ptr func; + std::shared_ptr arg; + uint32_t line = 0; + AppNode(std::shared_ptr f, std::shared_ptr a, 
uint32_t l = 0); }; struct BinaryOpNode { - BinaryOp op; - std::shared_ptr left; - std::shared_ptr right; - uint32_t line = 0; - BinaryOpNode(BinaryOp o, std::shared_ptr l, std::shared_ptr r, uint32_t ln = 0); + BinaryOp op; + std::shared_ptr left; + std::shared_ptr right; + uint32_t line = 0; + BinaryOpNode(BinaryOp o, std::shared_ptr l, std::shared_ptr r, uint32_t ln = 0); }; struct UnaryOpNode { - UnaryOp op; - std::shared_ptr operand; - uint32_t line = 0; - UnaryOpNode(UnaryOp o, std::shared_ptr operand, uint32_t l = 0); + UnaryOp op; + std::shared_ptr operand; + uint32_t line = 0; + UnaryOpNode(UnaryOp o, std::shared_ptr operand, uint32_t l = 0); +}; + +struct AttrBinding { + std::optional static_name; // Static key like "foo" + std::shared_ptr dynamic_name; // Dynamic key like ${expr} + std::shared_ptr value; + + // Static attribute + AttrBinding(std::string name, std::shared_ptr val) + : static_name(std::move(name)), value(std::move(val)) {} + + // Dynamic attribute + AttrBinding(std::shared_ptr name_expr, std::shared_ptr val) + : dynamic_name(std::move(name_expr)), value(std::move(val)) {} + + bool is_dynamic() const { return !static_name.has_value(); } }; struct AttrsetNode { - std::vector>> attrs; - bool recursive = false; - uint32_t line = 0; - AttrsetNode(bool rec = false, uint32_t l = 0) : recursive(rec), line(l) {} + std::vector attrs; + bool recursive = false; + uint32_t line = 0; + AttrsetNode(bool rec = false, uint32_t l = 0) : recursive(rec), line(l) {} }; struct SelectNode { - std::shared_ptr expr; - std::shared_ptr attr; - std::optional> default_expr; - uint32_t line = 0; - SelectNode(std::shared_ptr e, std::shared_ptr a, uint32_t l = 0); + std::shared_ptr expr; + std::shared_ptr attr; + std::optional> default_expr; + uint32_t line = 0; + SelectNode(std::shared_ptr e, std::shared_ptr a, uint32_t l = 0); }; struct HasAttrNode { - std::shared_ptr expr; - std::shared_ptr attr; - uint32_t line = 0; - HasAttrNode(std::shared_ptr e, std::shared_ptr a, 
uint32_t l = 0); + std::shared_ptr expr; + std::shared_ptr attr; + uint32_t line = 0; + HasAttrNode(std::shared_ptr e, std::shared_ptr a, uint32_t l = 0); }; struct WithNode { - std::shared_ptr attrs; - std::shared_ptr body; - uint32_t line = 0; - WithNode(std::shared_ptr a, std::shared_ptr b, uint32_t l = 0); + std::shared_ptr attrs; + std::shared_ptr body; + uint32_t line = 0; + WithNode(std::shared_ptr a, std::shared_ptr b, uint32_t l = 0); }; struct IfNode { - std::shared_ptr cond; - std::shared_ptr then_branch; - std::shared_ptr else_branch; - uint32_t line = 0; - IfNode(std::shared_ptr c, std::shared_ptr t, std::shared_ptr e, uint32_t l = 0); + std::shared_ptr cond; + std::shared_ptr then_branch; + std::shared_ptr else_branch; + uint32_t line = 0; + IfNode(std::shared_ptr c, std::shared_ptr t, std::shared_ptr e, uint32_t l = 0); }; struct LetNode { - std::vector>> bindings; - std::shared_ptr body; - uint32_t line = 0; - LetNode(std::shared_ptr b, uint32_t l = 0); + std::vector>> bindings; + std::shared_ptr body; + uint32_t line = 0; + LetNode(std::shared_ptr b, uint32_t l = 0); }; struct LetRecNode { - std::vector>> bindings; - std::shared_ptr body; - uint32_t line = 0; - LetRecNode(std::shared_ptr b, uint32_t l = 0); + std::vector>> bindings; + std::shared_ptr body; + uint32_t line = 0; + LetRecNode(std::shared_ptr b, uint32_t l = 0); }; struct AssertNode { - std::shared_ptr cond; - std::shared_ptr body; - uint32_t line = 0; - AssertNode(std::shared_ptr c, std::shared_ptr b, uint32_t l = 0); + std::shared_ptr cond; + std::shared_ptr body; + uint32_t line = 0; + AssertNode(std::shared_ptr c, std::shared_ptr b, uint32_t l = 0); +}; + +struct ImportNode { + std::shared_ptr path; // Path expression to import + uint32_t line = 0; + ImportNode(std::shared_ptr p, uint32_t l = 0); }; struct ThunkNode { - std::shared_ptr expr; - uint32_t line = 0; - ThunkNode(std::shared_ptr e, uint32_t l = 0); + std::shared_ptr expr; + uint32_t line = 0; + ThunkNode(std::shared_ptr 
e, uint32_t l = 0); }; struct ForceNode { - std::shared_ptr expr; - uint32_t line = 0; - ForceNode(std::shared_ptr e, uint32_t l = 0); + std::shared_ptr expr; + uint32_t line = 0; + ForceNode(std::shared_ptr e, uint32_t l = 0); +}; + +struct ListNode { + std::vector> elements; + uint32_t line = 0; + ListNode(std::vector> elems = {}, uint32_t l = 0) + : elements(std::move(elems)), line(l) {} +}; + +struct InheritNode { + std::vector names; + uint32_t line = 0; + + InheritNode(std::vector n = {}, uint32_t l = 0) : names(std::move(n)), line(l) {} +}; + +struct InheritFromNode { + std::shared_ptr source; + std::vector names; + uint32_t line = 0; + + InheritFromNode(std::shared_ptr src, std::vector n, uint32_t l = 0) + : source(std::move(src)), names(std::move(n)), line(l) {} +}; + +struct StringPart { + enum class Type { LITERAL, EXPR }; + Type type; + std::string literal; + std::shared_ptr expr; + + static StringPart make_literal(std::string lit) { + StringPart part; + part.type = Type::LITERAL; + part.literal = std::move(lit); + return part; + } + + static StringPart make_expr(std::shared_ptr e) { + StringPart part; + part.type = Type::EXPR; + part.expr = std::move(e); + return part; + } +}; + +struct StringInterpolationNode { + std::vector parts; + uint32_t line = 0; + + StringInterpolationNode(std::vector p = {}, uint32_t l = 0) + : parts(std::move(p)), line(l) {} +}; + +struct BuiltinCallNode { + std::string builtin_name; + std::vector> args; + uint32_t line = 0; + + BuiltinCallNode(std::string name, std::vector> a = {}, uint32_t l = 0) + : builtin_name(std::move(name)), args(std::move(a)), line(l) {} }; // Node wraps a variant for type-safe AST class Node { public: - using Variant = std::variant< - ConstIntNode, - ConstStringNode, - ConstPathNode, - ConstBoolNode, - ConstNullNode, - VarNode, - LambdaNode, - AppNode, - BinaryOpNode, - UnaryOpNode, - AttrsetNode, - SelectNode, - HasAttrNode, - WithNode, - IfNode, - LetNode, - LetRecNode, - AssertNode, - ThunkNode, 
- ForceNode - >; + using Variant = + std::variant; - Variant data; + Variant data; - template - Node(T&& value) : data(std::forward(value)) {} + template Node(T&& value) : data(std::forward(value)) {} - template - T* get_if() { return std::get_if(&data); } + template T* get_if() { return std::get_if(&data); } - template - const T* get_if() const { return std::get_if(&data); } + template const T* get_if() const { return std::get_if(&data); } - template - bool holds() const { return std::holds_alternative(data); } + template bool holds() const { return std::holds_alternative(data); } }; -// Constructor implementations -inline LambdaNode::LambdaNode(uint32_t a, std::shared_ptr b, uint32_t l) - : arity(a), body(b), line(l) {} - -inline AppNode::AppNode(std::shared_ptr f, std::shared_ptr a, uint32_t l) - : func(f), arg(a), line(l) {} - -inline BinaryOpNode::BinaryOpNode(BinaryOp o, std::shared_ptr l, std::shared_ptr r, uint32_t ln) - : op(o), left(l), right(r), line(ln) {} - -inline UnaryOpNode::UnaryOpNode(UnaryOp o, std::shared_ptr operand, uint32_t l) - : op(o), operand(operand), line(l) {} - -inline SelectNode::SelectNode(std::shared_ptr e, std::shared_ptr a, uint32_t l) - : expr(e), attr(a), line(l) {} - -inline HasAttrNode::HasAttrNode(std::shared_ptr e, std::shared_ptr a, uint32_t l) - : expr(e), attr(a), line(l) {} - -inline WithNode::WithNode(std::shared_ptr a, std::shared_ptr b, uint32_t l) - : attrs(a), body(b), line(l) {} - -inline IfNode::IfNode(std::shared_ptr c, std::shared_ptr t, std::shared_ptr e, uint32_t l) - : cond(c), then_branch(t), else_branch(e), line(l) {} - -inline LetNode::LetNode(std::shared_ptr b, uint32_t l) - : body(b), line(l) {} - -inline LetRecNode::LetRecNode(std::shared_ptr b, uint32_t l) - : body(b), line(l) {} - -inline AssertNode::AssertNode(std::shared_ptr c, std::shared_ptr b, uint32_t l) - : cond(c), body(b), line(l) {} - -inline ThunkNode::ThunkNode(std::shared_ptr e, uint32_t l) - : expr(e), line(l) {} - -inline 
ForceNode::ForceNode(std::shared_ptr e, uint32_t l) - : expr(e), line(l) {} - struct SourceFile { - std::string path; - std::string content; - std::shared_ptr ast; + std::string path; + std::string content; + std::shared_ptr ast; }; struct IRModule { - uint32_t version = IR_VERSION; - std::vector sources; - std::vector> imports; - std::shared_ptr entry; - std::unordered_map string_table; + uint32_t version = IR_VERSION; + std::vector sources; + std::vector> imports; + std::shared_ptr entry; + std::unordered_map string_table; }; -} +} // namespace nix_irc #endif diff --git a/src/plugin.cpp b/src/plugin.cpp index bad00fd..4a821f5 100644 --- a/src/plugin.cpp +++ b/src/plugin.cpp @@ -5,20 +5,15 @@ #include "nix/expr/eval.hh" #include "nix/expr/primops.hh" #include "nix/expr/value.hh" -#include "nix/store/store-api.hh" -#include "nix/util/source-path.hh" +#include "irc/evaluator.h" #include "irc/ir_gen.h" #include "irc/parser.h" -#include "irc/resolver.h" #include "irc/serializer.h" #include "irc/types.h" -#include "irc/evaluator.h" -#include +#include #include -#include -#include namespace nix_ir_plugin { @@ -29,50 +24,52 @@ using namespace nix_irc; * Load and evaluate a pre-compiled IR bundle * Usage: builtins.nixIR.loadIR "/path/to/file.nixir" */ -static void prim_loadIR(EvalState &state, const PosIdx pos, Value **args, - Value &v) { +static void prim_loadIR(EvalState& state, const PosIdx pos, Value** args, Value& v) { auto path = state.forceStringNoCtx( - *args[0], pos, - "while evaluating the first argument to builtins.nixIR.loadIR"); + *args[0], pos, "while evaluating the first argument to builtins.nixIR.loadIR"); std::string pathStr(path); + auto t_start = std::chrono::high_resolution_clock::now(); + Deserializer deserializer; IRModule module; try { module = deserializer.deserialize(pathStr); - } catch (const std::exception &e) { - state.error("failed to deserialize IR bundle: %s", e.what()) - .atPos(pos) - .debugThrow(); + } catch (const std::exception& e) { + 
state.error("failed to deserialize IR bundle: %s", e.what()).atPos(pos).debugThrow(); } + auto t_deser = std::chrono::high_resolution_clock::now(); + if (!module.entry) { - state.error("IR bundle has no entry point") - .atPos(pos) - .debugThrow(); + state.error("IR bundle has no entry point").atPos(pos).debugThrow(); } try { Evaluator evaluator(state); evaluator.eval_to_nix(module.entry, v); - } catch (const std::exception &e) { - state.error("failed to evaluate IR: %s", e.what()) - .atPos(pos) - .debugThrow(); + } catch (const std::exception& e) { + state.error("failed to evaluate IR: %s", e.what()).atPos(pos).debugThrow(); } + + auto t_eval = std::chrono::high_resolution_clock::now(); + + auto deser_us = std::chrono::duration_cast(t_deser - t_start).count(); + auto eval_us = std::chrono::duration_cast(t_eval - t_deser).count(); + + std::cerr << "nixIR timing: deser=" << deser_us << "us eval=" << eval_us + << "us total=" << (deser_us + eval_us) << "us" << std::endl; } /** * Compile Nix source to IR on-the-fly * Usage: builtins.nixIR.compile "{ x = 1; }" */ -static void prim_compileNix(EvalState &state, const PosIdx pos, Value **args, - Value &v) { +static void prim_compileNix(EvalState& state, const PosIdx pos, Value** args, Value& v) { auto source = state.forceStringNoCtx( - *args[0], pos, - "while evaluating the first argument to builtins.nixIR.compile"); + *args[0], pos, "while evaluating the first argument to builtins.nixIR.compile"); std::string sourceStr(source); @@ -81,9 +78,7 @@ static void prim_compileNix(EvalState &state, const PosIdx pos, Value **args, auto ast = parser.parse(sourceStr, ""); if (!ast) { - state.error("failed to parse Nix expression") - .atPos(pos) - .debugThrow(); + state.error("failed to parse Nix expression").atPos(pos).debugThrow(); } IRGenerator ir_gen; @@ -92,10 +87,8 @@ static void prim_compileNix(EvalState &state, const PosIdx pos, Value **args, Evaluator evaluator(state); evaluator.eval_to_nix(ir, v); - } catch (const 
std::exception &e) { - state.error("IR compilation failed: %s", e.what()) - .atPos(pos) - .debugThrow(); + } catch (const std::exception& e) { + state.error("IR compilation failed: %s", e.what()).atPos(pos).debugThrow(); } } @@ -103,19 +96,18 @@ static void prim_compileNix(EvalState &state, const PosIdx pos, Value **args, * Get information about the IR plugin * Usage: builtins.nixIR.info */ -static void prim_info(EvalState &state, const PosIdx pos, Value **args, - Value &v) { +static void prim_info(EvalState& state, const PosIdx pos, Value** args, Value& v) { auto bindings = state.buildBindings(3); - Value *vName = state.allocValue(); + Value* vName = state.allocValue(); vName->mkString("nix-ir-plugin"); bindings.insert(state.symbols.create("name"), vName); - Value *vVersion = state.allocValue(); + Value* vVersion = state.allocValue(); vVersion->mkString("0.1.0"); bindings.insert(state.symbols.create("version"), vVersion); - Value *vStatus = state.allocValue(); + Value* vStatus = state.allocValue(); vStatus->mkString("runtime-active"); bindings.insert(state.symbols.create("status"), vStatus); @@ -160,7 +152,7 @@ static RegisterPrimOp rp_info({ } // namespace nix_ir_plugin -// Plugin initialization message +// Plugin initialization __attribute__((constructor)) static void init_plugin() { - std::cerr << "nix-ir-plugin loaded" << std::endl; + // Plugin loads silently... 
} diff --git a/tests/attrset.nixir b/tests/attrset.nixir deleted file mode 100644 index 708f5dd..0000000 Binary files a/tests/attrset.nixir and /dev/null differ diff --git a/tests/attrset_var.nix b/tests/attrset_var.nix deleted file mode 100644 index 11e1da6..0000000 --- a/tests/attrset_var.nix +++ /dev/null @@ -1,4 +0,0 @@ -let - x = 10; -in - { a = x; } diff --git a/tests/benchmark/large.nix b/tests/benchmark/large.nix new file mode 100644 index 0000000..bf29a77 --- /dev/null +++ b/tests/benchmark/large.nix @@ -0,0 +1,237 @@ +# Large benchmark for comprehensive stress testing +let + range = start: end: + if start >= end + then [] + else [start] ++ range (start + 1) end; + + concat = a: b: a ++ b; + + factorial = n: + if n <= 1 + then 1 + else n * factorial (n - 1); + + # Ackermann function (highly recursive) + ackermann = m: n: + if m == 0 + then n + 1 + else if n == 0 + then ackermann (m - 1) 1 + else ackermann (m - 1) (ackermann m (n - 1)); + + # Greatest common divisor + gcd = a: b: + if b == 0 + then a + else gcd b (a - (a / b) * b); + + # Power function + pow = base: exp: + if exp == 0 + then 1 + else if exp == 1 + then base + else base * pow base (exp - 1); + + compose = f: g: x: f (g x); + double = x: x * 2; + addTen = x: x + 10; + square = x: x * x; + + pipeline = compose square (compose double addTen); + + list_100 = range 1 101; + list_50 = range 1 51; + list_25 = range 1 26; + + largeAttrs = { + a1 = 1; + a2 = 2; + a3 = 3; + a4 = 4; + a5 = 5; + a6 = 6; + a7 = 7; + a8 = 8; + a9 = 9; + a10 = 10; + b1 = 11; + b2 = 12; + b3 = 13; + b4 = 14; + b5 = 15; + b6 = 16; + b7 = 17; + b8 = 18; + b9 = 19; + b10 = 20; + c1 = 21; + c2 = 22; + c3 = 23; + c4 = 24; + c5 = 25; + c6 = 26; + c7 = 27; + c8 = 28; + c9 = 29; + c10 = 30; + d1 = 31; + d2 = 32; + d3 = 33; + d4 = 34; + d5 = 35; + d6 = 36; + d7 = 37; + d8 = 38; + d9 = 39; + d10 = 40; + e1 = 41; + e2 = 42; + e3 = 43; + e4 = 44; + e5 = 45; + e6 = 46; + e7 = 47; + e8 = 48; + e9 = 49; + e10 = 50; + }; + + # Very deep 
nesting (10 levels) + deepNest = { + level1 = { + level2 = { + level3 = { + level4 = { + level5 = { + level6 = { + level7 = { + level8 = { + level9 = { + level10 = { + treasure = "found"; + value = 12345; + }; + }; + }; + }; + }; + }; + }; + }; + }; + }; + }; + + recursiveComplex = rec { + base = 10; + doubled = base * 2; + tripled = base * 3; + + sum = doubled + tripled; + product = doubled * tripled; + + x = base * 4; + y = x + doubled; + z = y * tripled; + + total = sum + product + z; + final = total * base; + }; + + config1 = rec { + multiplier = 5; + base = 100; + result = base * multiplier; + }; + + config2 = rec { + offset = 50; + scaled = config1.result + offset; + doubled = scaled * 2; + }; + + config3 = rec { + factor = 3; + combined = config2.doubled * factor; + final = combined + config1.multiplier; + }; + + baseConfig = { + system = { + arch = "x86_64"; + os = "linux"; + }; + settings = { + enabled = true; + level = 5; + }; + }; + + overrides = { + system = { + kernel = "6.1"; + }; + settings = { + level = 10; + extra = "custom"; + }; + newSection = { + value = 42; + }; + }; + + merged = + baseConfig + // overrides + // { + system = baseConfig.system // overrides.system; + settings = + baseConfig.settings + // overrides.settings + // { + combined = baseConfig.settings.level + overrides.settings.level; + }; + }; + + fact10 = factorial 10; + fact7 = factorial 7; + ack_3_3 = ackermann 3 3; + gcd_48_18 = gcd 48 18; + gcd_100_35 = gcd 100 35; + pow_2_10 = pow 2 10; + pow_3_5 = pow 3 5; + + pipelineResult = pipeline 5; # ((5 + 10) * 2)^2 = 900 + + # List operations + concatenated = concat [1 2 3] [4 5 6]; + multilevel = concat (concat [1] [2 3]) [4 5]; +in { + # Lists + inherit list_100 list_50 list_25 concatenated multilevel; + + # Math results + inherit fact10 fact7 ack_3_3 gcd_48_18 gcd_100_35 pow_2_10 pow_3_5 pipelineResult; + + # Data structures + inherit largeAttrs merged; + deepValue = 
deepNest.level1.level2.level3.level4.level5.level6.level7.level8.level9.level10.value; + deepTreasure = deepNest.level1.level2.level3.level4.level5.level6.level7.level8.level9.level10.treasure; + + # Recursive attrsets + recursiveTotal = recursiveComplex.total; + recursiveFinal = recursiveComplex.final; + computedZ = recursiveComplex.z; + + # Config chain + config1Result = config1.result; + config2Doubled = config2.doubled; + config3Final = config3.final; + + # Merged config + mergedCombined = merged.settings.combined; + mergedArch = merged.system.arch; + mergedKernel = merged.system.kernel; +} diff --git a/tests/benchmark/medium.nix b/tests/benchmark/medium.nix new file mode 100644 index 0000000..6234dca --- /dev/null +++ b/tests/benchmark/medium.nix @@ -0,0 +1,75 @@ +let + # Recursive factorial + factorial = n: + if n <= 1 + then 1 + else n * factorial (n - 1); + + # Fibonacci sequence generator + fib = n: + if n <= 1 + then n + else fib (n - 1) + fib (n - 2); + + # List concatenation test + range = start: end: + if start >= end + then [] + else [start] ++ range (start + 1) end; + + # Curried function application + add = x: y: x + y; + add5 = add 5; + + # Complex computation + compute = x: y: let + a = x * 2; + b = y + 10; + c = a * b; + in + c / 2; + + # Data structures + numbers = range 1 11; + + # Nested attribute operations + base = { + config = { + enable = true; + value = 42; + }; + data = { + items = [1 2 3]; + }; + }; + + extended = + base + // { + config = + base.config + // { + extra = "test"; + multiplied = base.config.value * 2; + }; + computed = base.config.value + 100; + }; + + # Recursive attrset with selections + recursive = rec { + x = 10; + y = x * 2; + z = y + x; + result = z * 3; + final = result + x; + }; +in { + fact5 = factorial 5; + fib7 = fib 7; + sum15 = add5 10; + computed = compute 10 20; + inherit numbers extended; + deepValue = extended.config.multiplied; + recursiveResult = recursive.result; + recursiveFinal = recursive.final; +} 
diff --git a/tests/benchmark/run.sh b/tests/benchmark/run.sh new file mode 100755 index 0000000..c392139 --- /dev/null +++ b/tests/benchmark/run.sh @@ -0,0 +1,158 @@ +#!/usr/bin/env bash +set -e + +echo "# Running benchmarks..." +echo "" + +BENCH_DIR="$(pwd)/tests/benchmark" +IRC_BIN="$(pwd)/build/nix-irc" + +GREEN='\033[0;32m' +BLUE='\033[0;34m' +YELLOW='\033[0;33m' +NC='\033[0m' + +get_ms() { + local time_str="$1" + if [[ $time_str =~ ([0-9]+)m([0-9.]+)s ]]; then + local mins="${BASH_REMATCH[1]}" + local secs="${BASH_REMATCH[2]}" + local ms + ms=$(awk "BEGIN {printf \"%.1f\", ($mins * 60000) + ($secs * 1000)}") + echo "$ms" + else + echo "0" + fi +} + +run_benchmark() { + local name="$1" + local file="$2" + + echo -e "${BLUE}=== $name ===${NC}" + echo "" + + # Measure compilation time only + echo -n " Compilation only: " + local compile_start + compile_start=$(date +%s%N) + "$IRC_BIN" "$file" /tmp/bench.nixir >/dev/null 2>&1 + local compile_end + compile_end=$(date +%s%N) + local compile_ms=$(((compile_end - compile_start) / 1000000)) + echo -e "${YELLOW}${compile_ms}ms${NC}" + + # Measure IR loading only (deserialization + evaluation) + echo -n " IR load only: " + PLUGIN_PATH="$(pwd)/build/nix-ir-plugin.so" + if [ ! 
-f "$PLUGIN_PATH" ]; then + echo -e "${YELLOW}skipped${NC} (plugin not built)" + else + # Pre-compile the IR + "$IRC_BIN" "$file" /tmp/bench.nixir >/dev/null 2>&1 + + # Measure just the loading (average of 10 runs to reduce noise) + local total_load_us=0 + for _ in {1..10}; do + local load_output + load_output=$(nix-instantiate --plugin-files "$PLUGIN_PATH" --eval --expr "builtins.nixIR_loadIR \"/tmp/bench.nixir\"" 2>&1 >/dev/null | grep "nixIR timing" | grep -oP 'total=\K[0-9]+') + total_load_us=$((total_load_us + load_output)) + done + local avg_load_us=$((total_load_us / 10)) + local avg_load_ms_frac=$(awk "BEGIN {printf \"%.3f\", $avg_load_us / 1000}") + echo -e "${GREEN}${avg_load_ms_frac}ms${NC} avg (10 runs)" + fi + + # Measure full pipeline (compile + nix-instantiate overhead + IR load) + echo -n " Full pipeline: " + if [ ! -f "$PLUGIN_PATH" ]; then + echo -e "${YELLOW}skipped${NC}" + else + local pipeline_start + pipeline_start=$(date +%s%N) + "$IRC_BIN" "$file" /tmp/bench.nixir >/dev/null 2>&1 + nix-instantiate --plugin-files "$PLUGIN_PATH" --eval --expr "builtins.nixIR_loadIR \"/tmp/bench.nixir\"" >/dev/null 2>&1 + local pipeline_end + pipeline_end=$(date +%s%N) + local pipeline_ms=$(((pipeline_end - pipeline_start) / 1000000)) + echo -e "${YELLOW}${pipeline_ms}ms${NC}" + fi + + # Source and IR sizes + local src_size + src_size=$(stat -c%s "$file" 2>/dev/null || stat -f%z "$file" 2>/dev/null) + local ir_size + ir_size=$(stat -c%s /tmp/bench.nixir 2>/dev/null || stat -f%z /tmp/bench.nixir 2>/dev/null) + local ratio=0 + if [[ "$src_size" -gt 0 ]]; then + ratio=$((ir_size * 100 / src_size)) + fi + echo -e " Source size: ${src_size}B" + echo -e " IR bundle size: ${ir_size}B (${ratio}% of source)" + + echo "" + + # Native Nix evaluation (baseline) + echo -n " Native Nix eval: " + local native_total=0 + for _ in {1..5}; do + local t + t=$( (time nix-instantiate --eval --strict "$file" >/dev/null 2>&1) 2>&1 | grep "real" | awk '{print $2}') + local ms + 
ms=$(get_ms "$t") + native_total=$(awk "BEGIN {print $native_total + $ms}") + done + local native_avg + native_avg=$(awk "BEGIN {printf \"%.1f\", $native_total / 5}") + echo -e "${GREEN}${native_avg}ms${NC} avg (5 runs)" + + echo "" +} + +echo "Measuring IR compilation speed and bundle size characteristics." +echo "" + +run_benchmark "Simple Expression" "$BENCH_DIR/simple.nix" +run_benchmark "Medium Complexity" "$BENCH_DIR/medium.nix" +run_benchmark "Large Expression" "$BENCH_DIR/large.nix" + +# Overall statistics +echo -e "${BLUE}=== Overall Statistics ===${NC}" +echo "" + +testdir=$(mktemp -d) +total_nix=0 +total_ir=0 +total_compile_time=0 + +for f in "$BENCH_DIR"/*.nix; do + nixsize=$(stat -c%s "$f" 2>/dev/null || stat -f%z "$f" 2>/dev/null) + base=$(basename "$f" .nix) + irfile="${testdir}/${base}.nixir" + + start=$(date +%s%N) + "$IRC_BIN" "$f" "$irfile" >/dev/null 2>&1 + end=$(date +%s%N) + compile_time=$(((end - start) / 1000000)) + + if [ -f "$irfile" ]; then + irsize=$(stat -c%s "$irfile" 2>/dev/null || stat -f%z "$irfile" 2>/dev/null) + total_nix=$((total_nix + nixsize)) + total_ir=$((total_ir + irsize)) + total_compile_time=$((total_compile_time + compile_time)) + fi +done + +total_ratio=$((total_ir * 100 / total_nix)) +avg_compile_time=$((total_compile_time / 3)) + +# TBH those are entirely unnecessary. However, I'm a sucker for data +# and those are trivial to compile. Might as well. Who knows, maybe it'll +# come in handy in the future. 
+echo " Total source size: ${total_nix}B" +echo " Total IR size: ${total_ir}B" +echo " Compression ratio: ${total_ratio}% of source" +echo " Average compile time: ${avg_compile_time}ms" +echo "" + +rm -rf "$testdir" diff --git a/tests/benchmark/simple.nix b/tests/benchmark/simple.nix new file mode 100644 index 0000000..aec2fc9 --- /dev/null +++ b/tests/benchmark/simple.nix @@ -0,0 +1,13 @@ +let + x = 10; + y = 20; + z = x + y; +in { + result = z * 2; + list = [1 2 3 4 5]; + attrs = { + a = 1; + b = 2; + c = 3; + }; +} diff --git a/tests/comparison.nixir b/tests/comparison.nixir deleted file mode 100644 index fb7b4fd..0000000 Binary files a/tests/comparison.nixir and /dev/null differ diff --git a/tests/fixtures/ancient_let.nix b/tests/fixtures/ancient_let.nix new file mode 100644 index 0000000..3d4cfec --- /dev/null +++ b/tests/fixtures/ancient_let.nix @@ -0,0 +1,8 @@ +# Test ancient let syntax: let { bindings; body = expr; } +# This is equivalent to: let bindings in expr, but has been deprecated +# in newer Nix versions. 
+let { + x = 10; + y = 20; + body = x + y; +} diff --git a/tests/attrset.nix b/tests/fixtures/attrset.nix similarity index 100% rename from tests/attrset.nix rename to tests/fixtures/attrset.nix diff --git a/tests/fixtures/attrset_var.nix b/tests/fixtures/attrset_var.nix new file mode 100644 index 0000000..6f9db40 --- /dev/null +++ b/tests/fixtures/attrset_var.nix @@ -0,0 +1,3 @@ +let + x = 10; +in {a = x;} diff --git a/tests/fixtures/block_comments.nix b/tests/fixtures/block_comments.nix new file mode 100644 index 0000000..301297d --- /dev/null +++ b/tests/fixtures/block_comments.nix @@ -0,0 +1,24 @@ +# Test block comments /* */ +/* +This is a block comment +*/ +let + x = 42; + /* + inline block comment + */ + /* + Multi-line + block + comment + */ + y = 100; +in + /* + Comment before expression + */ + x + y +/* +Trailing comment +*/ + diff --git a/tests/comparison.nix b/tests/fixtures/comparison.nix similarity index 60% rename from tests/comparison.nix rename to tests/fixtures/comparison.nix index be42016..a73c226 100644 --- a/tests/comparison.nix +++ b/tests/fixtures/comparison.nix @@ -3,4 +3,6 @@ let a = 10; b = 20; in - if a < b then true else false + if a < b + then true + else false diff --git a/tests/fixtures/dynamic_attr_full.nix b/tests/fixtures/dynamic_attr_full.nix new file mode 100644 index 0000000..3c7b39b --- /dev/null +++ b/tests/fixtures/dynamic_attr_full.nix @@ -0,0 +1,14 @@ +# Test dynamic attribute names +let + key = "mykey"; + value = 42; +in { + # Dynamic attribute with string interpolation + "${key}" = value; + + # Another dynamic attribute + "${key}_suffix" = value + 1; + + # Static attribute for comparison + static = 100; +} diff --git a/tests/fixtures/dynamic_attrs.nix b/tests/fixtures/dynamic_attrs.nix new file mode 100644 index 0000000..3c32fd5 --- /dev/null +++ b/tests/fixtures/dynamic_attrs.nix @@ -0,0 +1,15 @@ +# Test dynamic attribute names +# Note: Full dynamic attrs require runtime evaluation +# For now, testing that syntax is 
recognized +let + key = "mykey"; +in { + # Static attribute for comparison + static = "value"; + + # Dynamic attribute name (basic string interpolation) + # "${key}" = "dynamic_value"; + + # For now, use workaround with static names + mykey = "works"; +} diff --git a/tests/fixtures/float_test.nix b/tests/fixtures/float_test.nix new file mode 100644 index 0000000..c239c60 --- /dev/null +++ b/tests/fixtures/float_test.nix @@ -0,0 +1 @@ +1.5 diff --git a/tests/fixtures/home_path.nix b/tests/fixtures/home_path.nix new file mode 100644 index 0000000..ccfb107 --- /dev/null +++ b/tests/fixtures/home_path.nix @@ -0,0 +1,11 @@ +# Test home-relative paths +# Note: This will resolve to the actual home directory at evaluation time +let + # Example home path (will be expanded by evaluator) + config = ~/..config; + file = ~/.bashrc; +in { + # These are just path values that will be expanded + configPath = config; + filePath = file; +} diff --git a/tests/fixtures/if.nix b/tests/fixtures/if.nix new file mode 100644 index 0000000..7e48f38 --- /dev/null +++ b/tests/fixtures/if.nix @@ -0,0 +1,4 @@ +# Conditional test +if true +then 1 +else 2 diff --git a/tests/fixtures/import_lookup.nix b/tests/fixtures/import_lookup.nix new file mode 100644 index 0000000..448b1ea --- /dev/null +++ b/tests/fixtures/import_lookup.nix @@ -0,0 +1,3 @@ +# Test import with lookup path +# Common pattern: import { } +import diff --git a/tests/fixtures/import_simple.nix b/tests/fixtures/import_simple.nix new file mode 100644 index 0000000..1e024cc --- /dev/null +++ b/tests/fixtures/import_simple.nix @@ -0,0 +1,9 @@ +# Test import expression +# Import evaluates the file and returns its value +# Import a file that returns a simple value (42) +import ./simple.nix +# Can also import lookup paths: +# import { } +# Import with path expressions: +# import (./dir + "/file.nix") + diff --git a/tests/fixtures/indented_string.nix b/tests/fixtures/indented_string.nix new file mode 100644 index 0000000..16c6026 --- 
/dev/null +++ b/tests/fixtures/indented_string.nix @@ -0,0 +1,31 @@ +# Test indented strings (multi-line strings with '' delimiters) +let + # Simple indented string + simple = '' + Hello + World + ''; + + # Indented string with interpolation + name = "Nix"; + greeting = '' + Welcome to ${name}! + This is indented. + ''; + + # Escape sequences + escapes = '' + Literal dollar: ''$ + Literal quotes: ''' + Regular text + ''; + + # Shell script example (common use case) + script = '' + #!/bin/bash + echo "Running script" + ls -la + ''; +in { + inherit simple greeting escapes script; +} diff --git a/tests/fixtures/inherit.nix b/tests/fixtures/inherit.nix new file mode 100644 index 0000000..3382979 --- /dev/null +++ b/tests/fixtures/inherit.nix @@ -0,0 +1,20 @@ +# Test inherit keyword +let + x = 10; + y = 20; + attrs = { + a = 1; + b = 2; + c = 3; + }; +in { + # Basic inherit from outer scope + inherit x y; + + # Inherit from expression + inherit (attrs) a b; + + # Mixed + z = 30; + inherit (attrs) c; +} diff --git a/tests/fixtures/inherit_from.nix b/tests/fixtures/inherit_from.nix new file mode 100644 index 0000000..31c2a00 --- /dev/null +++ b/tests/fixtures/inherit_from.nix @@ -0,0 +1,3 @@ +let + attrs = {a = 1;}; +in {inherit (attrs) a;} diff --git a/tests/fixtures/inherit_simple.nix b/tests/fixtures/inherit_simple.nix new file mode 100644 index 0000000..2806435 --- /dev/null +++ b/tests/fixtures/inherit_simple.nix @@ -0,0 +1,3 @@ +let + x = 10; +in {inherit x;} diff --git a/tests/fixtures/lambda_pattern.nix b/tests/fixtures/lambda_pattern.nix new file mode 100644 index 0000000..6fd3910 --- /dev/null +++ b/tests/fixtures/lambda_pattern.nix @@ -0,0 +1,61 @@ +# Test lambda patterns +let + # Basic destructuring + f1 = { + a, + b, + }: + a + b; + + # With default values + f2 = { + a, + b ? 10, + }: + a + b; + + # With ellipsis (extra fields allowed) + f3 = {a, ...}: a * 2; + + # Named pattern with ellipsis to allow extra fields + f4 = arg @ { + a, + b, + ... 
+ }: + a + b + arg.c; + + # Simple lambda (not a pattern) + f5 = x: x + 1; +in { + # Test basic destructuring + test1 = f1 { + a = 3; + b = 4; + }; + + # Test with defaults (provide both) + test2a = f2 { + a = 5; + b = 6; + }; + + # Test with defaults (use default for b) + test2b = f2 {a = 5;}; + + # Test ellipsis (extra field ignored) + test3 = f3 { + a = 7; + extra = 999; + }; + + # Test named pattern + test4 = f4 { + a = 1; + b = 2; + c = 3; + }; + + # Test simple lambda + test5 = f5 10; +} diff --git a/tests/fixtures/lambda_pattern.nixr b/tests/fixtures/lambda_pattern.nixr new file mode 100644 index 0000000..4e34156 Binary files /dev/null and b/tests/fixtures/lambda_pattern.nixr differ diff --git a/tests/let.nix b/tests/fixtures/let.nix similarity index 86% rename from tests/let.nix rename to tests/fixtures/let.nix index d9c706b..04b80ce 100644 --- a/tests/let.nix +++ b/tests/fixtures/let.nix @@ -2,4 +2,5 @@ let x = 10; y = 20; -in x +in + x diff --git a/tests/fixtures/list_concat.nix b/tests/fixtures/list_concat.nix new file mode 100644 index 0000000..a1b09f1 --- /dev/null +++ b/tests/fixtures/list_concat.nix @@ -0,0 +1,15 @@ +# Test list concatenation operator ++ +let + list1 = [1 2 3]; + list2 = [4 5 6]; + empty = []; +in { + # Basic concatenation + combined = list1 ++ list2; + + # Concatenate with empty list + with_empty = list1 ++ empty; + + # Nested concatenation + triple = [1] ++ [2] ++ [3]; +} diff --git a/tests/fixtures/list_simple.nix b/tests/fixtures/list_simple.nix new file mode 100644 index 0000000..7167967 --- /dev/null +++ b/tests/fixtures/list_simple.nix @@ -0,0 +1,8 @@ +# Test basic list support +let + x = [1 2 3]; + y = [4 5 6]; + z = x ++ y; # List concatenation +in { + inherit x y z; +} diff --git a/tests/logical.nix b/tests/fixtures/logical.nix similarity index 50% rename from tests/logical.nix rename to tests/fixtures/logical.nix index e2e2dac..bae055f 100644 --- a/tests/logical.nix +++ b/tests/fixtures/logical.nix @@ -3,4 +3,8 @@ let x = 
true; y = false; in - if x && y then 1 else if x || y then 2 else 3 + if x && y + then 1 + else if x || y + then 2 + else 3 diff --git a/tests/fixtures/lookup_path.nix b/tests/fixtures/lookup_path.nix new file mode 100644 index 0000000..a770360 --- /dev/null +++ b/tests/fixtures/lookup_path.nix @@ -0,0 +1,8 @@ +# Test lookup path syntax +# Lookup paths resolve via NIX_PATH environment variable +# Example: -> /nix/var/nix/profiles/per-user/root/channels/nixpkgs +# Simple lookup path + +# Nested lookup path (common pattern) +# + diff --git a/tests/fixtures/lookup_path_nested.nix b/tests/fixtures/lookup_path_nested.nix new file mode 100644 index 0000000..0478b00 --- /dev/null +++ b/tests/fixtures/lookup_path_nested.nix @@ -0,0 +1,3 @@ +# Test nested lookup path +# Common pattern in Nix: or + diff --git a/tests/fixtures/merge.nix b/tests/fixtures/merge.nix new file mode 100644 index 0000000..221d0f6 --- /dev/null +++ b/tests/fixtures/merge.nix @@ -0,0 +1,2 @@ +# Test attrset merge operator (//) +{a = {x = 1;} // {y = 2;};} diff --git a/tests/fixtures/nested_attrs.nix b/tests/fixtures/nested_attrs.nix new file mode 100644 index 0000000..874d08b --- /dev/null +++ b/tests/fixtures/nested_attrs.nix @@ -0,0 +1,13 @@ +# Test nested attribute paths +{ + # Simple nested path + a.b.c = 42; + + # Multiple nested paths + x.y = 1; + x.z = 2; + + # Mix of nested and non-nested + foo = "bar"; + nested.deep.value = 100; +} diff --git a/tests/operators.nix b/tests/fixtures/operators.nix similarity index 100% rename from tests/operators.nix rename to tests/fixtures/operators.nix diff --git a/tests/fixtures/or_in_attrset.nix b/tests/fixtures/or_in_attrset.nix new file mode 100644 index 0000000..d1ab7f9 --- /dev/null +++ b/tests/fixtures/or_in_attrset.nix @@ -0,0 +1,6 @@ +# Test 'or' in attrset context +let + attrs = {a = 1;}; +in { + test = attrs.a or 999; +} diff --git a/tests/fixtures/or_simple.nix b/tests/fixtures/or_simple.nix new file mode 100644 index 0000000..d4de5be --- 
/dev/null +++ b/tests/fixtures/or_simple.nix @@ -0,0 +1,5 @@ +# Simplest 'or' test +let + x = {a = 1;}; +in + x.a or 2 diff --git a/tests/fixtures/path_concat.nix b/tests/fixtures/path_concat.nix new file mode 100644 index 0000000..682175c --- /dev/null +++ b/tests/fixtures/path_concat.nix @@ -0,0 +1,13 @@ +# Test path concatenation +let + # Path + string = path + p1 = ./foo + "/bar"; + + # String + path = path + p2 = "/prefix" + ./suffix; + + # Path + path = path + p3 = ./dir + ./file; +in { + inherit p1 p2 p3; +} diff --git a/tests/fixtures/precedence.nix b/tests/fixtures/precedence.nix new file mode 100644 index 0000000..3535775 --- /dev/null +++ b/tests/fixtures/precedence.nix @@ -0,0 +1,12 @@ +# Test operator precedence +let + a = 1 + 2 * 3; # Should be 1 + (2 * 3) = 7 + b = 10 - 5 - 2; # Should be (10 - 5) - 2 = 3 + c = true && false || true; # Should be (true && false) || true = true + d = 1 < 2 && 3 > 2; # Should be (1 < 2) && (3 > 2) = true +in { + a = a; + b = b; + c = c; + d = d; +} diff --git a/tests/fixtures/select_or_default.nix b/tests/fixtures/select_or_default.nix new file mode 100644 index 0000000..6144ff5 --- /dev/null +++ b/tests/fixtures/select_or_default.nix @@ -0,0 +1,19 @@ +# Test selection with 'or' default +let + attrs = { + a = 1; + b = 2; + }; +in { + # Attribute exists - should use value from attrs + has_attr = attrs.a or 999; + + # Attribute doesn't exist - should use default + missing_attr = attrs.c or 100; + + # Nested default expression + nested = attrs.d or (attrs.a + attrs.b); + + # Default with literal + with_string = attrs.name or "default_name"; +} diff --git a/tests/fixtures/shortcircuit.nix b/tests/fixtures/shortcircuit.nix new file mode 100644 index 0000000..2326f2c --- /dev/null +++ b/tests/fixtures/shortcircuit.nix @@ -0,0 +1,10 @@ +# Test short-circuit evaluation +let + alwaysFalse = false; + alwaysTrue = true; + x = 10; +in { + and_false = alwaysFalse && alwaysTrue; + or_true = alwaysTrue || alwaysFalse; + impl_false = 
alwaysFalse -> alwaysFalse; +} diff --git a/tests/shortcircuit2.nix b/tests/fixtures/shortcircuit2.nix similarity index 100% rename from tests/shortcircuit2.nix rename to tests/fixtures/shortcircuit2.nix diff --git a/tests/simple.nix b/tests/fixtures/simple.nix similarity index 100% rename from tests/simple.nix rename to tests/fixtures/simple.nix diff --git a/tests/fixtures/simple_op.nix b/tests/fixtures/simple_op.nix new file mode 100644 index 0000000..e0ef584 --- /dev/null +++ b/tests/fixtures/simple_op.nix @@ -0,0 +1 @@ +1 + 2 diff --git a/tests/fixtures/string_interp.nix b/tests/fixtures/string_interp.nix new file mode 100644 index 0000000..9edc11b --- /dev/null +++ b/tests/fixtures/string_interp.nix @@ -0,0 +1,20 @@ +# Test string interpolation +let + name = "world"; + x = 42; + bool_val = true; +in { + # Simple interpolation + greeting = "Hello ${name}!"; + + # Multiple interpolations + multi = "x is ${x} and name is ${name}"; + + # Expression evaluation in interpolation + computed = "x + 10 = ${x + 10}"; + + bool_check = "${bool_val} is true!"; + + # Just a string, no interpolation + plain = "plain text"; +} diff --git a/tests/fixtures/string_interp.nixr b/tests/fixtures/string_interp.nixr new file mode 100644 index 0000000..0c4d4a2 Binary files /dev/null and b/tests/fixtures/string_interp.nixr differ diff --git a/tests/unary.nix b/tests/fixtures/unary.nix similarity index 61% rename from tests/unary.nix rename to tests/fixtures/unary.nix index cc7ddab..b8e0e3e 100644 --- a/tests/unary.nix +++ b/tests/fixtures/unary.nix @@ -2,5 +2,7 @@ let x = 10; y = true; -in - { neg = -x; not = !y; } +in { + neg = -x; + not = !y; +} diff --git a/tests/fixtures/uri_test.nix b/tests/fixtures/uri_test.nix new file mode 100644 index 0000000..ad93389 --- /dev/null +++ b/tests/fixtures/uri_test.nix @@ -0,0 +1,3 @@ +https://example.com/path?query=1 +#frag + diff --git a/tests/if.nix b/tests/if.nix deleted file mode 100644 index 0ef94a5..0000000 --- a/tests/if.nix +++ /dev/null 
@@ -1,2 +0,0 @@ -# Conditional test -if true then 1 else 2 diff --git a/tests/if.nixir b/tests/if.nixir deleted file mode 100644 index 4ee0f59..0000000 Binary files a/tests/if.nixir and /dev/null differ diff --git a/tests/inherit.nix b/tests/inherit.nix deleted file mode 100644 index 470cccc..0000000 --- a/tests/inherit.nix +++ /dev/null @@ -1,17 +0,0 @@ -# Test inherit keyword -let - x = 10; - y = 20; - attrs = { a = 1; b = 2; c = 3; }; -in - { - # Basic inherit from outer scope - inherit x y; - - # Inherit from expression - inherit (attrs) a b; - - # Mixed - z = 30; - inherit (attrs) c; - } diff --git a/tests/inherit_from.nix b/tests/inherit_from.nix deleted file mode 100644 index e2d3797..0000000 --- a/tests/inherit_from.nix +++ /dev/null @@ -1,4 +0,0 @@ -let - attrs = { a = 1; }; -in - { inherit (attrs) a; } diff --git a/tests/inherit_simple.nix b/tests/inherit_simple.nix deleted file mode 100644 index 6c004ca..0000000 --- a/tests/inherit_simple.nix +++ /dev/null @@ -1,4 +0,0 @@ -let - x = 10; -in - { inherit x; } diff --git a/tests/integration/flake_ref/flake.nix b/tests/integration/flake_ref/flake.nix new file mode 100644 index 0000000..b820ce6 --- /dev/null +++ b/tests/integration/flake_ref/flake.nix @@ -0,0 +1,19 @@ +{ + description = "Local flake fixture for nixir integration tests"; + + inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11"; + + outputs = { self, nixpkgs }: { + value = 42; + nixosConfigurations.demo = nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ + ({ ... 
}: { + networking.hostName = "nixir-demo"; + system.stateVersion = "24.11"; + services.openssh.enable = true; + }) + ]; + }; + }; +} diff --git a/tests/integration/import_test.nix b/tests/integration/import_test.nix new file mode 100644 index 0000000..7914eea --- /dev/null +++ b/tests/integration/import_test.nix @@ -0,0 +1,7 @@ +# Test that import builtin still works +let + imported = import ./imported_module.nix; +in { + value = imported.foo + 100; + nested = imported.bar.baz; +} diff --git a/tests/integration/imported_module.nix b/tests/integration/imported_module.nix new file mode 100644 index 0000000..a9b969c --- /dev/null +++ b/tests/integration/imported_module.nix @@ -0,0 +1,7 @@ +# Module to be imported +{ + foo = 42; + bar = { + baz = "hello"; + }; +} diff --git a/tests/integration/ir_builtins_test.nix b/tests/integration/ir_builtins_test.nix new file mode 100644 index 0000000..4aa28e3 --- /dev/null +++ b/tests/integration/ir_builtins_test.nix @@ -0,0 +1,13 @@ +# Test our custom IR builtins +let + # Test nixIR_info + info = builtins.nixIR_info; + + # Test nixIR_compile + compiled = builtins.nixIR_compile "let x = 10; in x + 5"; + + # Test that normal builtins still work + list = builtins.map (x: x * 2) [1 2 3]; +in { + inherit info compiled list; +} diff --git a/tests/integration/regression_normal_nix.nix b/tests/integration/regression_normal_nix.nix new file mode 100644 index 0000000..2eb73f6 --- /dev/null +++ b/tests/integration/regression_normal_nix.nix @@ -0,0 +1,39 @@ +# Test that normal Nix evaluation is not broken +# This file should work identically with or without the plugin +let + # Basic arithmetic + math = 1 + 2 * 3; + + # String operations + str = "hello" + " " + "world"; + + # List operations + list = [1 2 3] ++ [4 5 6]; + + # Attrset operations + attrs = + { + a = 1; + b = 2; + } + // {c = 3;}; + + # Functions + double = x: x * 2; + result = double 21; + + # Conditionals + cond = + if true + then "yes" + else "no"; + + # Let bindings + nested 
= let + x = 10; + y = 20; + in + x + y; +in { + inherit math str list attrs result cond nested; +} diff --git a/tests/integration/run.sh b/tests/integration/run.sh new file mode 100755 index 0000000..bbdea6c --- /dev/null +++ b/tests/integration/run.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "" + +PLUGIN_PATH="$(pwd)/build/nix-ir-plugin.so" +TEST_DIR="$(pwd)/tests/integration" + +if [ ! -f "$PLUGIN_PATH" ]; then + echo "ERROR: Plugin not found at $PLUGIN_PATH" + exit 1 +fi + +echo "Plugin path: $PLUGIN_PATH" +echo "" + +echo "Test 1: Plugin Loading" +echo "----------------------" +if nix-instantiate --plugin-files "$PLUGIN_PATH" --eval "$TEST_DIR/simple_eval.nix" 2>&1 | grep -q "30"; then + echo "[PASS] Plugin loads and evaluates correctly" +else + echo "[FAIL] Plugin failed to load or evaluate" + exit 1 +fi +echo "" + +echo "Test 2: Normal Nix Evaluation (No Plugin)" +echo "------------------------------------------" +result=$(nix-instantiate --eval --strict --json "$TEST_DIR/regression_normal_nix.nix" 2>&1) +if echo "$result" | grep -q '"math":7'; then + echo "[PASS] Normal Nix evaluation works without plugin" +else + echo "[FAIL] Normal Nix evaluation broken" + echo "$result" + exit 1 +fi +echo "" + +echo "Test 3: Normal Nix Evaluation (With Plugin)" +echo "--------------------------------------------" +result=$(nix-instantiate --plugin-files "$PLUGIN_PATH" --eval --strict --json "$TEST_DIR/regression_normal_nix.nix" 2>&1) +if echo "$result" | grep -q '"math":7'; then + echo "[PASS] Normal Nix evaluation works with plugin loaded" +else + echo "[FAIL] Plugin breaks normal Nix evaluation" + echo "$result" + exit 1 +fi +echo "" + +echo "Test 4: Import Builtin" +echo "----------------------" +cd "$TEST_DIR" +result=$(nix-instantiate --plugin-files "$PLUGIN_PATH" --eval --strict --json import_test.nix 2>&1) +if echo "$result" | grep -q '"value":142'; then + echo "[PASS] Import builtin works correctly" +else + echo "[FAIL] Import builtin 
broken" + echo "$result" + exit 1 +fi +cd - >/dev/null +echo "" + +echo "Test 5: IR Builtins Available" +echo "------------------------------" +result=$(nix-instantiate --plugin-files "$PLUGIN_PATH" --eval "$TEST_DIR/ir_builtins_test.nix" 2>&1) +if echo "$result" | grep -q "info.*="; then + echo "[PASS] IR builtins (nixIR_info, nixIR_compile, nixIR_loadIR) available" +else + echo "[WARN] IR builtins may not be available (check plugin initialization)" +fi +echo "" + +echo "Test 6: Flake Reference Compilation" +echo "-----------------------------------" +flake_ir=$(mktemp /tmp/nixir-flake-value-XXXXXX.nixir) +"$(pwd)/build/nix-irc" "$TEST_DIR/flake_ref#value" "$flake_ir" +result=$(nix-instantiate --plugin-files "$PLUGIN_PATH" --eval --strict --json --expr "builtins.nixIR_loadIR \"$flake_ir\"" 2>&1) +if echo "$result" | grep -q '^42$'; then + echo "[PASS] Flake reference compiles and evaluates correctly" +else + echo "[FAIL] Flake reference compilation broken" + echo "$result" + exit 1 +fi +echo "" + +echo "Test 7: NixOS Configuration Attribute Path" +echo "------------------------------------------" +config_ir=$(mktemp /tmp/nixir-flake-config-XXXXXX.nixir) +"$(pwd)/build/nix-irc" "$TEST_DIR/flake_ref#nixosConfigurations.demo.config.networking.hostName" "$config_ir" +result=$(nix-instantiate --plugin-files "$PLUGIN_PATH" --eval --strict --json --expr "builtins.nixIR_loadIR \"$config_ir\"" 2>&1) +if echo "$result" | grep -q '"nixir-demo"'; then + echo "[PASS] Nested flake attribute selection works for nixosConfigurations" +else + echo "[FAIL] NixOS configuration flake selection broken" + echo "$result" + exit 1 +fi +echo "" + +echo "Integration Tests Complete" diff --git a/tests/integration/simple_eval.nix b/tests/integration/simple_eval.nix new file mode 100644 index 0000000..9dc2744 --- /dev/null +++ b/tests/integration/simple_eval.nix @@ -0,0 +1,6 @@ +# Simple expression to test plugin loading +let + x = 10; + y = 20; +in + x + y diff --git 
a/tests/lambda_pattern.nix b/tests/lambda_pattern.nix deleted file mode 100644 index dfbef9b..0000000 --- a/tests/lambda_pattern.nix +++ /dev/null @@ -1,36 +0,0 @@ -# Test lambda patterns -let - # Basic destructuring - f1 = { a, b }: a + b; - - # With default values - f2 = { a, b ? 10 }: a + b; - - # With ellipsis (extra fields allowed) - f3 = { a, ... }: a * 2; - - # Named pattern with ellipsis to allow extra fields - f4 = arg@{ a, b, ... }: a + b + arg.c; - - # Simple lambda (not a pattern) - f5 = x: x + 1; -in - { - # Test basic destructuring - test1 = f1 { a = 3; b = 4; }; - - # Test with defaults (provide both) - test2a = f2 { a = 5; b = 6; }; - - # Test with defaults (use default for b) - test2b = f2 { a = 5; }; - - # Test ellipsis (extra field ignored) - test3 = f3 { a = 7; extra = 999; }; - - # Test named pattern - test4 = f4 { a = 1; b = 2; c = 3; }; - - # Test simple lambda - test5 = f5 10; - } diff --git a/tests/language/interp_test.nix b/tests/language/interp_test.nix new file mode 100644 index 0000000..73b4c16 --- /dev/null +++ b/tests/language/interp_test.nix @@ -0,0 +1,2 @@ +# Test string interpolation +let x = "world"; in "Hello ${x}!" diff --git a/tests/language/pattern_test.nix b/tests/language/pattern_test.nix new file mode 100644 index 0000000..e654f6c --- /dev/null +++ b/tests/language/pattern_test.nix @@ -0,0 +1,5 @@ +# Test lambda patterns +({ + name, + version ? 
"1.0", +}: "${name}-${version}") {name = "test";} diff --git a/tests/let.nixir b/tests/let.nixir deleted file mode 100644 index cb9dd41..0000000 Binary files a/tests/let.nixir and /dev/null differ diff --git a/tests/logical.nixir b/tests/logical.nixir deleted file mode 100644 index 010a5f5..0000000 Binary files a/tests/logical.nixir and /dev/null differ diff --git a/tests/operators.nixir b/tests/operators.nixir deleted file mode 100644 index f71f899..0000000 Binary files a/tests/operators.nixir and /dev/null differ diff --git a/tests/precedence.nix b/tests/precedence.nix deleted file mode 100644 index 2949308..0000000 --- a/tests/precedence.nix +++ /dev/null @@ -1,8 +0,0 @@ -# Test operator precedence -let - a = 1 + 2 * 3; # Should be 1 + (2 * 3) = 7 - b = 10 - 5 - 2; # Should be (10 - 5) - 2 = 3 - c = true && false || true; # Should be (true && false) || true = true - d = 1 < 2 && 3 > 2; # Should be (1 < 2) && (3 > 2) = true -in - { a = a; b = b; c = c; d = d; } diff --git a/tests/precedence.nixir b/tests/precedence.nixir deleted file mode 100644 index de1b0d4..0000000 Binary files a/tests/precedence.nixir and /dev/null differ diff --git a/tests/regression_test.cpp b/tests/regression_test.cpp index 10123a0..267837e 100644 --- a/tests/regression_test.cpp +++ b/tests/regression_test.cpp @@ -1,3 +1,5 @@ +#include "irc/lexer.h" +#include "irc/parser.h" #include "irc/serializer.h" #include "irc/types.h" #include @@ -7,21 +9,21 @@ using namespace nix_irc; int failures = 0; -#define TEST_CHECK(cond, msg) \ - do { \ - if (!(cond)) { \ - std::cerr << " FAIL: " << msg << std::endl; \ - failures++; \ - } else { \ - std::cout << " PASS: " << msg << std::endl; \ - } \ +#define TEST_CHECK(cond, msg) \ + do { \ + if (!(cond)) { \ + std::cerr << " FAIL: " << msg << std::endl; \ + failures++; \ + } else { \ + std::cout << " PASS: " << msg << std::endl; \ + } \ } while (0) #define TEST_PASS(msg) std::cout << " PASS: " << msg << std::endl -#define TEST_FAIL(msg) \ - do { \ - 
std::cerr << " FAIL: " << msg << std::endl; \ - failures++; \ +#define TEST_FAIL(msg) \ + do { \ + std::cerr << " FAIL: " << msg << std::endl; \ + failures++; \ } while (0) void test_enum_compatibility() { @@ -30,33 +32,27 @@ void test_enum_compatibility() { if (static_cast(NodeType::WITH) == 0x32) { std::cout << " PASS: WITH has correct value 0x32" << std::endl; } else { - std::cerr << " FAIL: WITH should be 0x32, got " - << static_cast(NodeType::WITH) << std::endl; + std::cerr << " FAIL: WITH should be 0x32, got " << static_cast(NodeType::WITH) + << std::endl; } if (static_cast(NodeType::HAS_ATTR) == 0x34) { - std::cout << " PASS: HAS_ATTR has value 0x34 (new slot after WITH bump)" - << std::endl; + std::cout << " PASS: HAS_ATTR has value 0x34 (new slot after WITH bump)" << std::endl; } else if (static_cast(NodeType::HAS_ATTR) == 0x33 && static_cast(NodeType::WITH) == 0x32) { std::cout << " PASS: HAS_ATTR has value 0x33 (restored original with WITH " "at 0x32)" << std::endl; } else { - std::cerr << " FAIL: HAS_ATTR value is " - << static_cast(NodeType::HAS_ATTR) + std::cerr << " FAIL: HAS_ATTR value is " << static_cast(NodeType::HAS_ATTR) << " (expected 0x34 or 0x33 with WITH=0x32)" << std::endl; } - if (IR_VERSION == 2) { - std::cout << " PASS: IR_VERSION bumped to 2 for breaking change" - << std::endl; - } else if (static_cast(NodeType::WITH) == 0x32) { - std::cout << " PASS: IR_VERSION unchanged but WITH restored to 0x32" - << std::endl; + if (IR_VERSION == 3) { + std::cout << " PASS: IR_VERSION is 3" << std::endl; } else { - std::cerr << " FAIL: Either bump IR_VERSION or fix enum values" - << std::endl; + std::cerr << " FAIL: IR_VERSION should be 3, got " << IR_VERSION << std::endl; + failures++; } } @@ -80,19 +76,16 @@ void test_serializer_select_with_default() { Deserializer deser; auto loaded = deser.deserialize(bytes); - auto *loaded_select = loaded.entry->get_if(); - if (loaded_select && loaded_select->default_expr && - *loaded_select->default_expr) { - 
auto *def_val = (*loaded_select->default_expr)->get_if(); + auto* loaded_select = loaded.entry->get_if(); + if (loaded_select && loaded_select->default_expr && *loaded_select->default_expr) { + auto* def_val = (*loaded_select->default_expr)->get_if(); if (def_val && def_val->value == 100) { - std::cout << " PASS: SELECT with default_expr round-trips correctly" - << std::endl; + std::cout << " PASS: SELECT with default_expr round-trips correctly" << std::endl; } else { std::cerr << " FAIL: default_expr value incorrect" << std::endl; } } else { - std::cerr << " FAIL: default_expr not deserialized (missing u8 flag read)" - << std::endl; + std::cerr << " FAIL: default_expr not deserialized (missing u8 flag read)" << std::endl; } } @@ -114,11 +107,9 @@ void test_serializer_select_without_default() { Deserializer deser; auto loaded = deser.deserialize(bytes); - auto *loaded_select = loaded.entry->get_if(); - if (loaded_select && - (!loaded_select->default_expr || !*loaded_select->default_expr)) { - std::cout << " PASS: SELECT without default_expr round-trips correctly" - << std::endl; + auto* loaded_select = loaded.entry->get_if(); + if (loaded_select && (!loaded_select->default_expr || !*loaded_select->default_expr)) { + std::cout << " PASS: SELECT without default_expr round-trips correctly" << std::endl; } else { std::cerr << " FAIL: default_expr should be null/absent" << std::endl; } @@ -127,38 +118,617 @@ void test_serializer_select_without_default() { void test_parser_brace_depth_in_strings() { std::cout << "> Parser brace depth handling in strings..." 
<< std::endl; - std::string test_input = R"( - let s = "test}"; in ${s} - )"; + std::string test_input = R"(let s = "test}"; in s)"; - std::cout << " Test input contains '}' inside string - should not end " - "interpolation" - << std::endl; - std::cout << " NOTE: This test requires running through actual parser" - << std::endl; + try { + Parser parser; + auto ast = parser.parse(test_input); + TEST_PASS("Brace inside string does not confuse parser"); + } catch (const std::exception& e) { + TEST_FAIL("Parser should handle '}' inside strings"); + } } void test_parser_has_ellipsis_usage() { std::cout << "> Parser has_ellipsis usage..." << std::endl; - std::cout << " NOTE: LambdaNode should have strict_pattern field when " - "has_ellipsis is false" - << std::endl; - std::cout << " This requires checking the parser output for strict patterns" - << std::endl; + std::string with_ellipsis = "{ a, ... }: a"; + std::string without_ellipsis = "{ a, b }: a + b"; + + try { + Parser parser1; + auto ast1 = parser1.parse(with_ellipsis); + TEST_PASS("Pattern with ellipsis parses correctly"); + + Parser parser2; + auto ast2 = parser2.parse(without_ellipsis); + TEST_PASS("Pattern without ellipsis parses correctly"); + } catch (const std::exception& e) { + TEST_FAIL("Pattern parsing failed"); + } } void test_parser_expect_in_speculative_parsing() { std::cout << "> Parser expect() in speculative parsing..." 
<< std::endl; - std::cout << " NOTE: try_parse_lambda should not throw on non-lambda input" - << std::endl; - std::cout << " This requires testing parser with invalid lambda patterns" - << std::endl; + std::string not_a_lambda = "1 + 2"; + std::string actual_lambda = "x: x + 1"; + + try { + Parser parser1; + auto ast1 = parser1.parse(not_a_lambda); + TEST_PASS("Non-lambda input does not cause parser to throw"); + + Parser parser2; + auto ast2 = parser2.parse(actual_lambda); + TEST_PASS("Actual lambda parses correctly"); + } catch (const std::exception& e) { + TEST_FAIL("Parser should handle both lambda and non-lambda input"); + } +} + +void test_implication_right_associativity() { + std::cout << "> Implication right associativity..." << std::endl; + + Parser parser; + auto ast = parser.parse("a -> b -> c"); + + auto* outer = ast->get_if(); + TEST_CHECK(outer != nullptr, "Top-level node is BinaryOpNode"); + TEST_CHECK(outer && outer->op == BinaryOp::IMPL, "Top-level operator is implication"); + + if (outer) { + auto* left = outer->left->get_if(); + auto* right = outer->right->get_if(); + TEST_CHECK(left != nullptr && left->name && *left->name == "a", "Left branch is variable 'a'"); + TEST_CHECK(right != nullptr && right->op == BinaryOp::IMPL, + "Right branch is nested implication"); + } +} + +void test_lookup_path_lexer_position() { + std::cout << "> Lookup path lexer position..." << std::endl; + + Lexer lexer(" x"); + auto tokens = lexer.tokenize(); + + TEST_CHECK(tokens.size() >= 3, "Lexer produced lookup path, identifier, and EOF"); + TEST_CHECK(tokens[0].type == Token::LOOKUP_PATH, "First token is LOOKUP_PATH"); + TEST_CHECK(tokens[1].type == Token::IDENT && tokens[1].value == "x", + "Second token is identifier 'x'"); + TEST_CHECK(tokens[1].col == 11, "Identifier column reflects consumed lookup path width"); +} + +void test_unterminated_block_comment_rejected() { + std::cout << "> Unterminated block comment rejection..." 
<< std::endl; + + try { + Lexer lexer("/* unterminated"); + auto tokens = lexer.tokenize(); + (void) tokens; + TEST_FAIL("Lexer should reject unterminated block comments"); + } catch (const std::exception& e) { + TEST_PASS("Lexer rejects unterminated block comments"); + } +} + +void test_unknown_character_rejected() { + std::cout << "> Unknown character rejection..." << std::endl; + + try { + Lexer lexer("1 $ 2"); + auto tokens = lexer.tokenize(); + (void) tokens; + TEST_FAIL("Lexer should reject unexpected characters"); + } catch (const std::exception& e) { + TEST_PASS("Lexer rejects unexpected characters"); + } +} + +void test_lookup_path_node() { + std::cout << "> Lookup path serialization..." << std::endl; + + auto lookup = std::make_shared(ConstLookupPathNode("nixpkgs")); + IRModule module; + module.entry = lookup; + + Serializer ser; + auto bytes = ser.serialize_to_bytes(module); + + Deserializer deser; + auto loaded = deser.deserialize(bytes); + + auto* loaded_lookup = loaded.entry->get_if(); + TEST_CHECK(loaded_lookup != nullptr, "Deserialized node is ConstLookupPathNode"); + TEST_CHECK(loaded_lookup && loaded_lookup->value == "nixpkgs", "Lookup path value is 'nixpkgs'"); +} + +void test_import_node() { + std::cout << "> Import node serialization..." 
<< std::endl; + + auto path = std::make_shared(ConstPathNode("./test.nix")); + auto import_node = std::make_shared(ImportNode(path)); + IRModule module; + module.entry = import_node; + + Serializer ser; + auto bytes = ser.serialize_to_bytes(module); + + Deserializer deser; + auto loaded = deser.deserialize(bytes); + + auto* loaded_import = loaded.entry->get_if(); + TEST_CHECK(loaded_import != nullptr, "Deserialized node is ImportNode"); + TEST_CHECK(loaded_import && loaded_import->path != nullptr, "Import node has path"); + + if (loaded_import && loaded_import->path) { + auto* path_node = loaded_import->path->get_if(); + TEST_CHECK(path_node != nullptr, "Import path is ConstPathNode"); + TEST_CHECK(path_node && path_node->value == "./test.nix", "Import path value is './test.nix'"); + } +} + +void test_import_with_lookup_path() { + std::cout << "> Import with lookup path..." << std::endl; + + auto lookup = std::make_shared(ConstLookupPathNode("nixpkgs")); + auto import_node = std::make_shared(ImportNode(lookup)); + IRModule module; + module.entry = import_node; + + Serializer ser; + auto bytes = ser.serialize_to_bytes(module); + + Deserializer deser; + auto loaded = deser.deserialize(bytes); + + auto* loaded_import = loaded.entry->get_if(); + TEST_CHECK(loaded_import != nullptr, "Deserialized node is ImportNode"); + + if (loaded_import && loaded_import->path) { + auto* lookup_node = loaded_import->path->get_if(); + TEST_CHECK(lookup_node != nullptr, "Import path is ConstLookupPathNode"); + TEST_CHECK(lookup_node && lookup_node->value == "nixpkgs", "Lookup path value is 'nixpkgs'"); + } +} + +void test_relative_path_import_parsing() { + std::cout << "> Relative path import parsing..." 
<< std::endl; + + Parser parser; + auto ast = parser.parse("import ./simple.nix"); + + auto* import_node = ast->get_if(); + TEST_CHECK(import_node != nullptr, "Parsed expression is ImportNode"); + + if (import_node && import_node->path) { + auto* path_node = import_node->path->get_if(); + TEST_CHECK(path_node != nullptr, "Import argument is ConstPathNode"); + TEST_CHECK(path_node && path_node->value == "./simple.nix", + "Relative path is preserved as './simple.nix'"); + } +} + +void test_builtin_call_node() { + std::cout << "> BuiltinCallNode serialization..." << std::endl; + + auto arg = std::make_shared(ConstStringNode("/tmp/example-flake")); + auto builtin = + std::make_shared(BuiltinCallNode("getFlake", std::vector>{arg})); + + IRModule module; + module.entry = builtin; + + Serializer ser; + auto bytes = ser.serialize_to_bytes(module); + + Deserializer deser; + auto loaded = deser.deserialize(bytes); + + auto* loaded_builtin = loaded.entry->get_if(); + TEST_CHECK(loaded_builtin != nullptr, "Deserialized node is BuiltinCallNode"); + TEST_CHECK(loaded_builtin && loaded_builtin->builtin_name == "getFlake", + "Builtin name is 'getFlake'"); + TEST_CHECK(loaded_builtin && loaded_builtin->args.size() == 1, "Builtin has one argument"); + + if (loaded_builtin && loaded_builtin->args.size() == 1) { + auto* loaded_arg = loaded_builtin->args[0]->get_if(); + TEST_CHECK(loaded_arg != nullptr, "Builtin argument is ConstStringNode"); + TEST_CHECK(loaded_arg && loaded_arg->value == "/tmp/example-flake", + "Builtin argument value round-trips"); + } +} + +void test_uri_node() { + std::cout << "> URI node serialization..." 
<< std::endl; + + auto uri = std::make_shared(ConstURINode("https://example.com")); + IRModule module; + module.entry = uri; + + Serializer ser; + auto bytes = ser.serialize_to_bytes(module); + + Deserializer deser; + auto loaded = deser.deserialize(bytes); + + auto* loaded_uri = loaded.entry->get_if(); + TEST_CHECK(loaded_uri != nullptr, "Deserialized node is ConstURINode"); + TEST_CHECK(loaded_uri && loaded_uri->value == "https://example.com", + "URI value is 'https://example.com'"); +} + +void test_float_node() { + std::cout << "> Float node serialization..." << std::endl; + + auto float_val = std::make_shared(ConstFloatNode(3.14159)); + IRModule module; + module.entry = float_val; + + Serializer ser; + auto bytes = ser.serialize_to_bytes(module); + + Deserializer deser; + auto loaded = deser.deserialize(bytes); + + auto* loaded_float = loaded.entry->get_if(); + TEST_CHECK(loaded_float != nullptr, "Deserialized node is ConstFloatNode"); + TEST_CHECK(loaded_float && loaded_float->value > 3.14 && loaded_float->value < 3.15, + "Float value is approximately 3.14159"); +} + +// LambdaPatternNode Tests +void test_lambda_pattern_simple() { + std::cout << "> LambdaPatternNode simple ({ a, b }: a + b)..." 
<< std::endl; + + // Body: a + b (using VarNode for a and b) + auto var_a = std::make_shared(VarNode(0, "a")); + auto var_b = std::make_shared(VarNode(0, "b")); + auto body = std::make_shared(BinaryOpNode(BinaryOp::ADD, var_a, var_b)); + + // Create lambda pattern with two required fields + LambdaPatternNode lambda_pattern(body); + lambda_pattern.required_fields.emplace_back("a", std::nullopt); + lambda_pattern.required_fields.emplace_back("b", std::nullopt); + lambda_pattern.allow_extra = false; + + auto node = std::make_shared(std::move(lambda_pattern)); + + // Serialize + IRModule module; + module.entry = node; + Serializer ser; + auto bytes = ser.serialize_to_bytes(module); + + // Deserialize + Deserializer deser; + auto loaded = deser.deserialize(bytes); + + // Verify + auto* loaded_node = loaded.entry->get_if(); + TEST_CHECK(loaded_node != nullptr, "Type is LambdaPatternNode"); + TEST_CHECK(loaded_node && loaded_node->required_fields.size() == 2, "Has 2 required fields"); + TEST_CHECK(loaded_node && loaded_node->optional_fields.size() == 0, "Has 0 optional fields"); + TEST_CHECK(loaded_node && loaded_node->required_fields[0].name == "a", "First field is 'a'"); + TEST_CHECK(loaded_node && loaded_node->required_fields[1].name == "b", "Second field is 'b'"); + TEST_CHECK(loaded_node && !loaded_node->at_binding.has_value(), "No at-binding"); + TEST_CHECK(loaded_node && !loaded_node->allow_extra, "No ellipsis"); + TEST_CHECK(loaded_node && loaded_node->body != nullptr, "Has body"); +} + +void test_lambda_pattern_with_defaults() { + std::cout << "> LambdaPatternNode with defaults ({ a, b ? 10 }: a + b)..." 
<< std::endl;
+
+    // Default value for b
+    auto default_b = std::make_shared<IRNode>(ConstIntNode(10));
+
+    // Body: a + b
+    auto var_a = std::make_shared<IRNode>(VarNode(0, "a"));
+    auto var_b = std::make_shared<IRNode>(VarNode(0, "b"));
+    auto body = std::make_shared<IRNode>(BinaryOpNode(BinaryOp::ADD, var_a, var_b));
+
+    // Create lambda pattern
+    LambdaPatternNode lambda_pattern(body);
+    lambda_pattern.required_fields.emplace_back("a", std::nullopt);
+    lambda_pattern.optional_fields.emplace_back("b", default_b);
+    lambda_pattern.allow_extra = false;
+
+    auto node = std::make_shared<IRNode>(std::move(lambda_pattern));
+
+    // Serialize
+    IRModule module;
+    module.entry = node;
+    Serializer ser;
+    auto bytes = ser.serialize_to_bytes(module);
+
+    // Deserialize
+    Deserializer deser;
+    auto loaded = deser.deserialize(bytes);
+
+    // Verify
+    auto* loaded_node = loaded.entry->get_if<LambdaPatternNode>();
+    TEST_CHECK(loaded_node != nullptr, "Type is LambdaPatternNode");
+    TEST_CHECK(loaded_node && loaded_node->required_fields.size() == 1, "Has 1 required field");
+    TEST_CHECK(loaded_node && loaded_node->optional_fields.size() == 1, "Has 1 optional field");
+    TEST_CHECK(loaded_node && loaded_node->required_fields[0].name == "a", "Required field is 'a'");
+    TEST_CHECK(loaded_node && loaded_node->optional_fields[0].name == "b", "Optional field is 'b'");
+    TEST_CHECK(loaded_node && loaded_node->optional_fields[0].default_value.has_value(),
+               "Optional field has default");
+
+    if (loaded_node && loaded_node->optional_fields[0].default_value) {
+        auto* def_val = (*loaded_node->optional_fields[0].default_value)->get_if<ConstIntNode>();
+        TEST_CHECK(def_val && def_val->value == 10, "Default value is 10");
+    }
+}
+
+void test_lambda_pattern_at_binding() {
+    std::cout << "> LambdaPatternNode with at-binding (args@{ a, b }: args.a)..."
<< std::endl;
+
+    // Body: args.a (select expression)
+    auto var_args = std::make_shared<IRNode>(VarNode(0, "args"));
+    auto attr = std::make_shared<IRNode>(ConstStringNode("a"));
+    auto body = std::make_shared<IRNode>(SelectNode(var_args, attr));
+
+    // Create lambda pattern with at-binding
+    LambdaPatternNode lambda_pattern(body);
+    lambda_pattern.required_fields.emplace_back("a", std::nullopt);
+    lambda_pattern.required_fields.emplace_back("b", std::nullopt);
+    lambda_pattern.at_binding = "args";
+    lambda_pattern.allow_extra = false;
+
+    auto node = std::make_shared<IRNode>(std::move(lambda_pattern));
+
+    // Serialize
+    IRModule module;
+    module.entry = node;
+    Serializer ser;
+    auto bytes = ser.serialize_to_bytes(module);
+
+    // Deserialize
+    Deserializer deser;
+    auto loaded = deser.deserialize(bytes);
+
+    // Verify
+    auto* loaded_node = loaded.entry->get_if<LambdaPatternNode>();
+    TEST_CHECK(loaded_node != nullptr, "Type is LambdaPatternNode");
+    TEST_CHECK(loaded_node && loaded_node->at_binding.has_value(), "Has at-binding");
+    TEST_CHECK(loaded_node && loaded_node->at_binding.value() == "args", "At-binding is 'args'");
+}
+
+void test_lambda_pattern_ellipsis() {
+    std::cout << "> LambdaPatternNode with ellipsis ({ a, ... }: a)..."
<< std::endl;
+
+    // Body: a
+    auto body = std::make_shared<IRNode>(VarNode(0, "a"));
+
+    // Create lambda pattern with ellipsis
+    LambdaPatternNode lambda_pattern(body);
+    lambda_pattern.required_fields.emplace_back("a", std::nullopt);
+    lambda_pattern.allow_extra = true;
+
+    auto node = std::make_shared<IRNode>(std::move(lambda_pattern));
+
+    // Serialize
+    IRModule module;
+    module.entry = node;
+    Serializer ser;
+    auto bytes = ser.serialize_to_bytes(module);
+
+    // Deserialize
+    Deserializer deser;
+    auto loaded = deser.deserialize(bytes);
+
+    // Verify
+    auto* loaded_node = loaded.entry->get_if<LambdaPatternNode>();
+    TEST_CHECK(loaded_node != nullptr, "Type is LambdaPatternNode");
+    TEST_CHECK(loaded_node && loaded_node->allow_extra, "Has ellipsis (allow_extra=true)");
+}
+
+void test_lambda_pattern_complete() {
+    std::cout << "> LambdaPatternNode complete (args@{ a, b ? 5, ... }: body)..." << std::endl;
+
+    // Default value for b
+    auto default_b = std::make_shared<IRNode>(ConstIntNode(5));
+
+    // Body: simple var
+    auto body = std::make_shared<IRNode>(VarNode(0, "x"));
+
+    // Create lambda pattern with all features
+    LambdaPatternNode lambda_pattern(body);
+    lambda_pattern.required_fields.emplace_back("a", std::nullopt);
+    lambda_pattern.optional_fields.emplace_back("b", default_b);
+    lambda_pattern.at_binding = "args";
+    lambda_pattern.allow_extra = true;
+
+    auto node = std::make_shared<IRNode>(std::move(lambda_pattern));
+
+    // Serialize
+    IRModule module;
+    module.entry = node;
+    Serializer ser;
+    auto bytes = ser.serialize_to_bytes(module);
+
+    // Deserialize
+    Deserializer deser;
+    auto loaded = deser.deserialize(bytes);
+
+    // Verify all fields
+    auto* loaded_node = loaded.entry->get_if<LambdaPatternNode>();
+    TEST_CHECK(loaded_node != nullptr, "Type is LambdaPatternNode");
+    TEST_CHECK(loaded_node && loaded_node->required_fields.size() == 1, "Has 1 required field");
+    TEST_CHECK(loaded_node && loaded_node->optional_fields.size() == 1, "Has 1 optional field");
+    TEST_CHECK(loaded_node && loaded_node->at_binding.has_value(),
"Has at-binding");
+    TEST_CHECK(loaded_node && loaded_node->at_binding.value() == "args", "At-binding is 'args'");
+    TEST_CHECK(loaded_node && loaded_node->allow_extra, "Has ellipsis");
+}
+
+void test_lambda_pattern_empty() {
+    std::cout << "> LambdaPatternNode empty ({ }: body)..." << std::endl;
+
+    // Body: simple constant
+    auto body = std::make_shared<IRNode>(ConstIntNode(42));
+
+    // Create empty lambda pattern
+    LambdaPatternNode lambda_pattern(body);
+    lambda_pattern.allow_extra = false;
+
+    auto node = std::make_shared<IRNode>(std::move(lambda_pattern));
+
+    // Serialize
+    IRModule module;
+    module.entry = node;
+    Serializer ser;
+    auto bytes = ser.serialize_to_bytes(module);
+
+    // Deserialize
+    Deserializer deser;
+    auto loaded = deser.deserialize(bytes);
+
+    // Verify
+    auto* loaded_node = loaded.entry->get_if<LambdaPatternNode>();
+    TEST_CHECK(loaded_node != nullptr, "Type is LambdaPatternNode");
+    TEST_CHECK(loaded_node && loaded_node->required_fields.size() == 0, "Has 0 required fields");
+    TEST_CHECK(loaded_node && loaded_node->optional_fields.size() == 0, "Has 0 optional fields");
+    TEST_CHECK(loaded_node && !loaded_node->at_binding.has_value(), "No at-binding");
+    TEST_CHECK(loaded_node && !loaded_node->allow_extra, "No ellipsis");
+}
+
+// StringInterpolationNode Tests
+
+void test_string_interpolation_simple() {
+    std::cout << "> StringInterpolationNode simple (\"hello ${name}\")..."
<< std::endl;
+
+    // "hello ${name}" = literal "hello " + expr(name)
+    std::vector<StringPart> parts;
+    parts.push_back(StringPart::make_literal("hello "));
+    parts.push_back(StringPart::make_expr(std::make_shared<IRNode>(VarNode(0, "name"))));
+
+    auto node = std::make_shared<IRNode>(StringInterpolationNode(std::move(parts)));
+
+    // Serialize
+    IRModule module;
+    module.entry = node;
+    Serializer ser;
+    auto bytes = ser.serialize_to_bytes(module);
+
+    // Deserialize
+    Deserializer deser;
+    auto loaded = deser.deserialize(bytes);
+
+    // Verify
+    auto* loaded_node = loaded.entry->get_if<StringInterpolationNode>();
+    TEST_CHECK(loaded_node != nullptr, "Type is StringInterpolationNode");
+    TEST_CHECK(loaded_node && loaded_node->parts.size() == 2, "Has 2 parts");
+    TEST_CHECK(loaded_node && loaded_node->parts[0].type == StringPart::Type::LITERAL,
+               "First part is LITERAL");
+    TEST_CHECK(loaded_node && loaded_node->parts[0].literal == "hello ", "First part is 'hello '");
+    TEST_CHECK(loaded_node && loaded_node->parts[1].type == StringPart::Type::EXPR,
+               "Second part is EXPR");
+    TEST_CHECK(loaded_node && loaded_node->parts[1].expr != nullptr, "Second part has expression");
+}
+
+void test_string_interpolation_multiple() {
+    std::cout << "> StringInterpolationNode multiple (\"${a} and ${b}\")..."
<< std::endl;
+
+    // "${a} and ${b}" = expr(a) + literal " and " + expr(b)
+    std::vector<StringPart> parts;
+    parts.push_back(StringPart::make_expr(std::make_shared<IRNode>(VarNode(0, "a"))));
+    parts.push_back(StringPart::make_literal(" and "));
+    parts.push_back(StringPart::make_expr(std::make_shared<IRNode>(VarNode(0, "b"))));
+
+    auto node = std::make_shared<IRNode>(StringInterpolationNode(std::move(parts)));
+
+    // Serialize
+    IRModule module;
+    module.entry = node;
+    Serializer ser;
+    auto bytes = ser.serialize_to_bytes(module);
+
+    // Deserialize
+    Deserializer deser;
+    auto loaded = deser.deserialize(bytes);
+
+    // Verify
+    auto* loaded_node = loaded.entry->get_if<StringInterpolationNode>();
+    TEST_CHECK(loaded_node != nullptr, "Type is StringInterpolationNode");
+    TEST_CHECK(loaded_node && loaded_node->parts.size() == 3, "Has 3 parts");
+    TEST_CHECK(loaded_node && loaded_node->parts[0].type == StringPart::Type::EXPR, "Part 0 is EXPR");
+    TEST_CHECK(loaded_node && loaded_node->parts[1].type == StringPart::Type::LITERAL,
+               "Part 1 is LITERAL");
+    TEST_CHECK(loaded_node && loaded_node->parts[1].literal == " and ", "Part 1 is ' and '");
+    TEST_CHECK(loaded_node && loaded_node->parts[2].type == StringPart::Type::EXPR, "Part 2 is EXPR");
+}
+
+void test_string_interpolation_complex() {
+    std::cout << "> StringInterpolationNode complex (\"result: ${a + b}\")..."
<< std::endl;
+
+    // "result: ${a + b}" = literal "result: " + expr(a + b)
+    auto expr_a = std::make_shared<IRNode>(VarNode(0, "a"));
+    auto expr_b = std::make_shared<IRNode>(VarNode(0, "b"));
+    auto add_expr = std::make_shared<IRNode>(BinaryOpNode(BinaryOp::ADD, expr_a, expr_b));
+
+    std::vector<StringPart> parts;
+    parts.push_back(StringPart::make_literal("result: "));
+    parts.push_back(StringPart::make_expr(add_expr));
+
+    auto node = std::make_shared<IRNode>(StringInterpolationNode(std::move(parts)));
+
+    // Serialize
+    IRModule module;
+    module.entry = node;
+    Serializer ser;
+    auto bytes = ser.serialize_to_bytes(module);
+
+    // Deserialize
+    Deserializer deser;
+    auto loaded = deser.deserialize(bytes);
+
+    // Verify
+    auto* loaded_node = loaded.entry->get_if<StringInterpolationNode>();
+    TEST_CHECK(loaded_node != nullptr, "Type is StringInterpolationNode");
+    TEST_CHECK(loaded_node && loaded_node->parts.size() == 2, "Has 2 parts");
+    TEST_CHECK(loaded_node && loaded_node->parts[1].type == StringPart::Type::EXPR, "Part 1 is EXPR");
+
+    // Verify the expression is a BinaryOpNode
+    if (loaded_node && loaded_node->parts[1].expr) {
+        auto* bin_op = loaded_node->parts[1].expr->get_if<BinaryOpNode>();
+        TEST_CHECK(bin_op != nullptr, "Expression is BinaryOpNode");
+        TEST_CHECK(bin_op && bin_op->op == BinaryOp::ADD, "Operation is ADD");
+    }
+}
+
+void test_string_interpolation_nested() {
+    std::cout << "> StringInterpolationNode nested (\"${prefix}/${path}\")..."
<< std::endl;
+
+    // "${prefix}/${path}" = expr(prefix) + literal "/" + expr(path)
+    std::vector<StringPart> parts;
+    parts.push_back(StringPart::make_expr(std::make_shared<IRNode>(VarNode(0, "prefix"))));
+    parts.push_back(StringPart::make_literal("/"));
+    parts.push_back(StringPart::make_expr(std::make_shared<IRNode>(VarNode(0, "path"))));
+
+    auto node = std::make_shared<IRNode>(StringInterpolationNode(std::move(parts)));
+
+    // Serialize
+    IRModule module;
+    module.entry = node;
+    Serializer ser;
+    auto bytes = ser.serialize_to_bytes(module);
+
+    // Deserialize
+    Deserializer deser;
+    auto loaded = deser.deserialize(bytes);
+
+    // Verify
+    auto* loaded_node = loaded.entry->get_if<StringInterpolationNode>();
+    TEST_CHECK(loaded_node != nullptr, "Type is StringInterpolationNode");
+    TEST_CHECK(loaded_node && loaded_node->parts.size() == 3, "Has 3 parts");
+    TEST_CHECK(loaded_node && loaded_node->parts[1].type == StringPart::Type::LITERAL,
+               "Middle part is LITERAL");
+    TEST_CHECK(loaded_node && loaded_node->parts[1].literal == "/", "Middle part is '/'");
+}
 
 int main() {
-    std::cout << "=== Regression Tests for Nixir ===" << std::endl << std::endl;
+    std::cout << "=== Regression Tests ===" << std::endl << std::endl;
 
     test_enum_compatibility();
     std::cout << std::endl;
 
@@ -178,6 +748,69 @@ int main() {
     test_parser_expect_in_speculative_parsing();
     std::cout << std::endl;
 
+    test_implication_right_associativity();
+    std::cout << std::endl;
+
+    test_lookup_path_lexer_position();
+    std::cout << std::endl;
+
+    test_unterminated_block_comment_rejected();
+    std::cout << std::endl;
+
+    test_unknown_character_rejected();
+    std::cout << std::endl;
+
+    test_lookup_path_node();
+    std::cout << std::endl;
+
+    test_import_node();
+    std::cout << std::endl;
+
+    test_import_with_lookup_path();
+    std::cout << std::endl;
+
+    test_relative_path_import_parsing();
+    std::cout << std::endl;
+
+    test_builtin_call_node();
+    std::cout << std::endl;
+
+    test_uri_node();
+    std::cout << std::endl;
+
+    test_float_node();
+    std::cout << std::endl;
+
+    test_lambda_pattern_simple();
+    std::cout << std::endl;
+
+    test_lambda_pattern_with_defaults();
+    std::cout << std::endl;
+
+    test_lambda_pattern_at_binding();
+    std::cout << std::endl;
+
+    test_lambda_pattern_ellipsis();
+    std::cout << std::endl;
+
+    test_lambda_pattern_complete();
+    std::cout << std::endl;
+
+    test_lambda_pattern_empty();
+    std::cout << std::endl;
+
+    test_string_interpolation_simple();
+    std::cout << std::endl;
+
+    test_string_interpolation_multiple();
+    std::cout << std::endl;
+
+    test_string_interpolation_complex();
+    std::cout << std::endl;
+
+    test_string_interpolation_nested();
+    std::cout << std::endl;
+
     std::cout << "=== Tests Complete ===" << std::endl;
     std::cout << "Failures: " << failures << std::endl;
     return failures > 0 ? 1 : 0;
diff --git a/tests/shortcircuit.nix b/tests/shortcircuit.nix
deleted file mode 100644
index 063bf70..0000000
--- a/tests/shortcircuit.nix
+++ /dev/null
@@ -1,11 +0,0 @@
-# Test short-circuit evaluation
-let
-  alwaysFalse = false;
-  alwaysTrue = true;
-  x = 10;
-in
-  {
-    and_false = alwaysFalse && alwaysTrue;
-    or_true = alwaysTrue || alwaysFalse;
-    impl_false = alwaysFalse -> alwaysFalse;
-  }
diff --git a/tests/simple.nixir b/tests/simple.nixir
deleted file mode 100644
index 3e26f83..0000000
Binary files a/tests/simple.nixir and /dev/null differ
diff --git a/tests/simple_op.nix b/tests/simple_op.nix
deleted file mode 100644
index 193df0b..0000000
--- a/tests/simple_op.nix
+++ /dev/null
@@ -1 +0,0 @@
-1 + 2
\ No newline at end of file
diff --git a/tests/simple_op.nixir b/tests/simple_op.nixir
deleted file mode 100644
index 18ffbd3..0000000
Binary files a/tests/simple_op.nixir and /dev/null differ
diff --git a/tests/string_interp.nix b/tests/string_interp.nix
deleted file mode 100644
index b9ae519..0000000
--- a/tests/string_interp.nix
+++ /dev/null
@@ -1,19 +0,0 @@
-# Test string interpolation
-let
-  name = "world";
-  x = 42;
-  bool_val = true;
-in
-  {
-    # Simple interpolation
-    greeting = "Hello ${name}!";
-
-    # Multiple interpolations
-    multi = "x is ${x} and name is ${name}";
-
-    # Nested expression
-    nested = "Result: ${if bool_val then "yes" else "no"}";
-
-    # Just a string (no interpolation)
-    plain = "plain text";
-  }
diff --git a/tests/unary.nixir b/tests/unary.nixir
deleted file mode 100644
index 652fabc..0000000
Binary files a/tests/unary.nixir and /dev/null differ