irc/parser: fix lexer ownership, errors, and implication parsing
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: I12a6b52ec1c0edff605d02393eafde896a6a6964
This commit is contained in:
parent
760094a2b7
commit
b319ef6f3f
4 changed files with 165 additions and 15 deletions
|
|
@ -1,9 +1,10 @@
|
|||
#include "lexer.h"
|
||||
#include <cctype>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace nix_irc {
|
||||
|
||||
Lexer::Lexer(const std::string& input) : input(input), pos(0), line(1), col(1) {}
|
||||
// Takes ownership of the input text (moved into the member) and starts
// scanning at position 0, line 1, column 1.
Lexer::Lexer(std::string input) : input(std::move(input)), pos(0), line(1), col(1) {}
|
||||
|
||||
std::vector<Token> Lexer::tokenize() {
|
||||
#define TOKEN(t) \
|
||||
|
|
@ -109,9 +110,10 @@ std::vector<Token> Lexer::tokenize() {
|
|||
// If we found > and there's content, it's a lookup path
|
||||
if (end < input.size() && input[end] == '>' && end > pos + 1) {
|
||||
std::string path = input.substr(pos + 1, end - pos - 1);
|
||||
size_t consumed = end - pos + 1;
|
||||
tokens.push_back({Token::LOOKUP_PATH, path, line, col});
|
||||
pos = end + 1;
|
||||
col += (end - pos + 1);
|
||||
col += consumed;
|
||||
is_lookup_path = true;
|
||||
}
|
||||
|
||||
|
|
@ -123,8 +125,14 @@ std::vector<Token> Lexer::tokenize() {
|
|||
} else if (c == '!') {
|
||||
emit(TOKEN(NOT));
|
||||
} else if (c == '.') {
|
||||
// Relative paths: ./foo and ../foo
|
||||
if (pos + 1 < input.size() && input[pos + 1] == '/') {
|
||||
tokenize_path();
|
||||
} else if (pos + 2 < input.size() && input[pos + 1] == '.' && input[pos + 2] == '/') {
|
||||
tokenize_path();
|
||||
}
|
||||
// Check for ellipsis (...)
|
||||
if (pos + 2 < input.size() && input[pos + 1] == '.' && input[pos + 2] == '.') {
|
||||
else if (pos + 2 < input.size() && input[pos + 1] == '.' && input[pos + 2] == '.') {
|
||||
tokens.push_back(TOKEN(ELLIPSIS));
|
||||
pos += 3;
|
||||
col += 3;
|
||||
|
|
@ -176,8 +184,8 @@ std::vector<Token> Lexer::tokenize() {
|
|||
tokenize_ident();
|
||||
}
|
||||
} else {
|
||||
pos++;
|
||||
col++;
|
||||
throw std::runtime_error("Unexpected character '" + std::string(1, c) + "' at " +
|
||||
std::to_string(line) + ":" + std::to_string(col));
|
||||
}
|
||||
}
|
||||
tokens.push_back({Token::EOF_, "", line, col});
|
||||
|
|
@ -210,10 +218,16 @@ void Lexer::skip_whitespace() {
|
|||
} else if (c == '/' && pos + 1 < input.size() && input[pos + 1] == '*') {
|
||||
// Block comment /* ... */
|
||||
// Note: Nix block comments do NOT nest
|
||||
size_t start_line = line;
|
||||
size_t start_col = col;
|
||||
bool terminated = false;
|
||||
pos += 2; // Skip /*
|
||||
col += 2;
|
||||
while (pos + 1 < input.size()) {
|
||||
if (input[pos] == '*' && input[pos + 1] == '/') {
|
||||
pos += 2; // Skip */
|
||||
col += 2;
|
||||
terminated = true;
|
||||
break;
|
||||
}
|
||||
if (input[pos] == '\n') {
|
||||
|
|
@ -224,6 +238,10 @@ void Lexer::skip_whitespace() {
|
|||
}
|
||||
pos++;
|
||||
}
|
||||
if (!terminated) {
|
||||
throw std::runtime_error("Unterminated block comment at " + std::to_string(start_line) +
|
||||
":" + std::to_string(start_col));
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
|
@ -231,13 +249,17 @@ void Lexer::skip_whitespace() {
|
|||
}
|
||||
|
||||
void Lexer::tokenize_string() {
|
||||
size_t start_line = line;
|
||||
size_t start_col = col;
|
||||
pos++;
|
||||
col++;
|
||||
std::string s;
|
||||
bool has_interp = false;
|
||||
|
||||
while (pos < input.size() && input[pos] != '"') {
|
||||
if (input[pos] == '\\' && pos + 1 < input.size()) {
|
||||
pos++;
|
||||
col++;
|
||||
switch (input[pos]) {
|
||||
case 'n':
|
||||
s += '\n';
|
||||
|
|
@ -262,21 +284,35 @@ void Lexer::tokenize_string() {
|
|||
break;
|
||||
}
|
||||
pos++;
|
||||
col++;
|
||||
} else if (input[pos] == '$' && pos + 1 < input.size() && input[pos + 1] == '{') {
|
||||
// Found interpolation marker
|
||||
has_interp = true;
|
||||
s += input[pos]; // Keep $ in raw string
|
||||
pos++;
|
||||
col++;
|
||||
} else {
|
||||
if (input[pos] == '\n') {
|
||||
s += input[pos];
|
||||
pos++;
|
||||
line++;
|
||||
col = 1;
|
||||
continue;
|
||||
}
|
||||
s += input[pos];
|
||||
pos++;
|
||||
col++;
|
||||
}
|
||||
}
|
||||
if (pos >= input.size()) {
|
||||
throw std::runtime_error("Unterminated string at " + std::to_string(start_line) + ":" +
|
||||
std::to_string(start_col));
|
||||
}
|
||||
pos++;
|
||||
col++;
|
||||
|
||||
Token::Type type = has_interp ? Token::STRING_INTERP : Token::STRING;
|
||||
tokens.push_back({type, s, line, col});
|
||||
col += s.size() + 2;
|
||||
tokens.push_back({type, s, start_line, start_col});
|
||||
}
|
||||
|
||||
void Lexer::tokenize_indented_string() {
|
||||
|
|
|
|||
|
|
@ -68,12 +68,12 @@ struct Token {
|
|||
|
||||
class Lexer {
|
||||
public:
|
||||
Lexer(const std::string& input);
|
||||
explicit Lexer(std::string input);
|
||||
std::vector<Token> tokenize();
|
||||
|
||||
private:
|
||||
std::vector<Token> tokens;
|
||||
const std::string& input;
|
||||
std::string input;
|
||||
size_t pos;
|
||||
size_t line;
|
||||
size_t col;
|
||||
|
|
|
|||
|
|
@ -18,6 +18,116 @@ static std::string trim(const std::string& s) {
|
|||
return s.substr(start, end - start + 1);
|
||||
}
|
||||
|
||||
// Map a token type to a stable human-readable name for diagnostics.
// Note: EOF_ deliberately prints as "EOF" (the trailing underscore only
// avoids the C macro). Returns "UNKNOWN" for any value outside the enum.
static const char* token_type_name(Token::Type type) {
  switch (type) {
    case Token::LPAREN: return "LPAREN";
    case Token::RPAREN: return "RPAREN";
    case Token::LBRACE: return "LBRACE";
    case Token::RBRACE: return "RBRACE";
    case Token::LBRACKET: return "LBRACKET";
    case Token::RBRACKET: return "RBRACKET";
    case Token::IDENT: return "IDENT";
    case Token::STRING: return "STRING";
    case Token::STRING_INTERP: return "STRING_INTERP";
    case Token::INDENTED_STRING: return "INDENTED_STRING";
    case Token::INDENTED_STRING_INTERP: return "INDENTED_STRING_INTERP";
    case Token::PATH: return "PATH";
    case Token::LOOKUP_PATH: return "LOOKUP_PATH";
    case Token::INT: return "INT";
    case Token::FLOAT: return "FLOAT";
    case Token::URI: return "URI";
    case Token::BOOL: return "BOOL";
    case Token::LET: return "LET";
    case Token::IN: return "IN";
    case Token::REC: return "REC";
    case Token::IF: return "IF";
    case Token::THEN: return "THEN";
    case Token::ELSE: return "ELSE";
    case Token::ASSERT: return "ASSERT";
    case Token::WITH: return "WITH";
    case Token::INHERIT: return "INHERIT";
    case Token::IMPORT: return "IMPORT";
    case Token::DOT: return "DOT";
    case Token::SEMICOLON: return "SEMICOLON";
    case Token::COLON: return "COLON";
    case Token::EQUALS: return "EQUALS";
    case Token::AT: return "AT";
    case Token::COMMA: return "COMMA";
    case Token::QUESTION: return "QUESTION";
    case Token::ELLIPSIS: return "ELLIPSIS";
    case Token::PLUS: return "PLUS";
    case Token::MINUS: return "MINUS";
    case Token::STAR: return "STAR";
    case Token::SLASH: return "SLASH";
    case Token::CONCAT: return "CONCAT";
    case Token::MERGE: return "MERGE";
    case Token::EQEQ: return "EQEQ";
    case Token::NE: return "NE";
    case Token::LT: return "LT";
    case Token::GT: return "GT";
    case Token::LE: return "LE";
    case Token::GE: return "GE";
    case Token::AND: return "AND";
    case Token::OR: return "OR";
    case Token::IMPL: return "IMPL";
    case Token::NOT: return "NOT";
    case Token::EOF_: return "EOF";
  }
  return "UNKNOWN";
}
|
||||
|
||||
static std::string read_file(const std::string& path) {
|
||||
FILE* f = fopen(path.c_str(), "r");
|
||||
if (!f) {
|
||||
|
|
@ -85,14 +195,17 @@ public:
|
|||
|
||||
// Consume the current token if it matches `type`; otherwise throw with a
// readable token name (via token_type_name) and the token's line:col.
bool expect(Token::Type type) {
  const Token& tok = current();
  if (tok.type != type) {
    throw std::runtime_error("Expected token " + std::string(token_type_name(type)) +
                             " but got " + token_type_name(tok.type) + " at " +
                             std::to_string(tok.line) + ":" +
                             std::to_string(tok.col));
  }
  advance();
  return true;
}
|
||||
|
||||
// Implication (->) is the only right-associative binary operator in Nix,
// so `a -> b -> c` must parse as `a -> (b -> c)`.
bool is_right_associative(Token::Type type) { return type == Token::IMPL; }
|
||||
|
||||
// Get operator precedence (higher = tighter binding)
|
||||
int get_precedence(Token::Type type) {
|
||||
switch (type) {
|
||||
|
|
@ -263,7 +376,8 @@ public:
|
|||
Token op_token = current();
|
||||
advance();
|
||||
|
||||
auto right = parse_binary_op(prec + 1);
|
||||
int next_prec = is_right_associative(op_token.type) ? prec : prec + 1;
|
||||
auto right = parse_binary_op(next_prec);
|
||||
left = std::make_shared<Node>(BinaryOpNode(token_to_binop(op_token.type), left, right));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -16,8 +16,8 @@ BinaryOpNode::BinaryOpNode(BinaryOp o, std::shared_ptr<Node> l, std::shared_ptr<
|
|||
: op(o), left(std::move(l)), right(std::move(r)), line(ln) {}
|
||||
|
||||
// UnaryOpNode constructor
|
||||
UnaryOpNode::UnaryOpNode(UnaryOp o, std::shared_ptr<Node> operand, uint32_t l)
|
||||
: op(o), operand(std::move(operand)), line(l) {}
|
||||
// Build a unary-operator AST node; takes ownership of the operand subtree.
// (Parameter named operand_ptr so the init-list move cannot shadow the
// `operand` member.)
UnaryOpNode::UnaryOpNode(UnaryOp o, std::shared_ptr<Node> operand_ptr, uint32_t l)
    : op(o), operand(std::move(operand_ptr)), line(l) {}
|
||||
|
||||
// SelectNode constructor
|
||||
SelectNode::SelectNode(std::shared_ptr<Node> e, std::shared_ptr<Node> a, uint32_t l)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue