irc/parser: fix lexer ownership, errors, and implication parsing

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I12a6b52ec1c0edff605d02393eafde896a6a6964
2026-04-24 18:35:16 +03:00 · 2026-04-24 18:35:16 +03:00 · b319ef6f3f
commit b319ef6f3f
parent 760094a2b7
4 changed files with 165 additions and 15 deletions
--- a/src/irc/lexer.cpp
+++ b/src/irc/lexer.cpp
@ -1,9 +1,10 @@
 #include "lexer.h"
 #include <cctype>
 #include <stdexcept>
 namespace nix_irc {
-Lexer::Lexer(const std::string& input) : input(input), pos(0), line(1), col(1) {}
 // Takes the source text by value and moves it into the lexer's `input` member.
 // Scanning state starts at offset 0, line 1, column 1.
+Lexer::Lexer(std::string input) : input(std::move(input)), pos(0), line(1), col(1) {}
 std::vector<Token> Lexer::tokenize() {
 #define TOKEN(t)                                                                                   \
@ -109,9 +110,10 @@ std::vector<Token> Lexer::tokenize() {
      // If we found > and there's content, it's a lookup path
      if (end < input.size() && input[end] == '>' && end > pos + 1) {
        std::string path = input.substr(pos + 1, end - pos - 1);
        size_t consumed = end - pos + 1;
        tokens.push_back({Token::LOOKUP_PATH, path, line, col});
        pos = end + 1;
-        col += (end - pos + 1);
+        col += consumed;
        is_lookup_path = true;
      }
@ -123,8 +125,14 @@ std::vector<Token> Lexer::tokenize() {
    } else if (c == '!') {
      emit(TOKEN(NOT));
    } else if (c == '.') {
      // Relative paths: ./foo and ../foo
      if (pos + 1 < input.size() && input[pos + 1] == '/') {
        tokenize_path();
      } else if (pos + 2 < input.size() && input[pos + 1] == '.' && input[pos + 2] == '/') {
        tokenize_path();
      }
      // Check for ellipsis (...)
-      if (pos + 2 < input.size() && input[pos + 1] == '.' && input[pos + 2] == '.') {
+      else if (pos + 2 < input.size() && input[pos + 1] == '.' && input[pos + 2] == '.') {
        tokens.push_back(TOKEN(ELLIPSIS));
        pos += 3;
        col += 3;
@ -176,8 +184,8 @@ std::vector<Token> Lexer::tokenize() {
        tokenize_ident();
      }
    } else {
-      pos++;
+      throw std::runtime_error("Unexpected character '" + std::string(1, c) + "' at " +
-      col++;
+                               std::to_string(line) + ":" + std::to_string(col));
    }
  }
  tokens.push_back({Token::EOF_, "", line, col});
@ -210,10 +218,16 @@ void Lexer::skip_whitespace() {
    } else if (c == '/' && pos + 1 < input.size() && input[pos + 1] == '*') {
      // Block comment /* ... */
      // Note: Nix block comments do NOT nest
      size_t start_line = line;
      size_t start_col = col;
      bool terminated = false;
      pos += 2; // Skip /*
      col += 2;
      while (pos + 1 < input.size()) {
        if (input[pos] == '*' && input[pos + 1] == '/') {
          pos += 2; // Skip */
          col += 2;
          terminated = true;
          break;
        }
        if (input[pos] == '\n') {
@ -224,6 +238,10 @@ void Lexer::skip_whitespace() {
        }
        pos++;
      }
      if (!terminated) {
        throw std::runtime_error("Unterminated block comment at " + std::to_string(start_line) +
                                 ":" + std::to_string(start_col));
      }
    } else {
      break;
    }
@ -231,13 +249,17 @@ void Lexer::skip_whitespace() {
 }
 void Lexer::tokenize_string() {
  size_t start_line = line;
  size_t start_col = col;
  pos++;
  col++;
  std::string s;
  bool has_interp = false;
  while (pos < input.size() && input[pos] != '"') {
    if (input[pos] == '\\' && pos + 1 < input.size()) {
      pos++;
      col++;
      switch (input[pos]) {
      case 'n':
        s += '\n';
@ -262,21 +284,35 @@ void Lexer::tokenize_string() {
        break;
      }
      pos++;
      col++;
    } else if (input[pos] == '$' && pos + 1 < input.size() && input[pos + 1] == '{') {
      // Found interpolation marker
      has_interp = true;
      s += input[pos]; // Keep $ in raw string
      pos++;
      col++;
    } else {
      if (input[pos] == '\n') {
        s += input[pos];
        pos++;
        line++;
        col = 1;
        continue;
      }
      s += input[pos];
      pos++;
      col++;
    }
  }
  if (pos >= input.size()) {
    throw std::runtime_error("Unterminated string at " + std::to_string(start_line) + ":" +
                             std::to_string(start_col));
  }
  pos++;
  col++;
  Token::Type type = has_interp ? Token::STRING_INTERP : Token::STRING;
-  tokens.push_back({type, s, line, col});
+  tokens.push_back({type, s, start_line, start_col});
  col += s.size() + 2;
 }
 void Lexer::tokenize_indented_string() {
--- a/src/irc/lexer.h
+++ b/src/irc/lexer.h
@ -68,12 +68,12 @@ struct Token {
 class Lexer {
 public:
-  Lexer(const std::string& input);
+  explicit Lexer(std::string input);
  std::vector<Token> tokenize();
 private:
  std::vector<Token> tokens;
-  const std::string& input;
+  std::string input;
  size_t pos;
  size_t line;
  size_t col;
--- a/src/irc/parser.cpp
+++ b/src/irc/parser.cpp
@ -18,6 +18,116 @@ static std::string trim(const std::string& s) {
  return s.substr(start, end - start + 1);
 }
 // Returns the enumerator name of a token type as a string literal, for use in
 // diagnostics. Token::EOF_ is reported as "EOF"; any value that matches no case
 // below is reported as "UNKNOWN".
 static const char* token_type_name(Token::Type type) {
  const char* name = "UNKNOWN";
  // clang-format off
  switch (type) {
  case Token::LPAREN:                 name = "LPAREN";                 break;
  case Token::RPAREN:                 name = "RPAREN";                 break;
  case Token::LBRACE:                 name = "LBRACE";                 break;
  case Token::RBRACE:                 name = "RBRACE";                 break;
  case Token::LBRACKET:               name = "LBRACKET";               break;
  case Token::RBRACKET:               name = "RBRACKET";               break;
  case Token::IDENT:                  name = "IDENT";                  break;
  case Token::STRING:                 name = "STRING";                 break;
  case Token::STRING_INTERP:          name = "STRING_INTERP";          break;
  case Token::INDENTED_STRING:        name = "INDENTED_STRING";        break;
  case Token::INDENTED_STRING_INTERP: name = "INDENTED_STRING_INTERP"; break;
  case Token::PATH:                   name = "PATH";                   break;
  case Token::LOOKUP_PATH:            name = "LOOKUP_PATH";            break;
  case Token::INT:                    name = "INT";                    break;
  case Token::FLOAT:                  name = "FLOAT";                  break;
  case Token::URI:                    name = "URI";                    break;
  case Token::BOOL:                   name = "BOOL";                   break;
  case Token::LET:                    name = "LET";                    break;
  case Token::IN:                     name = "IN";                     break;
  case Token::REC:                    name = "REC";                    break;
  case Token::IF:                     name = "IF";                     break;
  case Token::THEN:                   name = "THEN";                   break;
  case Token::ELSE:                   name = "ELSE";                   break;
  case Token::ASSERT:                 name = "ASSERT";                 break;
  case Token::WITH:                   name = "WITH";                   break;
  case Token::INHERIT:                name = "INHERIT";                break;
  case Token::IMPORT:                 name = "IMPORT";                 break;
  case Token::DOT:                    name = "DOT";                    break;
  case Token::SEMICOLON:              name = "SEMICOLON";              break;
  case Token::COLON:                  name = "COLON";                  break;
  case Token::EQUALS:                 name = "EQUALS";                 break;
  case Token::AT:                     name = "AT";                     break;
  case Token::COMMA:                  name = "COMMA";                  break;
  case Token::QUESTION:               name = "QUESTION";               break;
  case Token::ELLIPSIS:               name = "ELLIPSIS";               break;
  case Token::PLUS:                   name = "PLUS";                   break;
  case Token::MINUS:                  name = "MINUS";                  break;
  case Token::STAR:                   name = "STAR";                   break;
  case Token::SLASH:                  name = "SLASH";                  break;
  case Token::CONCAT:                 name = "CONCAT";                 break;
  case Token::MERGE:                  name = "MERGE";                  break;
  case Token::EQEQ:                   name = "EQEQ";                   break;
  case Token::NE:                     name = "NE";                     break;
  case Token::LT:                     name = "LT";                     break;
  case Token::GT:                     name = "GT";                     break;
  case Token::LE:                     name = "LE";                     break;
  case Token::GE:                     name = "GE";                     break;
  case Token::AND:                    name = "AND";                    break;
  case Token::OR:                     name = "OR";                     break;
  case Token::IMPL:                   name = "IMPL";                   break;
  case Token::NOT:                    name = "NOT";                    break;
  case Token::EOF_:                   name = "EOF";                    break;
  }
  // clang-format on
  return name;
 }
 static std::string read_file(const std::string& path) {
  FILE* f = fopen(path.c_str(), "r");
  if (!f) {
@ -85,14 +195,17 @@ public:
  // Requires the current token to have type `type`.
  // On a match, advances past the token and returns true.
  // On a mismatch, throws std::runtime_error naming the expected and actual token
  // types and the line:col of the current token. The added lines format the types
  // with token_type_name(); the removed lines printed them via std::to_string().
  bool expect(Token::Type type) {
    if (current().type != type) {
-      throw std::runtime_error(
+      throw std::runtime_error("Expected token " + std::string(token_type_name(type)) +
-          "Expected token " + std::to_string(type) + " but got " + std::to_string(current().type) +
+                               " but got " + token_type_name(current().type) + " at " +
-          " at " + std::to_string(current().line) + ":" + std::to_string(current().col));
+                               std::to_string(current().line) + ":" +
                               std::to_string(current().col));
    }
    advance();
    return true;
  }
  // Reports whether `type` is a right-associative binary operator.
  // Token::IMPL is the only token type for which this returns true.
  bool is_right_associative(Token::Type type) {
    const bool is_impl = (type == Token::IMPL);
    return is_impl;
  }
  // Get operator precedence (higher = tighter binding)
  int get_precedence(Token::Type type) {
    switch (type) {
@ -263,7 +376,8 @@ public:
      Token op_token = current();
      advance();
-      auto right = parse_binary_op(prec + 1);
+      int next_prec = is_right_associative(op_token.type) ? prec : prec + 1;
      auto right = parse_binary_op(next_prec);
      left = std::make_shared<Node>(BinaryOpNode(token_to_binop(op_token.type), left, right));
    }
--- a/src/irc/types.cpp
+++ b/src/irc/types.cpp
@ -16,8 +16,8 @@ BinaryOpNode::BinaryOpNode(BinaryOp o, std::shared_ptr<Node> l, std::shared_ptr<
    : op(o), left(std::move(l)), right(std::move(r)), line(ln) {}
 // UnaryOpNode constructor: stores the operator `o`, moves the operand pointer
 // into the `operand` member, and records the line `l`. The added lines rename the
 // parameter to `operand_ptr` so it no longer shares the member's name.
-UnaryOpNode::UnaryOpNode(UnaryOp o, std::shared_ptr<Node> operand, uint32_t l)
+UnaryOpNode::UnaryOpNode(UnaryOp o, std::shared_ptr<Node> operand_ptr, uint32_t l)
-    : op(o), operand(std::move(operand)), line(l) {}
+    : op(o), operand(std::move(operand_ptr)), line(l) {}
 // SelectNode constructor
 SelectNode::SelectNode(std::shared_ptr<Node> e, std::shared_ptr<Node> a, uint32_t l)