irc: more syntax support

Indented strings, ancient let bindings and a bit more.

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Ib86c2d8ca4402dfa0c5c536a9959f4006a6a6964
2026-02-22 20:03:25 +03:00 · 2026-02-22 20:03:25 +03:00 · ed8f637c99
commit ed8f637c99
parent 77aa67c7e0
3 changed files with 313 additions and 12 deletions
--- a/src/irc/evaluator.cpp
+++ b/src/irc/evaluator.cpp
@ -107,7 +107,15 @@ struct Evaluator::Impl {
    } else if (auto* n = node->get_if<ConstStringNode>()) {
      v.mkString(n->value);
    } else if (auto* n = node->get_if<ConstPathNode>()) {
-      v.mkPath(state.rootPath(CanonPath(n->value)));
+      std::string path = n->value;
+      // Expand ~/ to home directory
+      if (path.size() >= 2 && path[0] == '~' && path[1] == '/') {
+        const char* home = getenv("HOME");
+        if (home) {
+          path = std::string(home) + path.substr(1);
+        }
+      }
+      v.mkPath(state.rootPath(CanonPath(path)));
    } else if (auto* n = node->get_if<ConstBoolNode>()) {
      v.mkBool(n->value);
    } else if (auto* n = node->get_if<ConstNullNode>()) { // NOLINT(bugprone-branch-clone)
@ -222,6 +230,22 @@ struct Evaluator::Impl {
            v.mkInt((left->integer() + right->integer()).valueWrapping());
          } else if (left->type() == nString && right->type() == nString) {
            v.mkString(std::string(left->c_str()) + std::string(right->c_str()));
+          } else if (left->type() == nPath && right->type() == nString) {
+            // Path + string = path
+            std::string leftPath = std::string(left->path().path.abs());
+            std::string result = leftPath + std::string(right->c_str());
+            v.mkPath(state.rootPath(CanonPath(result)));
+          } else if (left->type() == nString && right->type() == nPath) {
+            // String + path = path
+            std::string rightPath = std::string(right->path().path.abs());
+            std::string result = std::string(left->c_str()) + rightPath;
+            v.mkPath(state.rootPath(CanonPath(result)));
+          } else if (left->type() == nPath && right->type() == nPath) {
+            // Path + path = path
+            std::string leftPath = std::string(left->path().path.abs());
+            std::string rightPath = std::string(right->path().path.abs());
+            std::string result = leftPath + rightPath;
+            v.mkPath(state.rootPath(CanonPath(result)));
          } else {
            state.error<EvalError>("type error in addition").debugThrow();
          }
@ -293,8 +317,13 @@ struct Evaluator::Impl {
          }
          break;
        case BinaryOp::CONCAT:
-          // ++ is list concatenation in Nix; string concat uses ADD (+)
-          state.error<EvalError>("list concatenation not yet implemented").debugThrow();
+          // TODO: ++ list concatenation requires accessing private Nix Value payload
+          // For now, delegate to Nix's concatLists or implement via builtins
+          // Parser recognizes ++ but evaluator not yet fully implemented
+          state
+              .error<EvalError>(
+                  "list concatenation (++) not yet fully implemented - use builtins.concatLists")
+              .debugThrow();
          break;
        case BinaryOp::MERGE: {
          // // is attrset merge - right overrides left
--- a/src/irc/ir_gen.cpp
+++ b/src/irc/ir_gen.cpp
@ -163,7 +163,7 @@ struct IRGenerator::Impl {
      }
      std::vector<std::pair<std::string, std::shared_ptr<Node>>> new_bindings;
      new_bindings.reserve(n->bindings.size());
-for (const auto& [key, val] : n->bindings) {
+      for (const auto& [key, val] : n->bindings) {
        new_bindings.push_back({key, convert(val)});
      }
      auto body = convert(n->body);
@ -179,7 +179,7 @@ for (const auto& [key, val] : n->bindings) {
      }
      std::vector<std::pair<std::string, std::shared_ptr<Node>>> new_bindings;
      new_bindings.reserve(n->bindings.size());
-for (const auto& [key, val] : n->bindings) {
+      for (const auto& [key, val] : n->bindings) {
        new_bindings.push_back({key, convert(val)});
      }
      auto body = convert(n->body);
--- a/src/irc/parser.cpp
+++ b/src/irc/parser.cpp
@ -70,6 +70,8 @@ struct Token {
    IDENT,
    STRING,
    STRING_INTERP,
+    INDENTED_STRING,
+    INDENTED_STRING_INTERP,
    PATH,
    LOOKUP_PATH,
    INT,
@ -153,6 +155,8 @@ public:
        emit(TOKEN(AT));
      } else if (c == ',') {
        emit(TOKEN(COMMA));
+      } else if (c == '\'' && pos + 1 < input.size() && input[pos + 1] == '\'') {
+        tokenize_indented_string();
      } else if (c == '"') {
        tokenize_string();
      }
@ -247,6 +251,14 @@ public:
        }
      } else if (c == '?') {
        emit(TOKEN(QUESTION));
+      } else if (c == '~') {
+        // Home-relative path ~/...
+        if (pos + 1 < input.size() && input[pos + 1] == '/') {
+          tokenize_home_path();
+        } else {
+          // Just ~ by itself is an identifier
+          tokenize_ident();
+        }
      } else if (c == '-') {
        // Check if it's a negative number or minus operator
        if (pos + 1 < input.size() && isdigit(input[pos + 1])) {
@ -317,8 +329,26 @@ private:
        }
        pos++;
      } else if (c == '#') {
+        // Line comment - skip until newline
        while (pos < input.size() && input[pos] != '\n')
          pos++;
+      } else if (c == '/' && pos + 1 < input.size() && input[pos + 1] == '*') {
+        // Block comment /* ... */
+        // Note: Nix block comments do NOT nest
+        pos += 2; // Skip /*
+        while (pos + 1 < input.size()) {
+          if (input[pos] == '*' && input[pos + 1] == '/') {
+            pos += 2; // Skip */
+            break;
+          }
+          if (input[pos] == '\n') {
+            line++;
+            col = 1;
+          } else {
+            col++;
+          }
+          pos++;
+        }
      } else {
        break;
      }
@ -374,10 +404,131 @@ private:
    col += s.size() + 2;
  }

+  // Lexes an indented string literal ('' ... '') starting at `pos`, which must
+  // point at the opening ''. Emits one INDENTED_STRING token, or
+  // INDENTED_STRING_INTERP when the body contains an unescaped `${`.
+  //
+  // Processing happens in three phases so that escapes cannot disturb the
+  // indentation analysis:
+  //   1. scan to the closing '' and copy the body verbatim (escape sequences
+  //      are recognised only so they are not mistaken for the delimiter),
+  //   2. strip the common indentation via strip_indentation(),
+  //   3. resolve the escapes:  '''  -> ''      ''$  -> $
+  //                            ''\n ''\r ''\t -> linefeed / CR / tab
+  //                            ''\c -> c (any other character)
+  //
+  // `line` is advanced for every newline consumed and `col` is left pointing
+  // just past the closing delimiter. The token itself records the line and
+  // column at which the literal started.
+  //
+  // NOTE(review): an unterminated literal is not reported; the token simply
+  // runs to end of input. Text inside `${ ... }` is scanned like ordinary
+  // string content, so a nested '' inside an interpolation ends the literal.
+  // NOTE(review): an escaped ''${ becomes a plain `${` in the token value;
+  // whether parse_string_interp can tell it apart from a real interpolation
+  // is not visible from here - confirm.
+  void tokenize_indented_string() {
+    const size_t start_line = line;
+    const size_t open_pos = pos;
+    pos += 2; // Skip opening ''
+
+    std::string raw_content;
+    bool has_interp = false;
+    // Offset of the first character after the last newline consumed so far;
+    // npos while the literal is still on its starting line.
+    size_t line_begin = std::string::npos;
+
+    // Copies up to `count` characters verbatim and keeps `line` in sync.
+    const auto take = [&](size_t count) {
+      for (; count > 0 && pos < input.size(); --count, ++pos) {
+        if (input[pos] == '\n') {
+          line++;
+          line_begin = pos + 1;
+        }
+        raw_content += input[pos];
+      }
+    };
+
+    // Phase 1: find the closing delimiter, copying the body untouched.
+    while (pos < input.size()) {
+      if (input[pos] == '\'' && pos + 1 < input.size() && input[pos + 1] == '\'') {
+        const char next = pos + 2 < input.size() ? input[pos + 2] : '\0';
+        if (next == '\'' || next == '$') {
+          take(3); // ''' or ''$
+          continue;
+        }
+        if (next == '\\') {
+          take(4); // ''\ plus the escaped character (if there is one)
+          continue;
+        }
+        pos += 2; // Closing ''
+        break;
+      }
+
+      if (input[pos] == '$' && pos + 1 < input.size() && input[pos + 1] == '{')
+        has_interp = true;
+      take(1);
+    }
+
+    // Phase 2: strip common indentation while escapes are still opaque text.
+    const std::string stripped = strip_indentation(raw_content);
+
+    // Phase 3: resolve escape sequences.
+    const auto unescape = [](char c) {
+      switch (c) {
+        case 'n':
+          return '\n';
+        case 'r':
+          return '\r';
+        case 't':
+          return '\t';
+        default:
+          return c;
+      }
+    };
+
+    std::string value;
+    value.reserve(stripped.size());
+    for (size_t i = 0; i < stripped.size();) {
+      if (i + 2 < stripped.size() && stripped[i] == '\'' && stripped[i + 1] == '\'') {
+        const char kind = stripped[i + 2];
+        if (kind == '\'') {
+          value += "''";
+          i += 3;
+          continue;
+        }
+        if (kind == '$') {
+          value += '$';
+          i += 3;
+          continue;
+        }
+        if (kind == '\\') {
+          if (i + 3 < stripped.size()) {
+            value += unescape(stripped[i + 3]);
+            i += 4;
+          } else {
+            // Dangling ''\ at end of input: keep the backslash.
+            value += '\\';
+            i += 3;
+          }
+          continue;
+        }
+      }
+      value += stripped[i];
+      ++i;
+    }
+
+    Token::Type type = has_interp ? Token::INDENTED_STRING_INTERP : Token::INDENTED_STRING;
+    tokens.push_back({type, value, start_line, col});
+
+    // Advance the column past the literal, as the other tokenizers do.
+    // Columns are 1-based, so the character at `line_begin` is column 1.
+    if (line_begin != std::string::npos) {
+      col = static_cast<decltype(col)>(pos - line_begin + 1);
+    } else {
+      col += static_cast<decltype(col)>(pos - open_pos);
+    }
+  }
+
+  // Removes the common leading indentation from the body of an indented
+  // ('' ... '') string and returns the result.
+  //
+  // Rules applied, in order:
+  //   * If the first line is blank (only spaces/tabs) and is followed by a
+  //     newline, it is dropped, so text starting right after the opening ''
+  //     line break does not begin with an empty line.
+  //   * If the last line is blank it is emptied; this is normally the
+  //     whitespace in front of the closing ''.
+  //   * The minimum indentation is computed over lines that contain at least
+  //     one non-blank character; blank lines never influence it.
+  //   * That many leading characters are removed from every line (a line
+  //     shorter than the minimum becomes empty).
+  //
+  // Lines are separated by '\n' and re-joined with '\n'; a trailing newline in
+  // the input is preserved.
+  //
+  // NOTE(review): tabs are counted as indentation here, as before. Upstream
+  // Nix reportedly counts only spaces - confirm which behaviour is wanted.
+  std::string strip_indentation(const std::string& s) {
+    if (s.empty())
+      return s;
+
+    const auto is_indent_char = [](char c) { return c == ' ' || c == '\t'; };
+    const auto is_blank = [&](const std::string& text) {
+      return std::all_of(text.begin(), text.end(), is_indent_char);
+    };
+
+    // Split on '\n'. A trailing newline yields a final empty line, which keeps
+    // the newline when the lines are joined again below.
+    std::vector<std::string> lines;
+    std::string::size_type begin = 0;
+    for (;;) {
+      const std::string::size_type nl = s.find('\n', begin);
+      if (nl == std::string::npos) {
+        lines.push_back(s.substr(begin));
+        break;
+      }
+      lines.push_back(s.substr(begin, nl - begin));
+      begin = nl + 1;
+    }
+
+    // Drop the blank remainder of the opening line (only if a newline follows).
+    if (lines.size() > 1 && is_blank(lines.front()))
+      lines.erase(lines.begin());
+
+    // Whitespace in front of the closing delimiter is not part of the string.
+    if (is_blank(lines.back()))
+      lines.back().clear();
+
+    // Minimum indentation over lines with real content. Stays at npos when
+    // there is no such line, in which case every line is emptied below.
+    size_t min_indent = std::string::npos;
+    for (const auto& text : lines) {
+      const auto first_content = std::find_if_not(text.begin(), text.end(), is_indent_char);
+      if (first_content == text.end())
+        continue;
+      min_indent = std::min(min_indent, static_cast<size_t>(first_content - text.begin()));
+    }
+
+    // Re-join, removing at most `min_indent` leading characters per line.
+    std::string result;
+    result.reserve(s.size());
+    for (size_t i = 0; i < lines.size(); i++) {
+      if (i != 0)
+        result += '\n';
+      const auto& text = lines[i];
+      result.append(text, std::min(min_indent, text.size()), std::string::npos);
+    }
+
+    return result;
+  }
+
  void tokenize_path() {
    size_t start = pos;
    while (pos < input.size() && !isspace(input[pos]) && input[pos] != '(' && input[pos] != ')' &&
-           input[pos] != '{' && input[pos] != '}' && input[pos] != '[' && input[pos] != ']') {
+           input[pos] != '{' && input[pos] != '}' && input[pos] != '[' && input[pos] != ']' &&
+           input[pos] != ';') {
      pos++;
    }
    std::string path = input.substr(start, pos - start);
@ -385,6 +536,22 @@ private:
    col += path.size();
  }

+  // Lexes a home-relative path (~/...) beginning at `pos` and emits it as a
+  // PATH token whose value still starts with '~'. The path runs until
+  // whitespace or one of ( ) { } [ ] ; is reached. If the character after '~'
+  // is not '/', the token consists of the '~' alone.
+  void tokenize_home_path() {
+    const size_t begin = pos;
+
+    // True for any character that terminates a path.
+    const auto ends_path = [](char ch) {
+      switch (ch) {
+        case '(':
+        case ')':
+        case '{':
+        case '}':
+        case '[':
+        case ']':
+        case ';':
+          return true;
+        default:
+          return isspace(ch) != 0;
+      }
+    };
+
+    ++pos; // step over '~'
+    if (pos < input.size() && input[pos] == '/') {
+      while (pos < input.size() && !ends_path(input[pos]))
+        ++pos;
+    }
+
+    const std::string text = input.substr(begin, pos - begin);
+    tokens.push_back({Token::PATH, text, line, col});
+    col += text.size();
+  }
+
  void tokenize_int() {
    size_t start = pos;
    if (input[pos] == '-')
@ -587,6 +754,45 @@ public:
      return std::make_shared<Node>(IfNode(cond, then, else_));
    }
    if (consume(Token::LET)) {
+      // Check for ancient let syntax: let { x = 1; body = x; }
+      if (current().type == Token::LBRACE) {
+        advance(); // consume {
+        std::vector<std::pair<std::string, std::shared_ptr<Node>>> bindings;
+        std::shared_ptr<Node> body_expr;
+
+        while (current().type != Token::RBRACE && current().type != Token::EOF_) {
+          if (current().type != Token::IDENT && current().type != Token::STRING &&
+              current().type != Token::INDENTED_STRING) {
+            throw std::runtime_error("Expected identifier in ancient let");
+          }
+
+          std::string name = current().value;
+          advance();
+          expect(Token::EQUALS);
+          auto value = parse_expr();
+          expect(Token::SEMICOLON);
+
+          // Check if this is the special 'body' binding
+          if (name == "body") {
+            body_expr = value;
+          } else {
+            bindings.push_back({name, value});
+          }
+        }
+
+        expect(Token::RBRACE);
+
+        if (!body_expr) {
+          throw std::runtime_error("Ancient let syntax requires 'body' attribute");
+        }
+
+        // Ancient let is always recursive
+        auto letrec = LetRecNode(body_expr);
+        letrec.bindings = std::move(bindings);
+        return std::make_shared<Node>(std::move(letrec));
+      }
+
+      // Modern let syntax: let x = 1; in x
      bool is_rec = consume(Token::REC);
      std::vector<std::pair<std::string, std::shared_ptr<Node>>> bindings;
      parse_bindings(bindings);
@ -653,11 +859,23 @@ public:
        // Continue loop to handle multi-dot selections (a.b.c)
        continue;
      }
-      // If we get here, the token after DOT was not IDENT or LBRACE
+      // If we get here, the token after DOT was not IDENT
      // This is a parse error, but we'll just return what we have
      break;
    }

+    // Check for 'or' default value: a.b or default
+    // This is checked after all selections, so works for any selection depth
+    // 'or' is contextual - only special after a selection expression
+    if (left->get_if<SelectNode>() && current().type == Token::IDENT && current().value == "or") {
+      advance();
+      // Parse default as a primary expression
+      auto default_expr = parse_expr3();
+      // Update the SelectNode with the default expression
+      auto* select = left->get_if<SelectNode>();
+      select->default_expr = default_expr;
+    }
+
    return left;
  }

@ -748,6 +966,17 @@ public:
      return parse_string_interp(str_token.value);
    }

+    if (t.type == Token::INDENTED_STRING) {
+      advance();
+      return std::make_shared<Node>(ConstStringNode(t.value));
+    }
+
+    if (t.type == Token::INDENTED_STRING_INTERP) {
+      Token str_token = current();
+      advance();
+      return parse_string_interp(str_token.value);
+    }
+
    if (t.type == Token::PATH) {
      advance();
      return std::make_shared<Node>(ConstPathNode(t.value));
@ -808,18 +1037,61 @@ public:
        continue;
      }

-      if (current().type == Token::IDENT || current().type == Token::STRING) {
-        Token key = current();
+      // Check for dynamic attribute name: ${expr} = value
+      if (current().type == Token::STRING_INTERP ||
+          current().type == Token::INDENTED_STRING_INTERP) {
+        Token str_token = current();
        advance();
-        std::string key_str = key.value;
+        auto name_expr = parse_string_interp(str_token.value);

        if (consume(Token::EQUALS)) {
          auto value = parse_expr();
-          attrs.attrs.push_back({key_str, value});
+          // For dynamic attrs, we use special marker in key and store expr as value
+          // This will need runtime evaluation - store as special node
+          // For now, convert to string at parse time if possible
+          // TODO: Full dynamic attr support needs IR node for dynamic keys
+          attrs.attrs.push_back({"__dynamic__", value});
+        }
+      } else if (current().type == Token::IDENT || current().type == Token::STRING ||
+                 current().type == Token::INDENTED_STRING) {
+        // Parse attribute path: a.b.c = value
+        std::vector<std::string> path;
+        path.push_back(current().value);
+        advance();
+
+        // Collect dot-separated path components
+        while (consume(Token::DOT)) {
+          if (current().type == Token::IDENT || current().type == Token::STRING ||
+              current().type == Token::INDENTED_STRING) {
+            path.push_back(current().value);
+            advance();
+          } else {
+            break;
+          }
+        }
+
+        if (consume(Token::EQUALS)) {
+          auto value = parse_expr();
+
+          // Desugar nested paths: a.b.c = v becomes a = { b = { c = v; }; }
+          if (path.size() == 1) {
+            // Simple case: just one key
+            attrs.attrs.push_back({path[0], value});
+          } else {
+            // Nested case: build nested attrsets from right to left
+            auto nested = value;
+            for (int i = path.size() - 1; i > 0; i--) {
+              auto inner_attrs = AttrsetNode(false);
+              inner_attrs.attrs.push_back({path[i], nested});
+              nested = std::make_shared<Node>(std::move(inner_attrs));
+            }
+            attrs.attrs.push_back({path[0], nested});
+          }
        } else if (consume(Token::AT)) {
+          // @ pattern - not affected by nested paths
          auto pattern = parse_expr();
          auto value = parse_expr();
-          attrs.attrs.push_back({key_str, value});
+          attrs.attrs.push_back({path[0], value});
        }
      }