diff --git a/src/irc/evaluator.cpp b/src/irc/evaluator.cpp
index 769897b..6004f01 100644
--- a/src/irc/evaluator.cpp
+++ b/src/irc/evaluator.cpp
@@ -107,7 +107,15 @@ struct Evaluator::Impl {
         } else if (auto* n = node->get_if()) {
             v.mkString(n->value);
         } else if (auto* n = node->get_if()) {
-            v.mkPath(state.rootPath(CanonPath(n->value)));
+            std::string path = n->value;
+            // Expand ~/ to $HOME; the path is used as written when HOME is unset
+            if (path.size() >= 2 && path[0] == '~' && path[1] == '/') {
+                const char* home = getenv("HOME");
+                if (home) {
+                    path = std::string(home) + path.substr(1);
+                }
+            }
+            v.mkPath(state.rootPath(CanonPath(path)));
         } else if (auto* n = node->get_if()) {
             v.mkBool(n->value);
         } else if (auto* n = node->get_if()) {  // NOLINT(bugprone-branch-clone)
@@ -222,6 +230,22 @@ struct Evaluator::Impl {
                 v.mkInt((left->integer() + right->integer()).valueWrapping());
             } else if (left->type() == nString && right->type() == nString) {
                 v.mkString(std::string(left->c_str()) + std::string(right->c_str()));
+            } else if (left->type() == nPath && right->type() == nString) {
+                // Path + string = path
+                std::string leftPath = std::string(left->path().path.abs());
+                std::string result = leftPath + std::string(right->c_str());
+                v.mkPath(state.rootPath(CanonPath(result)));
+            } else if (left->type() == nString && right->type() == nPath) {
+                // String + path = path (NOTE(review): Nix yields a string here - confirm)
+                std::string rightPath = std::string(right->path().path.abs());
+                std::string result = std::string(left->c_str()) + rightPath;
+                v.mkPath(state.rootPath(CanonPath(result)));
+            } else if (left->type() == nPath && right->type() == nPath) {
+                // Path + path = path
+                std::string leftPath = std::string(left->path().path.abs());
+                std::string rightPath = std::string(right->path().path.abs());
+                std::string result = leftPath + rightPath;
+                v.mkPath(state.rootPath(CanonPath(result)));
             } else {
                 state.error("type error in addition").debugThrow();
             }
@@ -293,8 +317,13 @@ struct Evaluator::Impl {
             }
             break;
         case BinaryOp::CONCAT:
-            // ++ is list concatenation in Nix; string concat uses ADD (+)
-            state.error("list concatenation not yet implemented").debugThrow();
+            // TODO: ++ list concatenation requires accessing private Nix Value payload.
+            // The parser recognizes ++, but the evaluator does not implement it yet;
+            // until it does, report an error that points at builtins.concatLists.
+            state
+                .error(
+                    "list concatenation (++) not yet fully implemented - use builtins.concatLists")
+                .debugThrow();
             break;
         case BinaryOp::MERGE: {
             // // is attrset merge - right overrides left
diff --git a/src/irc/ir_gen.cpp b/src/irc/ir_gen.cpp
index 06318b4..95999c8 100644
--- a/src/irc/ir_gen.cpp
+++ b/src/irc/ir_gen.cpp
@@ -163,7 +163,7 @@ struct IRGenerator::Impl {
             }
             std::vector>> new_bindings;
             new_bindings.reserve(n->bindings.size());
-for (const auto& [key, val] : n->bindings) {
+            for (const auto& [key, val] : n->bindings) {
                 new_bindings.push_back({key, convert(val)});
             }
             auto body = convert(n->body);
@@ -179,7 +179,7 @@ for (const auto& [key, val] : n->bindings) {
             }
             std::vector>> new_bindings;
             new_bindings.reserve(n->bindings.size());
-for (const auto& [key, val] : n->bindings) {
+            for (const auto& [key, val] : n->bindings) {
                 new_bindings.push_back({key, convert(val)});
             }
             auto body = convert(n->body);
diff --git a/src/irc/parser.cpp b/src/irc/parser.cpp
index 43ebd1f..3780fb7 100644
--- a/src/irc/parser.cpp
+++ b/src/irc/parser.cpp
@@ -70,6 +70,8 @@ struct Token {
         IDENT,
         STRING,
         STRING_INTERP,
+        INDENTED_STRING,
+        INDENTED_STRING_INTERP,
         PATH,
         LOOKUP_PATH,
         INT,
@@ -153,6 +155,8 @@ public:
                 emit(TOKEN(AT));
             } else if (c == ',') {
                 emit(TOKEN(COMMA));
+            } else if (c == '\'' && pos + 1 < input.size() && input[pos + 1] == '\'') {
+                tokenize_indented_string();
             } else if (c == '"') {
                 tokenize_string();
             }
@@ -247,6 +251,14 @@ public:
                 }
             } else if (c == '?') {
                 emit(TOKEN(QUESTION));
+            } else if (c == '~') {
+                // Home-relative path ~/...
+                if (pos + 1 < input.size() && input[pos + 1] == '/') {
+                    tokenize_home_path();
+                } else {
+                    // Just ~ by itself is an identifier
+                    tokenize_ident();
+                }
             } else if (c == '-') {
                 // Check if it's a negative number or minus operator
                 if (pos + 1 < input.size() && isdigit(input[pos + 1])) {
@@ -317,8 +329,35 @@ private:
                 }
                 pos++;
             } else if (c == '#') {
+                // Line comment - skip until newline
                 while (pos < input.size() && input[pos] != '\n')
                     pos++;
+            } else if (c == '/' && pos + 1 < input.size() && input[pos + 1] == '*') {
+                // Block comment /* ... */
+                // Note: Nix block comments do NOT nest
+                pos += 2;  // Skip /*
+                col += 2;
+                bool closed = false;
+                while (pos + 1 < input.size()) {
+                    if (input[pos] == '*' && input[pos + 1] == '/') {
+                        pos += 2;  // Skip */
+                        col += 2;
+                        closed = true;
+                        break;
+                    }
+                    if (input[pos] == '\n') {
+                        line++;
+                        col = 1;
+                    } else {
+                        col++;
+                    }
+                    pos++;
+                }
+                if (!closed) {
+                    // Otherwise the last character of the input would be
+                    // left over and lexed as if it were code.
+                    throw std::runtime_error("Unterminated block comment");
+                }
             } else {
                 break;
             }
@@ -374,10 +413,122 @@ private:
         col += s.size() + 2;
     }
 
+    // Lexes an indented string ('' ... ''); pos is at the opening quotes.
+    // Emits INDENTED_STRING, or INDENTED_STRING_INTERP when the body contains
+    // a ${ that was not written as ''${, with the common indentation already
+    // stripped. Throws std::runtime_error when the closing '' is missing.
+    void tokenize_indented_string() {
+        const size_t start_line = line;
+        const auto start_col = col;
+        pos += 2;  // Skip opening ''
+        col += 2;
+        std::string raw_content;
+        bool has_interp = false;
+        bool terminated = false;
+
+        while (pos < input.size()) {
+            if (input[pos] == '\'' && pos + 1 < input.size() && input[pos + 1] == '\'') {
+                // '' starts an escape (''', ''$, ''\) or closes the string.
+                const char next = pos + 2 < input.size() ? input[pos + 2] : '\0';
+                if (next == '\'') {
+                    raw_content += "''";
+                } else if (next == '$') {
+                    raw_content += '$';
+                } else if (next == '\\') {
+                    // NOTE(review): in Nix ''\ escapes the character after it
+                    // (''\n is a newline); only a backslash is emitted here, as
+                    // before - confirm how later stages unescape it.
+                    raw_content += '\\';
+                } else {
+                    pos += 2;  // Closing ''
+                    col += 2;
+                    terminated = true;
+                    break;
+                }
+                pos += 3;
+                col += 3;
+                continue;
+            }
+
+            if (input[pos] == '$' && pos + 1 < input.size() && input[pos + 1] == '{') {
+                has_interp = true;
+            }
+            if (input[pos] == '\n') {
+                line++;
+                col = 1;
+            } else {
+                col++;
+            }
+            raw_content += input[pos];
+            pos++;
+        }
+
+        if (!terminated) {
+            throw std::runtime_error("Unterminated indented string");
+        }
+
+        Token::Type type = has_interp ? Token::INDENTED_STRING_INTERP : Token::INDENTED_STRING;
+        tokens.push_back({type, strip_indentation(raw_content), start_line, start_col});
+    }
+
+    // Removes the common leading indentation (spaces/tabs) from the body of an
+    // indented string. Whitespace-only lines do not count towards the common
+    // indentation, a blank first line (the rest of the line holding the opening
+    // '') is dropped, and whitespace before the closing '' is removed, so that
+    // "\n  a\n  b\n" becomes "a\nb\n".
+    std::string strip_indentation(const std::string& s) {
+        if (s.empty())
+            return s;
+
+        // Split on '\n'; a trailing newline yields a final empty line so that
+        // re-joining below reproduces it.
+        std::vector<std::string> lines;
+        std::string current_line;
+        for (char c : s) {
+            if (c == '\n') {
+                lines.push_back(current_line);
+                current_line.clear();
+            } else {
+                current_line += c;
+            }
+        }
+        lines.push_back(current_line);
+
+        const auto is_blank = [](const std::string& text) {
+            return text.find_first_not_of(" \t") == std::string::npos;
+        };
+        if (lines.size() > 1 && is_blank(lines.front()))
+            lines.erase(lines.begin());
+        if (is_blank(lines.back()))
+            lines.back().clear();
+
+        // Minimum indentation over the lines that have content.
+        size_t min_indent = std::string::npos;
+        for (const auto& text : lines) {
+            const size_t indent = text.find_first_not_of(" \t");
+            if (indent != std::string::npos)
+                min_indent = std::min(min_indent, indent);
+        }
+        if (min_indent == std::string::npos)
+            min_indent = 0;
+
+        std::string result;
+        for (size_t i = 0; i < lines.size(); i++) {
+            const auto& text = lines[i];
+            // Blank lines may be shorter than the common indentation.
+            result.append(text, std::min(min_indent, text.size()), std::string::npos);
+            if (i + 1 < lines.size())
+                result += '\n';
+        }
+
+        return result;
+    }
+
     void tokenize_path() {
         size_t start = pos;
         while (pos < input.size() && !isspace(input[pos]) && input[pos] != '(' && input[pos] != ')' &&
-               input[pos] != '{' && input[pos] != '}' && input[pos] != '[' && input[pos] != ']') {
+               input[pos] != '{' && input[pos] != '}' && input[pos] != '[' && input[pos] != ']' &&
+               input[pos] != ';') {
             pos++;
         }
         std::string path = input.substr(start, pos - start);
@@ -385,6 +536,22 @@ private:
         col += path.size();
     }
 
+    void tokenize_home_path() {
+        size_t start = pos;
+        pos++;  // Skip ~
+        if (pos < input.size() && input[pos] == '/') {
+            // Home-relative path ~/something
+            while (pos < input.size() && !isspace(input[pos]) && input[pos] != '(' && input[pos] != ')' &&
+                   input[pos] != '{' && input[pos] != '}' && input[pos] != '[' && input[pos] != ']' &&
+                   input[pos] != ';') {
+                pos++;
+            }
+        }
+        std::string path = input.substr(start, pos - start);
+        tokens.push_back({Token::PATH, path, line, col});
+        col += path.size();
+    }
+
     void tokenize_int() {
         size_t start = pos;
         if (input[pos] == '-')
@@ -587,6 +754,45 @@ public:
             return std::make_shared(IfNode(cond, then, else_));
         }
         if (consume(Token::LET)) {
+            // Check for ancient let syntax: let { x = 1; body = x; }
+            if (current().type == Token::LBRACE) {
+                advance();  // consume {
+                std::vector>> bindings;
+                std::shared_ptr body_expr;
+
+                while (current().type != Token::RBRACE && current().type != Token::EOF_) {
+                    if (current().type != Token::IDENT && current().type != Token::STRING &&
+                        current().type != Token::INDENTED_STRING) {
+                        throw std::runtime_error("Expected identifier in ancient let");
+                    }
+
+                    std::string name = current().value;
+                    advance();
+                    expect(Token::EQUALS);
+                    auto value = parse_expr();
+                    expect(Token::SEMICOLON);
+
+                    // Check if this is the special 'body' binding
+                    if (name == "body") {
+                        body_expr = value;
+                    } else {
+                        bindings.push_back({name, value});
+                    }
+                }
+
+                expect(Token::RBRACE);
+
+                if (!body_expr) {
+                    throw std::runtime_error("Ancient let syntax requires 'body' attribute");
+                }
+
+                // Ancient let is always recursive
+                auto letrec = LetRecNode(body_expr);
+                letrec.bindings = std::move(bindings);
+                return std::make_shared(std::move(letrec));
+            }
+
+            // Modern let syntax: let x = 1; in x
             bool is_rec = consume(Token::REC);
             std::vector>> bindings;
             parse_bindings(bindings);
@@ -653,11 +859,23 @@ public:
                 // Continue loop to handle multi-dot selections (a.b.c)
                 continue;
             }
-            // If we get here, the token after DOT was not IDENT or LBRACE
+            // If we get here, the token after DOT was not IDENT
             // This is a parse error, but we'll just return what we have
             break;
         }
 
+        // Check for 'or' default value: a.b or default
+        // This is checked after all selections, so works for any selection depth
+        // 'or' is contextual - only special after a selection expression
+        if (left->get_if() && current().type == Token::IDENT && current().value == "or") {
+            advance();
+            // Parse default as a primary expression
+            auto default_expr = parse_expr3();
+            // Update the SelectNode with the default expression
+            auto* select = left->get_if();
+            select->default_expr = default_expr;
+        }
+
         return left;
     }
 
@@ -748,6 +966,17 @@ public:
             return parse_string_interp(str_token.value);
         }
 
+        if (t.type == Token::INDENTED_STRING) {
+            advance();
+            return std::make_shared(ConstStringNode(t.value));
+        }
+
+        if (t.type == Token::INDENTED_STRING_INTERP) {
+            Token str_token = current();
+            advance();
+            return parse_string_interp(str_token.value);
+        }
+
         if (t.type == Token::PATH) {
             advance();
             return std::make_shared(ConstPathNode(t.value));
@@ -808,18 +1037,61 @@ public:
                 continue;
             }
 
-            if (current().type == Token::IDENT || current().type == Token::STRING) {
-                Token key = current();
+            // Check for dynamic attribute name: ${expr} = value
+            if (current().type == Token::STRING_INTERP ||
+                current().type == Token::INDENTED_STRING_INTERP) {
+                Token str_token = current();
                 advance();
-                std::string key_str = key.value;
+                auto name_expr = parse_string_interp(str_token.value);
 
                 if (consume(Token::EQUALS)) {
                     auto value = parse_expr();
-                    attrs.attrs.push_back({key_str, value});
+                    // NOTE: name_expr is not used yet - every dynamic attribute is
+                    // stored under the placeholder key "__dynamic__", so the computed
+                    // name is lost and several dynamic keys in one set collide.
+                    // TODO: Full dynamic attr support needs IR node for dynamic keys
+                    attrs.attrs.push_back({"__dynamic__", value});
+                }
+            } else if (current().type == Token::IDENT || current().type == Token::STRING ||
+                       current().type == Token::INDENTED_STRING) {
+                // Parse attribute path: a.b.c = value
+                std::vector<std::string> path;
+                path.push_back(current().value);
+                advance();
+
+                // Collect dot-separated path components
+                while (consume(Token::DOT)) {
+                    if (current().type == Token::IDENT || current().type == Token::STRING ||
+                        current().type == Token::INDENTED_STRING) {
+                        path.push_back(current().value);
+                        advance();
+                    } else {
+                        throw std::runtime_error("Expected attribute name after '.'");
+                    }
+                }
+
+                if (consume(Token::EQUALS)) {
+                    auto value = parse_expr();
+
+                    // Desugar nested paths: a.b.c = v becomes a = { b = { c = v; }; }
+                    if (path.size() == 1) {
+                        // Simple case: just one key
+                        attrs.attrs.push_back({path[0], value});
+                    } else {
+                        // Nested case: build nested attrsets from right to left
+                        auto nested = value;
+                        for (size_t i = path.size() - 1; i > 0; i--) {
+                            auto inner_attrs = AttrsetNode(false);
+                            inner_attrs.attrs.push_back({path[i], nested});
+                            nested = std::make_shared(std::move(inner_attrs));
+                        }
+                        attrs.attrs.push_back({path[0], nested});
+                    }
                 } else if (consume(Token::AT)) {
+                    // @ pattern - not affected by nested paths
                     auto pattern = parse_expr();
                     auto value = parse_expr();
-                    attrs.attrs.push_back({key_str, value});
+                    attrs.attrs.push_back({path[0], value});
                 }
             }
 