#include "parser.h"

#include <array>
#include <cctype>
#include <cstdio>
#include <iostream>
#include <memory>
#include <optional>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

namespace nix_irc {

/// Returns `s` without leading/trailing spaces, tabs, CR and LF ("" if `s` is all whitespace).
[[maybe_unused]] static std::string trim(const std::string& s) {
  const size_t start = s.find_first_not_of(" \t\n\r");
  if (start == std::string::npos) return "";
  const size_t end = s.find_last_not_of(" \t\n\r");
  return s.substr(start, end - start + 1);
}

/// Reads the whole file at `path` into a string.
/// @throws std::runtime_error if the file cannot be opened, sized or read completely.
static std::string read_file(const std::string& path) {
  struct FileCloser {
    void operator()(FILE* f) const noexcept { fclose(f); }
  };
  // Binary mode: the byte count from ftell() must match what fread() returns.
  std::unique_ptr<FILE, FileCloser> f(fopen(path.c_str(), "rb"));
  if (!f) {
    throw std::runtime_error("Cannot open file: " + path);
  }

  if (fseek(f.get(), 0, SEEK_END) != 0) {
    throw std::runtime_error("Failed to read file: " + path);
  }
  const long size = ftell(f.get());
  // ftell() reports failure as -1; never feed that to the string constructor.
  if (size < 0 || fseek(f.get(), 0, SEEK_SET) != 0) {
    throw std::runtime_error("Failed to read file: " + path);
  }

  std::string content(static_cast<size_t>(size), '\0');
  if (fread(content.data(), 1, content.size(), f.get()) != content.size()) {
    throw std::runtime_error("Failed to read file: " + path);
  }
  return content;
}

/// Runs `cmd` through popen() and returns {captured stdout, ""}.
/// The second element is always empty: stderr is not captured.
/// @throws std::runtime_error if popen() fails or pclose() reports a non-zero status.
[[maybe_unused]] static std::pair<std::string, std::string> run_command(const std::string& cmd) {
  struct PipeCloser {
    void operator()(FILE* p) const noexcept { pclose(p); }
  };
  std::unique_ptr<FILE, PipeCloser> pipe(popen(cmd.c_str(), "r"));
  if (!pipe) throw std::runtime_error("popen failed");

  std::array<char, 256> buffer;
  std::string result;
  while (fgets(buffer.data(), static_cast<int>(buffer.size()), pipe.get()) != nullptr) {
    result += buffer.data();
  }

  // Release first so the exit status can be inspected and the pipe is closed exactly once.
  const int status = pclose(pipe.release());
  if (status != 0) {
    throw std::runtime_error("Command failed: " + cmd);
  }
  return {result, std::string()};
}

/// A lexical token with the 1-based source position of its first character.
struct Token {
  enum Type {
    LPAREN, RPAREN, LBRACE, RBRACE, LBRACKET, RBRACKET,
    IDENT, STRING, STRING_INTERP, PATH, INT, BOOL,
    LET, IN, REC, IF, THEN, ELSE, ASSERT, WITH, INHERIT,
    DOT, SEMICOLON, COLON, EQUALS, AT, COMMA, QUESTION, ELLIPSIS,
    // Operators
    PLUS, MINUS, STAR, SLASH, CONCAT,
    EQEQ, NE, LT, GT, LE, GE,
    AND, OR, IMPL, NOT,
    EOF_
  } type;
  std::string value;
  size_t line;
  size_t col;
  // STRING_INTERP only: {offset, length} of every "${...}" span inside `value`,
  // in order of appearance. Literal text (including an escaped "\${") is never listed.
  std::vector<std::pair<size_t, size_t>> interps{};
};

/// Converts source text into a flat token stream terminated by an EOF_ token.
class Lexer {
public:
  // The input is copied so the lexer never refers to a caller-owned (possibly temporary) string.
  explicit Lexer(const std::string& input) : input(input) {}

  /// Tokenizes the whole input. Intended to be called once per Lexer.
  /// Characters that start no known token are skipped.
  /// @throws std::runtime_error on an unterminated string literal or `${`.
  std::vector<Token> tokenize() {
    while (true) {
      skip_whitespace();
      if (pos >= input.size()) break;

      const char c = input[pos];
      switch (c) {
        case '(': emit(Token::LPAREN); break;
        case ')': emit(Token::RPAREN); break;
        case '{': emit(Token::LBRACE); break;
        case '}': emit(Token::RBRACE); break;
        case '[': emit(Token::LBRACKET); break;
        case ']': emit(Token::RBRACKET); break;
        case ';': emit(Token::SEMICOLON); break;
        case ':': emit(Token::COLON); break;
        case '@': emit(Token::AT); break;
        case ',': emit(Token::COMMA); break;
        case '?': emit(Token::QUESTION); break;
        case '*': emit(Token::STAR); break;
        case '"': tokenize_string(); break;

        case '=': emit_pair_or('=', Token::EQEQ, Token::EQUALS); break;
        case '!': emit_pair_or('=', Token::NE, Token::NOT); break;
        case '<': emit_pair_or('=', Token::LE, Token::LT); break;
        case '>': emit_pair_or('=', Token::GE, Token::GT); break;
        case '+': emit_pair_or('+', Token::CONCAT, Token::PLUS); break;

        case '&':
          if (next_is('&')) emit(Token::AND, 2); else skip_char();
          break;
        case '|':
          if (next_is('|')) emit(Token::OR, 2); else skip_char();
          break;

        case '-':
          if (next_is('>')) {
            emit(Token::IMPL, 2);
          } else if (pos + 1 < input.size() && is_digit(input[pos + 1]) &&
                     !previous_token_ends_operand()) {
            // A '-' directly after an operand is subtraction ("5-3"), not a sign.
            tokenize_int();
          } else {
            emit(Token::MINUS);
          }
          break;

        case '/':
          // A '/' followed by a path-looking character starts a path, otherwise it is division.
          if (pos + 1 < input.size() && (is_alnum(input[pos + 1]) || input[pos + 1] == '.')) {
            tokenize_path();
          } else {
            emit(Token::SLASH);
          }
          break;

        case '.':
          if (pos + 2 < input.size() && input[pos + 1] == '.' && input[pos + 2] == '.') {
            emit(Token::ELLIPSIS, 3);
          } else {
            emit(Token::DOT);
          }
          break;

        default:
          if (is_digit(c)) {
            tokenize_int();
          } else if (is_alpha(c) || c == '_') {
            tokenize_ident();
          } else {
            skip_char();
          }
          break;
      }
    }

    tokens.push_back(Token{Token::EOF_, "", line, col});
    return std::move(tokens);
  }

private:
  std::vector<Token> tokens;
  std::string input;
  size_t pos = 0;
  size_t line = 1;
  size_t col = 1;

  // <cctype> functions have undefined behaviour for negative char values, so
  // every call goes through an unsigned char.
  static bool is_digit(char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; }
  static bool is_alpha(char c) { return std::isalpha(static_cast<unsigned char>(c)) != 0; }
  static bool is_alnum(char c) { return std::isalnum(static_cast<unsigned char>(c)) != 0; }

  bool next_is(char expected) const {
    return pos + 1 < input.size() && input[pos + 1] == expected;
  }

  std::string position() const {
    return std::to_string(line) + ":" + std::to_string(col);
  }

  // Consumes one character, keeping line/col in sync.
  void advance_char() {
    if (input[pos] == '\n') {
      line++;
      col = 1;
    } else {
      col++;
    }
    pos++;
  }

  void skip_char() {
    pos++;
    col++;
  }

  // Appends a value-less token of `width` characters starting at the cursor.
  void emit(Token::Type type, size_t width = 1) {
    tokens.push_back(Token{type, "", line, col});
    pos += width;
    col += width;
  }

  // Emits `pair_type` if the next character is `second`, else `single_type`.
  void emit_pair_or(char second, Token::Type pair_type, Token::Type single_type) {
    if (next_is(second)) {
      emit(pair_type, 2);
    } else {
      emit(single_type, 1);
    }
  }

  // True when the most recent token can end an expression, i.e. a following
  // '-' must be a binary operator rather than the sign of a literal.
  bool previous_token_ends_operand() const {
    if (tokens.empty()) return false;
    switch (tokens.back().type) {
      case Token::IDENT:
      case Token::INT:
      case Token::BOOL:
      case Token::STRING:
      case Token::STRING_INTERP:
      case Token::PATH:
      case Token::RPAREN:
      case Token::RBRACKET:
      case Token::RBRACE:
        return true;
      default:
        return false;
    }
  }

  // Skips whitespace and '#' line comments.
  void skip_whitespace() {
    while (pos < input.size()) {
      const char c = input[pos];
      if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
        advance_char();
      } else if (c == '#') {
        while (pos < input.size() && input[pos] != '\n') skip_char();
      } else {
        break;
      }
    }
  }

  // Lexes a double-quoted string. Escapes in literal text are decoded;
  // "${...}" spans are copied verbatim into the value and recorded in
  // Token::interps so the parser can tell them apart from an escaped "\${".
  void tokenize_string() {
    Token tok{Token::STRING, "", line, col};
    advance_char();  // opening quote

    while (pos < input.size() && input[pos] != '"') {
      const char c = input[pos];
      if (c == '\\' && pos + 1 < input.size()) {
        advance_char();
        switch (input[pos]) {
          case 'n': tok.value += '\n'; break;
          case 't': tok.value += '\t'; break;
          case 'r': tok.value += '\r'; break;
          case '"': tok.value += '"'; break;
          case '\\': tok.value += '\\'; break;
          case '$': tok.value += '$'; break;  // Escaped $
          default: tok.value += input[pos]; break;
        }
        advance_char();
      } else if (c == '$' && next_is('{')) {
        const size_t offset = tok.value.size();
        copy_interpolation(tok.value);
        tok.interps.push_back({offset, tok.value.size() - offset});
      } else {
        tok.value += c;
        advance_char();
      }
    }

    if (pos >= input.size()) {
      throw std::runtime_error("unterminated string literal starting at " +
                               std::to_string(tok.line) + ":" + std::to_string(tok.col));
    }
    advance_char();  // closing quote

    if (!tok.interps.empty()) tok.type = Token::STRING_INTERP;
    tokens.push_back(std::move(tok));
  }

  // Cursor is on the '$' of "${". Copies the span up to and including the
  // matching '}' into `out` without decoding anything; nested braces and
  // nested string literals are tracked so a '"' or '}' inside them does not
  // end the span early.
  void copy_interpolation(std::string& out) {
    out += "${";
    advance_char();
    advance_char();

    int depth = 1;
    while (pos < input.size()) {
      const char c = input[pos];
      if (c == '"') {
        copy_nested_string(out);
        continue;
      }
      out += c;
      advance_char();
      if (c == '{') {
        ++depth;
      } else if (c == '}' && --depth == 0) {
        return;
      }
    }
    throw std::runtime_error("unterminated ${ in string interpolation");
  }

  // Cursor is on the opening quote of a string literal inside an
  // interpolation. Copies it verbatim (escapes untouched) into `out`.
  void copy_nested_string(std::string& out) {
    out += '"';
    advance_char();

    while (pos < input.size()) {
      const char c = input[pos];
      if (c == '$' && next_is('{')) {
        copy_interpolation(out);
        continue;
      }
      out += c;
      advance_char();
      if (c == '\\' && pos < input.size()) {
        out += input[pos];  // keep the escaped character, whatever it is
        advance_char();
      } else if (c == '"') {
        return;
      }
    }
    throw std::runtime_error("unterminated string literal inside ${ at " + position());
  }

  // Path characters follow the Nix grammar: letters, digits and "._-+/".
  // Anything else (notably ';' and ',') ends the path.
  static bool is_path_char(char c) {
    return is_alnum(c) || c == '.' || c == '_' || c == '-' || c == '+' || c == '/';
  }

  void tokenize_path() {
    const size_t start = pos;
    while (pos < input.size() && is_path_char(input[pos])) pos++;
    std::string path = input.substr(start, pos - start);
    const size_t width = path.size();
    tokens.push_back(Token{Token::PATH, std::move(path), line, col});
    col += width;
  }

  // Lexes an optionally '-'-prefixed run of decimal digits.
  void tokenize_int() {
    const size_t start = pos;
    if (input[pos] == '-') pos++;
    while (pos < input.size() && is_digit(input[pos])) pos++;
    std::string num = input.substr(start, pos - start);
    const size_t width = num.size();
    tokens.push_back(Token{Token::INT, std::move(num), line, col});
    col += width;
  }

  // Lexes an identifier or keyword. Continuation characters are letters,
  // digits, '_', '-' and '\'' (so "x'" is one identifier, not "x").
  void tokenize_ident() {
    static const std::pair<const char*, Token::Type> keywords[] = {
        {"let", Token::LET},         {"in", Token::IN},     {"rec", Token::REC},
        {"if", Token::IF},           {"then", Token::THEN}, {"else", Token::ELSE},
        {"assert", Token::ASSERT},   {"with", Token::WITH}, {"inherit", Token::INHERIT},
        {"true", Token::BOOL},       {"false", Token::BOOL},
    };

    const size_t start = pos;
    while (pos < input.size() &&
           (is_alnum(input[pos]) || input[pos] == '_' || input[pos] == '-' || input[pos] == '\'')) {
      pos++;
    }
    std::string ident = input.substr(start, pos - start);

    Token::Type type = Token::IDENT;
    for (const auto& [text, keyword_type] : keywords) {
      if (ident == text) {
        type = keyword_type;
        break;
      }
    }

    const size_t width = ident.size();
    tokens.push_back(Token{type, std::move(ident), line, col});
    col += width;
  }
};

/// Recursive-descent parser over the token stream produced by Lexer.
/// Syntax errors are reported on std::cerr and parsing continues best-effort;
/// only lexer errors and out-of-range integer literals throw.
class Parser::Impl {
public:
  // NOTE(review): the AST node type is assumed to be `Node` from parser.h — confirm.
  using NodePtr = std::shared_ptr<Node>;
  using Bindings = std::vector<std::pair<std::string, NodePtr>>;

  std::vector<Token> tokens;
  size_t pos = 0;
  std::string current_file;

  // Token `offset` positions ahead of the cursor, or a synthetic EOF_ past the end.
  const Token& peek(size_t offset) {
    if (pos + offset < tokens.size()) return tokens[pos + offset];
    static Token eof{Token::EOF_, "", 0, 0};
    return eof;
  }

  const Token& current() { return peek(0); }

  void advance() { pos++; }

  // Consumes the current token if it has the given type.
  bool consume(Token::Type type) {
    if (current().type == type) {
      advance();
      return true;
    }
    return false;
  }

  // Like consume(), but reports a mismatch on std::cerr (and does not advance).
  bool expect(Token::Type type) {
    if (current().type != type) {
      std::cerr << "Expected token " << type << " but got " << current().type << " at "
                << current().line << ":" << current().col << "\n";
      return false;
    }
    advance();
    return true;
  }

  // Binary operator precedence (higher = tighter binding, 0 = not a binary
  // operator). Ordering follows the Nix operator table: `->` is weakest,
  // `++` binds tighter than the arithmetic operators.
  int get_precedence(Token::Type type) {
    switch (type) {
      case Token::IMPL: return 1;
      case Token::OR: return 2;
      case Token::AND: return 3;
      case Token::EQEQ: case Token::NE: return 4;
      case Token::LT: case Token::GT: case Token::LE: case Token::GE: return 5;
      case Token::PLUS: case Token::MINUS: return 6;
      case Token::STAR: case Token::SLASH: return 7;
      case Token::CONCAT: return 8;
      default: return 0;
    }
  }

  // `->` and `++` associate to the right in Nix; every other operator here to the left.
  static bool is_right_associative(Token::Type type) {
    return type == Token::IMPL || type == Token::CONCAT;
  }

  // Convert token type to binary operator
  BinaryOp token_to_binop(Token::Type type) {
    switch (type) {
      case Token::PLUS: return BinaryOp::ADD;
      case Token::MINUS: return BinaryOp::SUB;
      case Token::STAR: return BinaryOp::MUL;
      case Token::SLASH: return BinaryOp::DIV;
      case Token::CONCAT: return BinaryOp::CONCAT;
      case Token::EQEQ: return BinaryOp::EQ;
      case Token::NE: return BinaryOp::NE;
      case Token::LT: return BinaryOp::LT;
      case Token::GT: return BinaryOp::GT;
      case Token::LE: return BinaryOp::LE;
      case Token::GE: return BinaryOp::GE;
      case Token::AND: return BinaryOp::AND;
      case Token::OR: return BinaryOp::OR;
      case Token::IMPL: return BinaryOp::IMPL;
      default: throw std::runtime_error("Invalid binary operator");
    }
  }

  // expr := lambda | if | let | assert | with | operator-expression
  NodePtr parse_expr() {
    if (auto lambda = try_parse_lambda()) return lambda;

    if (consume(Token::IF)) {
      auto cond = parse_expr();
      expect(Token::THEN);
      auto then = parse_expr();
      expect(Token::ELSE);
      auto else_ = parse_expr();
      return std::make_shared<Node>(IfNode(cond, then, else_));
    }

    if (consume(Token::LET)) {
      const bool is_rec = consume(Token::REC);
      Bindings bindings;
      parse_bindings(bindings);
      expect(Token::IN);
      auto body = parse_expr();

      if (is_rec) {
        auto letrec = LetRecNode(body);
        letrec.bindings = std::move(bindings);
        return std::make_shared<Node>(std::move(letrec));
      }
      auto let = LetNode(body);
      let.bindings = std::move(bindings);
      return std::make_shared<Node>(std::move(let));
    }

    if (consume(Token::ASSERT)) {
      auto cond = parse_expr();
      expect(Token::SEMICOLON);
      auto body = parse_expr();
      return std::make_shared<Node>(AssertNode(cond, body));
    }

    if (consume(Token::WITH)) {
      auto attrs = parse_expr();
      expect(Token::SEMICOLON);
      auto body = parse_expr();
      return std::make_shared<Node>(WithNode(attrs, body));
    }

    return parse_expr1();
  }

  NodePtr parse_expr1() { return parse_binary_op(0); }

  // Precedence climbing for binary operators
  NodePtr parse_binary_op(int min_prec) {
    auto left = parse_unary();

    while (true) {
      const Token::Type op = current().type;
      const int prec = get_precedence(op);
      if (prec == 0 || prec < min_prec) break;
      advance();

      // Right-associative operators let the right operand reuse the same level.
      auto right = parse_binary_op(is_right_associative(op) ? prec : prec + 1);
      left = std::make_shared<Node>(BinaryOpNode(token_to_binop(op), left, right));
    }
    return left;
  }

  // Prefix `-` and `!`. Handled above selection so that `!a.b` negates `a.b`
  // rather than selecting `.b` from `!a`.
  NodePtr parse_unary() {
    if (consume(Token::MINUS)) {
      auto operand = parse_unary();
      return std::make_shared<Node>(UnaryOpNode(UnaryOp::NEG, operand));
    }
    if (consume(Token::NOT)) {
      auto operand = parse_unary();
      return std::make_shared<Node>(UnaryOpNode(UnaryOp::NOT, operand));
    }
    return parse_selection();
  }

  // Attribute selection: `a.b.c` becomes Select(Select(a, "b"), "c").
  // Attribute names may be identifiers or plain string literals.
  NodePtr parse_selection() {
    auto left = parse_expr2();

    while (current().type == Token::DOT) {
      advance();
      const Token name = current();

      if (name.type == Token::IDENT || name.type == Token::STRING) {
        advance();
        auto attr = std::make_shared<Node>(ConstStringNode(name.value));
        left = std::make_shared<Node>(SelectNode(left, attr));
        continue;
      }

      if (consume(Token::LBRACE)) {
        // NOTE(review): "expr.{ ... }" extension; parse_expr_attrs is still a
        // stub, so the selected attribute name is the brace token's empty value.
        auto result = std::make_shared<Node>(
            SelectNode(left, std::make_shared<Node>(ConstStringNode(name.value))));
        parse_expr_attrs(result);
        expect(Token::RBRACE);
        return result;
      }

      // Dangling '.': report it instead of dropping it silently.
      expect(Token::IDENT);
      return left;
    }
    return left;
  }

  void parse_expr_attrs(std::shared_ptr<Node>&) {
    // Extended selection syntax
  }

  // Postfix application forms: `f[arg]` and `f "string"`.
  NodePtr parse_expr2() {
    NodePtr left = parse_expr3();

    while (true) {
      if (current().type == Token::LBRACKET) {
        advance();
        auto arg = parse_expr();
        expect(Token::RBRACKET);
        left = std::make_shared<Node>(AppNode(left, arg));
      } else if (current().type == Token::STRING) {
        const Token s = current();
        advance();
        auto arg = std::make_shared<Node>(ConstStringNode(s.value));
        left = std::make_shared<Node>(AppNode(left, arg));
      } else {
        break;
      }
    }
    return left;
  }

  // Primary expressions: parenthesised expression, attrset, list, literal, variable.
  // An unrecognised token is reported, skipped, and yields a null constant.
  NodePtr parse_expr3() {
    if (consume(Token::LPAREN)) {
      auto expr = parse_expr();
      expect(Token::RPAREN);
      return expr;
    }

    // `rec { ... }`
    if (current().type == Token::REC && peek(1).type == Token::LBRACE) {
      advance();
      advance();
      return parse_attrs(true);
    }

    if (consume(Token::LBRACE)) {
      return parse_attrs();
    }

    if (consume(Token::LBRACKET)) {
      return parse_list();
    }

    const Token t = current();

    switch (t.type) {
      case Token::IDENT:
        advance();
        return std::make_shared<Node>(VarNode(0, t.value));

      case Token::INT:
        advance();
        try {
          return std::make_shared<Node>(ConstIntNode(std::stoll(t.value)));
        } catch (const std::out_of_range&) {
          // Surface overflow as the same exception type as other parse failures.
          throw std::runtime_error("Integer literal out of range: " + t.value + " at " +
                                   std::to_string(t.line) + ":" + std::to_string(t.col));
        }

      case Token::STRING:
        advance();
        return std::make_shared<Node>(ConstStringNode(t.value));

      case Token::STRING_INTERP:
        advance();
        return parse_string_interp(t);

      case Token::PATH:
        advance();
        return std::make_shared<Node>(ConstPathNode(t.value));

      case Token::BOOL:
        advance();
        return std::make_shared<Node>(ConstBoolNode(t.value == "true"));

      default:
        break;
    }

    std::cerr << "Unknown token: " << t.value << " (type " << t.type << ")\n";
    advance();
    return std::make_shared<Node>(ConstNullNode());
  }

  // Parses the items after `inherit` up to and including the ';':
  //   inherit x y;         → x = x; y = y;
  //   inherit (expr) x y;  → x = expr.x; y = expr.y;
  // and appends them to `out`.
  template <class BindingList>
  void parse_inherit(BindingList& out) {
    NodePtr source;
    if (consume(Token::LPAREN)) {
      source = parse_expr();
      expect(Token::RPAREN);
    }

    while (current().type == Token::IDENT) {
      const std::string name = current().value;
      advance();
      if (source) {
        auto select = std::make_shared<Node>(
            SelectNode(source, std::make_shared<Node>(ConstStringNode(name))));
        out.push_back({name, select});
      } else {
        auto var = std::make_shared<Node>(VarNode(0, name));
        out.push_back({name, var});
      }
    }
    expect(Token::SEMICOLON);
  }

  // Parses the body of an attribute set; the opening '{' is already consumed.
  // `recursive` is true for the `rec { ... }` form; a `rec` keyword inside the
  // braces is also still accepted and marks the set recursive.
  NodePtr parse_attrs(bool recursive = false) {
    auto attrs = AttrsetNode(recursive);

    while (current().type != Token::RBRACE && current().type != Token::EOF_) {
      if (consume(Token::REC)) {
        attrs.recursive = true;
        continue;
      }

      if (consume(Token::INHERIT)) {
        parse_inherit(attrs.attrs);
        continue;
      }

      if (current().type == Token::IDENT || current().type == Token::STRING) {
        const std::string key_str = current().value;
        advance();

        if (consume(Token::EQUALS)) {
          auto value = parse_expr();
          attrs.attrs.push_back({key_str, value});
        } else if (consume(Token::AT)) {
          parse_expr();  // pattern: parsed to advance the cursor, otherwise unused
          auto value = parse_expr();
          attrs.attrs.push_back({key_str, value});
        }
      }

      if (consume(Token::COMMA)) continue;
      if (consume(Token::SEMICOLON)) continue;

      // Nothing recognised at this position: stop rather than loop forever.
      if (current().type != Token::RBRACE && current().type != Token::EOF_) {
        break;
      }
    }

    expect(Token::RBRACE);
    return std::make_shared<Node>(std::move(attrs));
  }

  // Parses a comma-separated list; the opening '[' is already consumed.
  // Elements are folded right-to-left into `__list elem rest` applications
  // ending in a null constant (which is also the result for "[]").
  NodePtr parse_list() {
    NodePtr list = std::make_shared<Node>(ConstNullNode());

    if (consume(Token::RBRACKET)) {
      return list;
    }

    std::vector<NodePtr> elements;
    while (current().type != Token::RBRACKET) {
      elements.push_back(parse_expr());
      if (!consume(Token::COMMA)) break;
    }
    expect(Token::RBRACKET);

    for (auto it = elements.rbegin(); it != elements.rend(); ++it) {
      auto cons = std::make_shared<Node>(
          AppNode(std::make_shared<Node>(VarNode(0, "__list")), *it));
      list = std::make_shared<Node>(AppNode(cons, list));
    }
    return list;
  }

  // Parses `name = expr;`, `name @ pattern expr;` and `inherit ...;` entries
  // of a let block, appending them to `bindings`. Stops at the first token
  // that cannot start a binding or after a binding without a trailing ';'.
  void parse_bindings(Bindings& bindings) {
    while (current().type == Token::IDENT || current().type == Token::INHERIT) {
      if (consume(Token::INHERIT)) {
        parse_inherit(bindings);
        continue;
      }

      const Token key = current();
      advance();

      if (consume(Token::AT)) {
        parse_expr();  // pattern: parsed to advance the cursor, otherwise unused
        auto value = parse_expr();
        bindings.push_back({key.value, value});
      } else {
        expect(Token::EQUALS);
        auto value = parse_expr();
        bindings.push_back({key.value, value});
      }

      if (!consume(Token::SEMICOLON)) break;
    }
  }

  // Try to parse lambda, return nullptr (with the cursor restored) if the
  // tokens at the cursor do not form one. Recognised forms:
  //   x: body
  //   { a, b ? default, ... }: body
  //   args@{ ... }: body
  NodePtr try_parse_lambda() {
    const size_t saved_pos = pos;
    std::optional<std::string> named_arg;

    if (current().type == Token::IDENT) {
      const Token name = current();
      advance();

      if (consume(Token::AT)) {
        named_arg = name.value;
      } else if (consume(Token::COLON)) {
        // Simple lambda: x: body
        auto body = parse_expr();
        auto lambda = LambdaNode(1, body);
        lambda.param_name = name.value;
        return std::make_shared<Node>(std::move(lambda));
      } else {
        pos = saved_pos;
        return nullptr;
      }
    }

    if (current().type != Token::LBRACE) {
      pos = saved_pos;
      return nullptr;
    }
    advance();

    struct Field {
      std::string name;
      std::optional<NodePtr> default_val;
    };
    std::vector<Field> fields;
    bool has_ellipsis = false;

    while (current().type != Token::RBRACE && current().type != Token::EOF_) {
      if (consume(Token::ELLIPSIS)) {
        has_ellipsis = true;
        if (consume(Token::COMMA)) continue;
        break;
      }
      if (current().type != Token::IDENT) break;

      Field field;
      field.name = current().value;
      advance();
      if (consume(Token::QUESTION)) {
        field.default_val = parse_expr();
      }
      fields.push_back(std::move(field));

      if (!consume(Token::COMMA)) break;
    }

    // Both "}" and ":" must follow, otherwise this was e.g. an attribute set.
    if (!consume(Token::RBRACE) || !consume(Token::COLON)) {
      pos = saved_pos;
      return nullptr;
    }

    auto body = parse_expr();

    // Desugar the pattern into a one-argument lambda whose body is a let:
    //   { a, b ? x }: body  →  arg: let a = arg.a; b = if arg ? b then arg.b else x; in body
    const std::string arg_name = named_arg.value_or("_arg");
    auto arg_var = std::make_shared<Node>(VarNode(0, arg_name));

    Bindings bindings;
    for (const auto& field : fields) {
      auto select = std::make_shared<Node>(
          SelectNode(arg_var, std::make_shared<Node>(ConstStringNode(field.name))));

      if (field.default_val) {
        auto has_attr = std::make_shared<Node>(
            HasAttrNode(arg_var, std::make_shared<Node>(ConstStringNode(field.name))));
        auto if_node = std::make_shared<Node>(IfNode(has_attr, select, *field.default_val));
        bindings.push_back({field.name, if_node});
      } else {
        bindings.push_back({field.name, select});
      }
    }

    // If named pattern, also bind the argument name
    if (named_arg) {
      bindings.push_back({*named_arg, arg_var});
    }

    auto let = LetNode(body);
    let.bindings = std::move(bindings);
    auto let_node = std::make_shared<Node>(std::move(let));

    auto lambda = LambdaNode(1, let_node);
    lambda.param_name = arg_name;
    lambda.strict_pattern = !has_ellipsis;
    return std::make_shared<Node>(std::move(lambda));
  }

  // Lexes and parses `source` as a stand-alone expression, temporarily
  // replacing the parser's token stream. The outer stream and cursor are
  // restored on every exit path, including exceptions.
  NodePtr parse_sub_expression(const std::string& source) {
    struct StateGuard {
      Impl& self;
      std::vector<Token> saved_tokens;
      size_t saved_pos;
      ~StateGuard() {
        self.tokens = std::move(saved_tokens);
        self.pos = saved_pos;
      }
    };

    Lexer lexer(source);
    std::vector<Token> expr_tokens = lexer.tokenize();

    StateGuard guard{*this, std::move(tokens), pos};
    tokens = std::move(expr_tokens);
    pos = 0;
    return parse_expr();
  }

  // Builds the AST for an interpolated string: literal pieces become string
  // constants, every "${expr}" becomes `toString expr`, and the pieces are
  // joined left-to-right with ADD.
  NodePtr parse_string_interp(const Token& tok) {
    const std::string& raw = tok.value;
    std::vector<NodePtr> parts;
    size_t cursor = 0;

    for (const auto& [offset, length] : tok.interps) {
      if (offset > cursor) {
        parts.push_back(
            std::make_shared<Node>(ConstStringNode(raw.substr(cursor, offset - cursor))));
      }

      // Each span is "${" + source + "}", so strip two leading and one trailing character.
      auto expr = parse_sub_expression(raw.substr(offset + 2, length - 3));

      auto to_string = std::make_shared<Node>(VarNode(0, "toString"));
      parts.push_back(std::make_shared<Node>(AppNode(to_string, expr)));
      cursor = offset + length;
    }

    if (cursor < raw.size()) {
      parts.push_back(std::make_shared<Node>(ConstStringNode(raw.substr(cursor))));
    }

    if (parts.empty()) {
      return std::make_shared<Node>(ConstStringNode(""));
    }

    auto result = parts[0];
    for (size_t j = 1; j < parts.size(); j++) {
      // Use ADD (+) for string concatenation; CONCAT (++) is Nix list concatenation
      result = std::make_shared<Node>(BinaryOpNode(BinaryOp::ADD, result, parts[j]));
    }
    return result;
  }
};

Parser::Parser() : pImpl(std::make_unique<Impl>()) {}

Parser::~Parser() = default;

/// Parses `source` and returns the root of its AST; `path` is recorded as the current file.
std::shared_ptr<Node> Parser::parse(const std::string& source, const std::string& path) {
  pImpl->current_file = path;

  Lexer lexer(source);
  pImpl->tokens = lexer.tokenize();
  pImpl->pos = 0;

  return pImpl->parse_expr();
}

/// Reads the file at `path` and parses its contents.
std::shared_ptr<Node> Parser::parse_file(const std::string& path) {
  std::string content = read_file(path);
  return parse(content, path);
}

}  // namespace nix_irc