Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: Ice1bfb5682ab48a967dc16f1378e23ae6a6a6964
950 lines
32 KiB
C++
950 lines
32 KiB
C++
#include "parser.h"

#include <array>
#include <cctype>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <optional>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
|
|
|
|
namespace nix_irc {
|
|
|
|
// Returns a copy of `s` without leading or trailing spaces, tabs, newlines
// and carriage returns. A string made only of those characters (or an empty
// one) yields an empty result.
static std::string trim(const std::string& s) {
    static const char* const kBlank = " \t\n\r";

    const size_t first = s.find_first_not_of(kBlank);
    if (first == std::string::npos) {
        return std::string();
    }

    // A non-blank character exists, so this search cannot fail.
    const size_t last = s.find_last_not_of(kBlank);
    return std::string(s, first, last - first + 1);
}
|
|
|
|
// Reads the whole file at `path` and returns its bytes as a string.
//
// Throws std::runtime_error("Cannot open file: <path>") when the file cannot
// be opened and std::runtime_error("Failed to read file: <path>") when its
// size cannot be determined or fewer bytes than expected could be read.
static std::string read_file(const std::string& path) {
    // Binary mode keeps the size reported by ftell equal to the number of
    // bytes fread delivers; text mode may translate line endings on some
    // platforms and make the two disagree.
    std::unique_ptr<FILE, int (*)(FILE*)> file(fopen(path.c_str(), "rb"), &fclose);
    if (!file) {
        throw std::runtime_error("Cannot open file: " + path);
    }

    // The unique_ptr closes the stream on every exit path, including the
    // exceptions thrown below and an allocation failure for `content`.
    if (fseek(file.get(), 0, SEEK_END) != 0) {
        throw std::runtime_error("Failed to read file: " + path);
    }
    const long size = ftell(file.get());
    // ftell reports errors as -1; never turn that into a huge size_t.
    if (size < 0 || fseek(file.get(), 0, SEEK_SET) != 0) {
        throw std::runtime_error("Failed to read file: " + path);
    }

    std::string content(static_cast<size_t>(size), '\0');
    if (fread(content.data(), 1, content.size(), file.get()) != content.size()) {
        throw std::runtime_error("Failed to read file: " + path);
    }
    return content;
}
|
|
|
|
// Runs `cmd` through popen and returns {captured stdout, ""}.
//
// Only the command's standard output is connected to the pipe, so the second
// element of the pair is always empty; it is kept for interface
// compatibility.
//
// Throws std::runtime_error("popen failed") when the pipe cannot be created
// and std::runtime_error("Command failed: <cmd>") when pclose reports a
// non-zero status.
//
// NOTE(review): `cmd` is handed to the shell verbatim. Callers must not build
// it from untrusted input.
static std::pair<std::string, std::string> run_command(const std::string& cmd) {
    // The deleter guarantees the pipe is closed even if appending to
    // `output` throws.
    std::unique_ptr<FILE, int (*)(FILE*)> pipe(popen(cmd.c_str(), "r"), &pclose);
    if (!pipe) {
        throw std::runtime_error("popen failed");
    }

    std::string output;
    std::array<char, 256> buffer;
    size_t got = 0;
    // fread with an explicit length keeps embedded NUL bytes, which a
    // C-string append after fgets would silently cut off.
    while ((got = fread(buffer.data(), 1, buffer.size(), pipe.get())) > 0) {
        output.append(buffer.data(), got);
    }

    // Release first so the stream is closed exactly once and its exit status
    // is available here.
    const int status = pclose(pipe.release());
    if (status != 0) {
        throw std::runtime_error("Command failed: " + cmd);
    }
    return {std::move(output), std::string()};
}
|
|
|
|
// One lexical token: its kind, its source text (left empty for punctuation
// and operators by the lexer) and the position recorded when it was produced.
struct Token {
    // The enumerator order is significant: diagnostics print the numeric
    // value of a token type.
    enum Type {
        // Brackets
        LPAREN,
        RPAREN,
        LBRACE,
        RBRACE,
        LBRACKET,
        RBRACKET,
        // Literals and names
        IDENT,
        STRING,
        STRING_INTERP,
        PATH,
        INT,
        BOOL,
        // Keywords
        LET,
        IN,
        REC,
        IF,
        THEN,
        ELSE,
        ASSERT,
        WITH,
        INHERIT,
        // Punctuation
        DOT,
        SEMICOLON,
        COLON,
        EQUALS,
        AT,
        COMMA,
        QUESTION,
        ELLIPSIS,
        // Operators
        PLUS,
        MINUS,
        STAR,
        SLASH,
        CONCAT,
        EQEQ,
        NE,
        LT,
        GT,
        LE,
        GE,
        AND,
        OR,
        IMPL,
        NOT,
        // End of input
        EOF_
    };

    Type type;
    std::string value;
    size_t line;
    size_t col;
};
|
|
|
|
class Lexer {
|
|
public:
|
|
// Binds the lexer to `input` and starts scanning at offset 0, line 1,
// column 1. Only a reference to `input` is stored, so the string must outlive
// the lexer.
Lexer(const std::string& input)
    : input(input),
      pos(0),
      line(1),
      col(1) {}
|
|
|
|
std::vector<Token> tokenize() {
|
|
#define TOKEN(t) Token{Token::t, "", line, col}
|
|
|
|
while (pos < input.size()) {
|
|
skip_whitespace();
|
|
if (pos >= input.size()) break;
|
|
|
|
char c = input[pos];
|
|
|
|
if (c == '(') { emit(TOKEN(LPAREN)); }
|
|
else if (c == ')') { emit(TOKEN(RPAREN)); }
|
|
else if (c == '{') { emit(TOKEN(LBRACE)); }
|
|
else if (c == '}') { emit(TOKEN(RBRACE)); }
|
|
else if (c == '[') { emit(TOKEN(LBRACKET)); }
|
|
else if (c == ']') { emit(TOKEN(RBRACKET)); }
|
|
else if (c == ';') { emit(TOKEN(SEMICOLON)); }
|
|
else if (c == ':') { emit(TOKEN(COLON)); }
|
|
else if (c == '@') { emit(TOKEN(AT)); }
|
|
else if (c == ',') { emit(TOKEN(COMMA)); }
|
|
else if (c == '"') { tokenize_string(); }
|
|
// Two-char operators
|
|
else if (c == '=' && pos + 1 < input.size() && input[pos + 1] == '=') {
|
|
tokens.push_back(TOKEN(EQEQ));
|
|
pos += 2; col += 2;
|
|
}
|
|
else if (c == '=') { emit(TOKEN(EQUALS)); }
|
|
else if (c == '!' && pos + 1 < input.size() && input[pos + 1] == '=') {
|
|
tokens.push_back(TOKEN(NE));
|
|
pos += 2; col += 2;
|
|
}
|
|
else if (c == '<' && pos + 1 < input.size() && input[pos + 1] == '=') {
|
|
tokens.push_back(TOKEN(LE));
|
|
pos += 2; col += 2;
|
|
}
|
|
else if (c == '>' && pos + 1 < input.size() && input[pos + 1] == '=') {
|
|
tokens.push_back(TOKEN(GE));
|
|
pos += 2; col += 2;
|
|
}
|
|
else if (c == '+' && pos + 1 < input.size() && input[pos + 1] == '+') {
|
|
tokens.push_back(TOKEN(CONCAT));
|
|
pos += 2; col += 2;
|
|
}
|
|
else if (c == '&' && pos + 1 < input.size() && input[pos + 1] == '&') {
|
|
tokens.push_back(TOKEN(AND));
|
|
pos += 2; col += 2;
|
|
}
|
|
else if (c == '|' && pos + 1 < input.size() && input[pos + 1] == '|') {
|
|
tokens.push_back(TOKEN(OR));
|
|
pos += 2; col += 2;
|
|
}
|
|
else if (c == '-' && pos + 1 < input.size() && input[pos + 1] == '>') {
|
|
tokens.push_back(TOKEN(IMPL));
|
|
pos += 2; col += 2;
|
|
}
|
|
// Single-char operators
|
|
else if (c == '+') { emit(TOKEN(PLUS)); }
|
|
else if (c == '*') { emit(TOKEN(STAR)); }
|
|
else if (c == '/') {
|
|
// Check if it's a path or division
|
|
if (pos + 1 < input.size() && (isalnum(input[pos + 1]) || input[pos + 1] == '.')) {
|
|
tokenize_path();
|
|
} else {
|
|
emit(TOKEN(SLASH));
|
|
}
|
|
}
|
|
else if (c == '<') { emit(TOKEN(LT)); }
|
|
else if (c == '>') { emit(TOKEN(GT)); }
|
|
else if (c == '!') { emit(TOKEN(NOT)); }
|
|
else if (c == '.') {
|
|
// Check for ellipsis (...)
|
|
if (pos + 2 < input.size() && input[pos + 1] == '.' && input[pos + 2] == '.') {
|
|
tokens.push_back(TOKEN(ELLIPSIS));
|
|
pos += 3; col += 3;
|
|
} else {
|
|
emit(TOKEN(DOT));
|
|
}
|
|
}
|
|
else if (c == '?') { emit(TOKEN(QUESTION)); }
|
|
else if (c == '-') {
|
|
// Check if it's a negative number or minus operator
|
|
if (pos + 1 < input.size() && isdigit(input[pos + 1])) {
|
|
tokenize_int();
|
|
} else {
|
|
emit(TOKEN(MINUS));
|
|
}
|
|
}
|
|
else if (isdigit(c)) { tokenize_int(); }
|
|
else if (isalpha(c) || c == '_') { tokenize_ident(); }
|
|
else { pos++; col++; }
|
|
}
|
|
tokens.push_back({Token::EOF_, "", line, col});
|
|
|
|
#undef TOKEN
|
|
return tokens;
|
|
}
|
|
|
|
private:
|
|
std::vector<Token> tokens;
|
|
const std::string& input;
|
|
size_t pos;
|
|
size_t line;
|
|
size_t col;
|
|
|
|
// Appends a token that occupies exactly one input character and moves the
// cursor (offset and column) past that character.
void emit(Token t) {
    tokens.push_back(std::move(t));
    ++pos;
    ++col;
}
|
|
|
|
void skip_whitespace() {
|
|
while (pos < input.size()) {
|
|
char c = input[pos];
|
|
if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
|
|
if (c == '\n') { line++; col = 1; }
|
|
else { col++; }
|
|
pos++;
|
|
} else if (c == '#') {
|
|
while (pos < input.size() && input[pos] != '\n') pos++;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
void tokenize_string() {
|
|
pos++;
|
|
std::string s;
|
|
bool has_interp = false;
|
|
|
|
while (pos < input.size() && input[pos] != '"') {
|
|
if (input[pos] == '\\' && pos + 1 < input.size()) {
|
|
pos++;
|
|
switch (input[pos]) {
|
|
case 'n': s += '\n'; break;
|
|
case 't': s += '\t'; break;
|
|
case 'r': s += '\r'; break;
|
|
case '"': s += '"'; break;
|
|
case '\\': s += '\\'; break;
|
|
case '$': s += '$'; break; // Escaped $
|
|
default: s += input[pos]; break;
|
|
}
|
|
pos++;
|
|
} else if (input[pos] == '$' && pos + 1 < input.size() && input[pos + 1] == '{') {
|
|
// Found interpolation marker
|
|
has_interp = true;
|
|
s += input[pos]; // Keep $ in raw string
|
|
pos++;
|
|
} else {
|
|
s += input[pos];
|
|
pos++;
|
|
}
|
|
}
|
|
pos++;
|
|
|
|
Token::Type type = has_interp ? Token::STRING_INTERP : Token::STRING;
|
|
tokens.push_back({type, s, line, col});
|
|
col += s.size() + 2;
|
|
}
|
|
|
|
void tokenize_path() {
|
|
size_t start = pos;
|
|
while (pos < input.size() && !isspace(input[pos]) &&
|
|
input[pos] != '(' && input[pos] != ')' &&
|
|
input[pos] != '{' && input[pos] != '}' &&
|
|
input[pos] != '[' && input[pos] != ']') {
|
|
pos++;
|
|
}
|
|
std::string path = input.substr(start, pos - start);
|
|
tokens.push_back({Token::PATH, path, line, col});
|
|
col += path.size();
|
|
}
|
|
|
|
void tokenize_int() {
|
|
size_t start = pos;
|
|
if (input[pos] == '-') pos++;
|
|
while (pos < input.size() && isdigit(input[pos])) pos++;
|
|
std::string num = input.substr(start, pos - start);
|
|
tokens.push_back({Token::INT, num, line, col});
|
|
col += num.size();
|
|
}
|
|
|
|
void tokenize_ident() {
|
|
size_t start = pos;
|
|
while (pos < input.size() && (isalnum(input[pos]) || input[pos] == '_' || input[pos] == '-')) pos++;
|
|
std::string ident = input.substr(start, pos - start);
|
|
|
|
Token::Type type = Token::IDENT;
|
|
if (ident == "let") type = Token::LET;
|
|
else if (ident == "in") type = Token::IN;
|
|
else if (ident == "rec") type = Token::REC;
|
|
else if (ident == "if") type = Token::IF;
|
|
else if (ident == "then") type = Token::THEN;
|
|
else if (ident == "else") type = Token::ELSE;
|
|
else if (ident == "assert") type = Token::ASSERT;
|
|
else if (ident == "with") type = Token::WITH;
|
|
else if (ident == "inherit") type = Token::INHERIT;
|
|
else if (ident == "true") type = Token::BOOL;
|
|
else if (ident == "false") type = Token::BOOL;
|
|
|
|
tokens.push_back({type, ident, line, col});
|
|
col += ident.size();
|
|
}
|
|
};
|
|
|
|
class Parser::Impl {
|
|
public:
|
|
std::vector<Token> tokens;
|
|
size_t pos = 0;
|
|
std::string current_file;
|
|
|
|
// Returns the token at the cursor, or a shared EOF_ token (line 0, column 0)
// once the cursor has moved past the last token.
const Token& current() {
    static Token eof{Token::EOF_, "", 0, 0};
    return pos < tokens.size() ? tokens[pos] : eof;
}
|
|
|
|
// Moves the cursor to the next token. No bounds check is done here;
// current() handles a cursor past the end.
void advance() {
    ++pos;
}
|
|
|
|
// If the current token has the given type, steps over it and returns true.
// Otherwise leaves the cursor alone and returns false.
bool consume(Token::Type type) {
    const bool matched = current().type == type;
    if (matched) {
        advance();
    }
    return matched;
}
|
|
|
|
// Like consume(), but a mismatch is reported on stderr with the numeric token
// types and the position of the offending token. The cursor only moves on a
// match. Returns whether the expected token was present.
bool expect(Token::Type type) {
    const Token& tok = current();
    if (tok.type == type) {
        advance();
        return true;
    }

    std::cerr << "Expected token " << type << " but got " << tok.type
              << " at " << tok.line << ":" << tok.col << "\n";
    return false;
}
|
|
|
|
// Get operator precedence (higher = tighter binding)
|
|
int get_precedence(Token::Type type) {
|
|
switch (type) {
|
|
case Token::OR: return 1;
|
|
case Token::AND: return 2;
|
|
case Token::IMPL: return 3;
|
|
case Token::EQEQ: case Token::NE: return 4;
|
|
case Token::LT: case Token::GT: case Token::LE: case Token::GE: return 5;
|
|
case Token::CONCAT: return 6;
|
|
case Token::PLUS: case Token::MINUS: return 7;
|
|
case Token::STAR: case Token::SLASH: return 8;
|
|
default: return 0;
|
|
}
|
|
}
|
|
|
|
// Convert token type to binary operator
|
|
BinaryOp token_to_binop(Token::Type type) {
|
|
switch (type) {
|
|
case Token::PLUS: return BinaryOp::ADD;
|
|
case Token::MINUS: return BinaryOp::SUB;
|
|
case Token::STAR: return BinaryOp::MUL;
|
|
case Token::SLASH: return BinaryOp::DIV;
|
|
case Token::CONCAT: return BinaryOp::CONCAT;
|
|
case Token::EQEQ: return BinaryOp::EQ;
|
|
case Token::NE: return BinaryOp::NE;
|
|
case Token::LT: return BinaryOp::LT;
|
|
case Token::GT: return BinaryOp::GT;
|
|
case Token::LE: return BinaryOp::LE;
|
|
case Token::GE: return BinaryOp::GE;
|
|
case Token::AND: return BinaryOp::AND;
|
|
case Token::OR: return BinaryOp::OR;
|
|
case Token::IMPL: return BinaryOp::IMPL;
|
|
default: throw std::runtime_error("Invalid binary operator");
|
|
}
|
|
}
|
|
|
|
std::shared_ptr<Node> parse_expr() {
|
|
// Try to parse lambda
|
|
auto lambda = try_parse_lambda();
|
|
if (lambda) return lambda;
|
|
|
|
if (consume(Token::IF)) {
|
|
auto cond = parse_expr();
|
|
expect(Token::THEN);
|
|
auto then = parse_expr();
|
|
expect(Token::ELSE);
|
|
auto else_ = parse_expr();
|
|
return std::make_shared<Node>(IfNode(cond, then, else_));
|
|
}
|
|
if (consume(Token::LET)) {
|
|
bool is_rec = consume(Token::REC);
|
|
std::vector<std::pair<std::string, std::shared_ptr<Node>>> bindings;
|
|
parse_bindings(bindings);
|
|
expect(Token::IN);
|
|
auto body = parse_expr();
|
|
|
|
if (is_rec) {
|
|
auto letrec = LetRecNode(body);
|
|
letrec.bindings = std::move(bindings);
|
|
return std::make_shared<Node>(std::move(letrec));
|
|
} else {
|
|
auto let = LetNode(body);
|
|
let.bindings = std::move(bindings);
|
|
return std::make_shared<Node>(std::move(let));
|
|
}
|
|
}
|
|
if (consume(Token::ASSERT)) {
|
|
auto cond = parse_expr();
|
|
expect(Token::SEMICOLON);
|
|
auto body = parse_expr();
|
|
return std::make_shared<Node>(AssertNode(cond, body));
|
|
}
|
|
if (consume(Token::WITH)) {
|
|
auto attrs = parse_expr();
|
|
expect(Token::SEMICOLON);
|
|
auto body = parse_expr();
|
|
return std::make_shared<Node>(WithNode(attrs, body));
|
|
}
|
|
|
|
return parse_expr1();
|
|
}
|
|
|
|
std::shared_ptr<Node> parse_expr1() {
|
|
return parse_binary_op(0);
|
|
}
|
|
|
|
// Precedence climbing for binary operators
|
|
std::shared_ptr<Node> parse_binary_op(int min_prec) {
|
|
auto left = parse_selection();
|
|
|
|
while (true) {
|
|
int prec = get_precedence(current().type);
|
|
if (prec == 0 || prec < min_prec) break;
|
|
|
|
Token op_token = current();
|
|
advance();
|
|
|
|
auto right = parse_binary_op(prec + 1);
|
|
left = std::make_shared<Node>(BinaryOpNode(
|
|
token_to_binop(op_token.type),
|
|
left,
|
|
right
|
|
));
|
|
}
|
|
|
|
return left;
|
|
}
|
|
|
|
std::shared_ptr<Node> parse_selection() {
|
|
auto left = parse_expr2();
|
|
|
|
while (current().type == Token::DOT) {
|
|
advance();
|
|
Token name = current();
|
|
if (name.type == Token::IDENT) {
|
|
advance();
|
|
auto attr = std::make_shared<Node>(ConstStringNode(name.value));
|
|
auto result = std::make_shared<Node>(SelectNode(left, attr));
|
|
|
|
if (consume(Token::DOT)) {
|
|
Token name2 = current();
|
|
if (name2.type == Token::IDENT) {
|
|
advance();
|
|
auto attr2 = std::make_shared<Node>(ConstStringNode(name2.value));
|
|
auto* curr = result->get_if<SelectNode>();
|
|
while (curr && consume(Token::DOT)) {
|
|
Token n = current();
|
|
expect(Token::IDENT);
|
|
auto a = std::make_shared<Node>(ConstStringNode(n.value));
|
|
curr->attr = std::make_shared<Node>(AppNode(
|
|
std::make_shared<Node>(AppNode(curr->attr, a)),
|
|
std::make_shared<Node>(ConstNullNode())
|
|
));
|
|
}
|
|
}
|
|
}
|
|
return result;
|
|
} else if (consume(Token::LBRACE)) {
|
|
auto result = std::make_shared<Node>(SelectNode(left, std::make_shared<Node>(ConstStringNode(name.value))));
|
|
parse_expr_attrs(result);
|
|
expect(Token::RBRACE);
|
|
return result;
|
|
}
|
|
return left;
|
|
}
|
|
|
|
return left;
|
|
}
|
|
|
|
// Placeholder for the extended `expr.{ ... }` selection syntax. It currently
// consumes no tokens and leaves the node passed in untouched.
void parse_expr_attrs(std::shared_ptr<Node>& /*selection*/) {}
|
|
|
|
std::shared_ptr<Node> parse_expr2() {
|
|
std::shared_ptr<Node> left = parse_expr3();
|
|
|
|
while (true) {
|
|
if (current().type == Token::LBRACKET) {
|
|
advance();
|
|
auto arg = parse_expr();
|
|
expect(Token::RBRACKET);
|
|
left = std::make_shared<Node>(AppNode(left, arg));
|
|
} else if (current().type == Token::STRING) {
|
|
Token s = current();
|
|
advance();
|
|
auto arg = std::make_shared<Node>(ConstStringNode(s.value));
|
|
left = std::make_shared<Node>(AppNode(left, arg));
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
return left;
|
|
}
|
|
|
|
std::shared_ptr<Node> parse_expr3() {
|
|
// Handle unary operators
|
|
if (consume(Token::MINUS)) {
|
|
auto operand = parse_expr3();
|
|
return std::make_shared<Node>(UnaryOpNode(UnaryOp::NEG, operand));
|
|
}
|
|
|
|
if (consume(Token::NOT)) {
|
|
auto operand = parse_expr3();
|
|
return std::make_shared<Node>(UnaryOpNode(UnaryOp::NOT, operand));
|
|
}
|
|
|
|
if (consume(Token::LPAREN)) {
|
|
auto expr = parse_expr();
|
|
expect(Token::RPAREN);
|
|
return expr;
|
|
}
|
|
|
|
if (consume(Token::LBRACE)) {
|
|
return parse_attrs();
|
|
}
|
|
|
|
if (consume(Token::LBRACKET)) {
|
|
return parse_list();
|
|
}
|
|
|
|
Token t = current();
|
|
|
|
if (t.type == Token::IDENT) {
|
|
advance();
|
|
return std::make_shared<Node>(VarNode(0, t.value));
|
|
}
|
|
|
|
if (t.type == Token::INT) {
|
|
advance();
|
|
return std::make_shared<Node>(ConstIntNode(std::stoll(t.value)));
|
|
}
|
|
|
|
if (t.type == Token::STRING) {
|
|
advance();
|
|
return std::make_shared<Node>(ConstStringNode(t.value));
|
|
}
|
|
|
|
if (t.type == Token::STRING_INTERP) {
|
|
Token str_token = current();
|
|
advance();
|
|
return parse_string_interp(str_token.value);
|
|
}
|
|
|
|
if (t.type == Token::PATH) {
|
|
advance();
|
|
return std::make_shared<Node>(ConstPathNode(t.value));
|
|
}
|
|
|
|
if (t.type == Token::BOOL) {
|
|
advance();
|
|
return std::make_shared<Node>(ConstBoolNode(t.value == "true"));
|
|
}
|
|
|
|
std::cerr << "Unknown token: " << t.value << " (type " << t.type << ")\n";
|
|
advance();
|
|
return std::make_shared<Node>(ConstNullNode());
|
|
}
|
|
|
|
std::shared_ptr<Node> parse_attrs() {
|
|
auto attrs = AttrsetNode(false);
|
|
|
|
while (current().type != Token::RBRACE && current().type != Token::EOF_) {
|
|
if (consume(Token::REC)) {
|
|
attrs.recursive = true;
|
|
continue;
|
|
}
|
|
|
|
// Handle inherit keyword
|
|
if (consume(Token::INHERIT)) {
|
|
std::shared_ptr<Node> source;
|
|
|
|
// Check for (expr) form
|
|
if (consume(Token::LPAREN)) {
|
|
source = parse_expr();
|
|
expect(Token::RPAREN);
|
|
}
|
|
|
|
// Parse identifier list
|
|
while (current().type == Token::IDENT) {
|
|
Token name = current();
|
|
advance();
|
|
|
|
if (source) {
|
|
// inherit (expr) x → x = expr.x
|
|
auto select = std::make_shared<Node>(SelectNode(
|
|
source,
|
|
std::make_shared<Node>(ConstStringNode(name.value))
|
|
));
|
|
attrs.attrs.push_back({name.value, select});
|
|
} else {
|
|
// inherit x → x = x
|
|
auto var = std::make_shared<Node>(VarNode(0, name.value));
|
|
attrs.attrs.push_back({name.value, var});
|
|
}
|
|
}
|
|
|
|
expect(Token::SEMICOLON);
|
|
continue;
|
|
}
|
|
|
|
if (current().type == Token::IDENT || current().type == Token::STRING) {
|
|
Token key = current();
|
|
advance();
|
|
std::string key_str = key.value;
|
|
|
|
if (consume(Token::EQUALS)) {
|
|
auto value = parse_expr();
|
|
attrs.attrs.push_back({key_str, value});
|
|
} else if (consume(Token::AT)) {
|
|
auto pattern = parse_expr();
|
|
auto value = parse_expr();
|
|
attrs.attrs.push_back({key_str, value});
|
|
}
|
|
}
|
|
|
|
if (consume(Token::COMMA)) continue;
|
|
if (consume(Token::SEMICOLON)) continue;
|
|
|
|
// If we get here and haven't handled the token, break
|
|
if (current().type != Token::RBRACE && current().type != Token::EOF_) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
expect(Token::RBRACE);
|
|
return std::make_shared<Node>(std::move(attrs));
|
|
}
|
|
|
|
std::shared_ptr<Node> parse_list() {
|
|
std::shared_ptr<Node> list = std::make_shared<Node>(ConstNullNode());
|
|
|
|
if (consume(Token::RBRACKET)) {
|
|
return list;
|
|
}
|
|
|
|
std::vector<std::shared_ptr<Node>> elements;
|
|
while (current().type != Token::RBRACKET) {
|
|
elements.push_back(parse_expr());
|
|
if (!consume(Token::COMMA)) break;
|
|
}
|
|
expect(Token::RBRACKET);
|
|
|
|
for (auto it = elements.rbegin(); it != elements.rend(); ++it) {
|
|
list = std::make_shared<Node>(AppNode(
|
|
std::make_shared<Node>(AppNode(
|
|
std::make_shared<Node>(VarNode(0, "__list")),
|
|
*it
|
|
)),
|
|
list
|
|
));
|
|
}
|
|
|
|
return list;
|
|
}
|
|
|
|
void parse_bindings(std::vector<std::pair<std::string, std::shared_ptr<Node>>>& bindings) {
|
|
while (current().type == Token::IDENT || current().type == Token::INHERIT) {
|
|
// Handle inherit keyword
|
|
if (consume(Token::INHERIT)) {
|
|
std::shared_ptr<Node> source;
|
|
|
|
// Check for (expr) form
|
|
if (consume(Token::LPAREN)) {
|
|
source = parse_expr();
|
|
expect(Token::RPAREN);
|
|
}
|
|
|
|
// Parse identifier list
|
|
while (current().type == Token::IDENT) {
|
|
Token name = current();
|
|
advance();
|
|
|
|
if (source) {
|
|
// inherit (expr) x → x = expr.x
|
|
auto select = std::make_shared<Node>(SelectNode(
|
|
source,
|
|
std::make_shared<Node>(ConstStringNode(name.value))
|
|
));
|
|
bindings.push_back({name.value, select});
|
|
} else {
|
|
// inherit x → x = x
|
|
auto var = std::make_shared<Node>(VarNode(0, name.value));
|
|
bindings.push_back({name.value, var});
|
|
}
|
|
}
|
|
|
|
expect(Token::SEMICOLON);
|
|
continue;
|
|
}
|
|
|
|
if (current().type != Token::IDENT) break;
|
|
Token key = current();
|
|
advance();
|
|
|
|
if (consume(Token::AT)) {
|
|
auto pattern = parse_expr();
|
|
auto value = parse_expr();
|
|
bindings.push_back({key.value, value});
|
|
} else {
|
|
expect(Token::EQUALS);
|
|
auto value = parse_expr();
|
|
bindings.push_back({key.value, value});
|
|
}
|
|
|
|
if (!consume(Token::SEMICOLON)) break;
|
|
}
|
|
}
|
|
|
|
// Try to parse lambda, return nullptr if not a lambda
|
|
std::shared_ptr<Node> try_parse_lambda() {
|
|
size_t saved_pos = pos;
|
|
|
|
// Check for named pattern: arg@{ ... }:
|
|
std::optional<std::string> named_arg;
|
|
if (current().type == Token::IDENT) {
|
|
Token name = current();
|
|
advance();
|
|
if (consume(Token::AT)) {
|
|
named_arg = name.value;
|
|
} else if (consume(Token::COLON)) {
|
|
// Simple lambda: x: body
|
|
auto body = parse_expr();
|
|
auto lambda = LambdaNode(1, body);
|
|
lambda.param_name = name.value;
|
|
return std::make_shared<Node>(std::move(lambda));
|
|
} else {
|
|
// Not a lambda, restore position
|
|
pos = saved_pos;
|
|
return nullptr;
|
|
}
|
|
}
|
|
|
|
// Check for pattern: { ... }:
|
|
if (current().type == Token::LBRACE) {
|
|
advance();
|
|
|
|
// Parse pattern fields
|
|
struct Field {
|
|
std::string name;
|
|
std::optional<std::shared_ptr<Node>> default_val;
|
|
};
|
|
std::vector<Field> fields;
|
|
bool has_ellipsis = false;
|
|
|
|
while (current().type != Token::RBRACE && current().type != Token::EOF_) {
|
|
if (consume(Token::ELLIPSIS)) {
|
|
has_ellipsis = true;
|
|
if (consume(Token::COMMA)) continue;
|
|
break;
|
|
}
|
|
|
|
if (current().type == Token::IDENT) {
|
|
Token field_name = current();
|
|
advance();
|
|
|
|
Field field;
|
|
field.name = field_name.value;
|
|
|
|
// Check for default value
|
|
if (consume(Token::QUESTION)) {
|
|
field.default_val = parse_expr();
|
|
}
|
|
|
|
fields.push_back(field);
|
|
|
|
if (consume(Token::COMMA)) continue;
|
|
break;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!consume(Token::RBRACE)) {
|
|
// Not a lambda pattern, restore
|
|
pos = saved_pos;
|
|
return nullptr;
|
|
}
|
|
|
|
if (!consume(Token::COLON)) {
|
|
// Not a lambda, restore
|
|
pos = saved_pos;
|
|
return nullptr;
|
|
}
|
|
|
|
// Parse body
|
|
auto body = parse_expr();
|
|
|
|
// Desugar pattern to lambda with let bindings
|
|
// { a, b ? x }: body → arg: let a = arg.a; b = if arg ? a then arg.a else x; in body
|
|
|
|
std::string arg_name = named_arg.value_or("_arg");
|
|
auto arg_var = std::make_shared<Node>(VarNode(0, arg_name));
|
|
|
|
std::vector<std::pair<std::string, std::shared_ptr<Node>>> bindings;
|
|
|
|
for (const auto& field : fields) {
|
|
// Create arg.field selection
|
|
auto select = std::make_shared<Node>(SelectNode(
|
|
arg_var,
|
|
std::make_shared<Node>(ConstStringNode(field.name))
|
|
));
|
|
|
|
if (field.default_val) {
|
|
// if arg ? field then arg.field else default
|
|
auto has_attr = std::make_shared<Node>(HasAttrNode(
|
|
arg_var,
|
|
std::make_shared<Node>(ConstStringNode(field.name))
|
|
));
|
|
auto if_node = std::make_shared<Node>(IfNode(
|
|
has_attr,
|
|
select,
|
|
*field.default_val
|
|
));
|
|
bindings.push_back({field.name, if_node});
|
|
} else {
|
|
bindings.push_back({field.name, select});
|
|
}
|
|
}
|
|
|
|
// If named pattern, also bind the argument name
|
|
if (named_arg) {
|
|
bindings.push_back({*named_arg, arg_var});
|
|
}
|
|
|
|
// Create let expression
|
|
auto let = LetNode(body);
|
|
let.bindings = std::move(bindings);
|
|
auto let_node = std::make_shared<Node>(std::move(let));
|
|
|
|
// Create lambda
|
|
auto lambda = LambdaNode(1, let_node);
|
|
lambda.param_name = arg_name;
|
|
lambda.strict_pattern = !has_ellipsis;
|
|
return std::make_shared<Node>(std::move(lambda));
|
|
}
|
|
|
|
// Not a lambda
|
|
pos = saved_pos;
|
|
return nullptr;
|
|
}
|
|
|
|
std::shared_ptr<Node> parse_string_interp(const std::string& raw) {
|
|
std::vector<std::shared_ptr<Node>> parts;
|
|
size_t i = 0;
|
|
std::string current_str;
|
|
|
|
while (i < raw.size()) {
|
|
if (raw[i] == '$' && i + 1 < raw.size() && raw[i + 1] == '{') {
|
|
// Save current string part if any
|
|
if (!current_str.empty()) {
|
|
parts.push_back(std::make_shared<Node>(ConstStringNode(current_str)));
|
|
current_str.clear();
|
|
}
|
|
|
|
// Find matching }
|
|
i += 2; // Skip ${
|
|
int depth = 1;
|
|
size_t expr_start = i;
|
|
bool in_string = false;
|
|
char string_quote = 0;
|
|
|
|
while (i < raw.size() && depth > 0) {
|
|
if (!in_string) {
|
|
if (raw[i] == '"' || raw[i] == '\'') {
|
|
in_string = true;
|
|
string_quote = raw[i];
|
|
} else if (raw[i] == '{') {
|
|
depth++;
|
|
} else if (raw[i] == '}') {
|
|
depth--;
|
|
}
|
|
} else {
|
|
if (raw[i] == string_quote && (i == 0 || raw[i-1] != '\\')) {
|
|
in_string = false;
|
|
} else if (raw[i] == '\\') {
|
|
i++;
|
|
}
|
|
}
|
|
if (depth > 0) i++;
|
|
}
|
|
|
|
if (depth > 0) {
|
|
throw std::runtime_error("unterminated ${ in string interpolation");
|
|
}
|
|
|
|
// Parse the expression
|
|
std::string expr_str = raw.substr(expr_start, i - expr_start);
|
|
|
|
// Tokenize and parse the expression
|
|
Lexer lexer(expr_str);
|
|
auto expr_tokens = lexer.tokenize();
|
|
|
|
// Save current state
|
|
auto saved_tokens = tokens;
|
|
auto saved_pos = pos;
|
|
|
|
// Parse expression
|
|
tokens = expr_tokens;
|
|
pos = 0;
|
|
auto expr = parse_expr();
|
|
|
|
// Restore state
|
|
tokens = saved_tokens;
|
|
pos = saved_pos;
|
|
|
|
// Convert to string using toString builtin
|
|
auto to_string = std::make_shared<Node>(VarNode(0, "toString"));
|
|
auto str_expr = std::make_shared<Node>(AppNode(to_string, expr));
|
|
parts.push_back(str_expr);
|
|
|
|
i++; // Skip }
|
|
} else {
|
|
current_str += raw[i];
|
|
i++;
|
|
}
|
|
}
|
|
|
|
// Add remaining string part
|
|
if (!current_str.empty()) {
|
|
parts.push_back(std::make_shared<Node>(ConstStringNode(current_str)));
|
|
}
|
|
|
|
// Build concatenation tree
|
|
if (parts.empty()) {
|
|
return std::make_shared<Node>(ConstStringNode(""));
|
|
}
|
|
|
|
auto result = parts[0];
|
|
for (size_t j = 1; j < parts.size(); j++) {
|
|
// Use ADD (+) for string concatenation; CONCAT (++) is Nix list concatenation
|
|
result = std::make_shared<Node>(BinaryOpNode(BinaryOp::ADD, result, parts[j]));
|
|
}
|
|
|
|
return result;
|
|
}
|
|
};
|
|
|
|
// Creates a parser with a fresh, empty implementation object.
Parser::Parser()
    : pImpl(std::make_unique<Impl>()) {
}

// Defined in this file, after Impl is complete.
Parser::~Parser() = default;
|
|
|
|
std::shared_ptr<Node> Parser::parse(const std::string& source, const std::string& path) {
|
|
pImpl->current_file = path;
|
|
|
|
Lexer lexer(source);
|
|
pImpl->tokens = lexer.tokenize();
|
|
pImpl->pos = 0;
|
|
|
|
return pImpl->parse_expr();
|
|
}
|
|
|
|
std::shared_ptr<Node> Parser::parse_file(const std::string& path) {
|
|
std::string content = read_file(path);
|
|
return parse(content, path);
|
|
}
|
|
|
|
}
|