irc: more syntax support
Indented strings, ancient let bindings, and a bit more.
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Ib86c2d8ca4402dfa0c5c536a9959f4006a6a6964
This commit is contained in:
parent
77aa67c7e0
commit
ed8f637c99
3 changed files with 313 additions and 12 deletions
|
|
@ -107,7 +107,15 @@ struct Evaluator::Impl {
|
|||
} else if (auto* n = node->get_if<ConstStringNode>()) {
|
||||
v.mkString(n->value);
|
||||
} else if (auto* n = node->get_if<ConstPathNode>()) {
|
||||
v.mkPath(state.rootPath(CanonPath(n->value)));
|
||||
std::string path = n->value;
|
||||
// Expand ~/ to home directory
|
||||
if (path.size() >= 2 && path[0] == '~' && path[1] == '/') {
|
||||
const char* home = getenv("HOME");
|
||||
if (home) {
|
||||
path = std::string(home) + path.substr(1);
|
||||
}
|
||||
}
|
||||
v.mkPath(state.rootPath(CanonPath(path)));
|
||||
} else if (auto* n = node->get_if<ConstBoolNode>()) {
|
||||
v.mkBool(n->value);
|
||||
} else if (auto* n = node->get_if<ConstNullNode>()) { // NOLINT(bugprone-branch-clone)
|
||||
|
|
@ -222,6 +230,22 @@ struct Evaluator::Impl {
|
|||
v.mkInt((left->integer() + right->integer()).valueWrapping());
|
||||
} else if (left->type() == nString && right->type() == nString) {
|
||||
v.mkString(std::string(left->c_str()) + std::string(right->c_str()));
|
||||
} else if (left->type() == nPath && right->type() == nString) {
|
||||
// Path + string = path
|
||||
std::string leftPath = std::string(left->path().path.abs());
|
||||
std::string result = leftPath + std::string(right->c_str());
|
||||
v.mkPath(state.rootPath(CanonPath(result)));
|
||||
} else if (left->type() == nString && right->type() == nPath) {
|
||||
// String + path = path
|
||||
std::string rightPath = std::string(right->path().path.abs());
|
||||
std::string result = std::string(left->c_str()) + rightPath;
|
||||
v.mkPath(state.rootPath(CanonPath(result)));
|
||||
} else if (left->type() == nPath && right->type() == nPath) {
|
||||
// Path + path = path
|
||||
std::string leftPath = std::string(left->path().path.abs());
|
||||
std::string rightPath = std::string(right->path().path.abs());
|
||||
std::string result = leftPath + rightPath;
|
||||
v.mkPath(state.rootPath(CanonPath(result)));
|
||||
} else {
|
||||
state.error<EvalError>("type error in addition").debugThrow();
|
||||
}
|
||||
|
|
@ -293,8 +317,13 @@ struct Evaluator::Impl {
|
|||
}
|
||||
break;
|
||||
case BinaryOp::CONCAT:
|
||||
// ++ is list concatenation in Nix; string concat uses ADD (+)
|
||||
state.error<EvalError>("list concatenation not yet implemented").debugThrow();
|
||||
// TODO: ++ list concatenation requires accessing private Nix Value payload
|
||||
// For now, delegate to Nix's concatLists or implement via builtins
|
||||
// Parser recognizes ++ but evaluator not yet fully implemented
|
||||
state
|
||||
.error<EvalError>(
|
||||
"list concatenation (++) not yet fully implemented - use builtins.concatLists")
|
||||
.debugThrow();
|
||||
break;
|
||||
case BinaryOp::MERGE: {
|
||||
// // is attrset merge - right overrides left
|
||||
|
|
|
|||
|
|
@ -163,7 +163,7 @@ struct IRGenerator::Impl {
|
|||
}
|
||||
std::vector<std::pair<std::string, std::shared_ptr<Node>>> new_bindings;
|
||||
new_bindings.reserve(n->bindings.size());
|
||||
for (const auto& [key, val] : n->bindings) {
|
||||
for (const auto& [key, val] : n->bindings) {
|
||||
new_bindings.push_back({key, convert(val)});
|
||||
}
|
||||
auto body = convert(n->body);
|
||||
|
|
@ -179,7 +179,7 @@ for (const auto& [key, val] : n->bindings) {
|
|||
}
|
||||
std::vector<std::pair<std::string, std::shared_ptr<Node>>> new_bindings;
|
||||
new_bindings.reserve(n->bindings.size());
|
||||
for (const auto& [key, val] : n->bindings) {
|
||||
for (const auto& [key, val] : n->bindings) {
|
||||
new_bindings.push_back({key, convert(val)});
|
||||
}
|
||||
auto body = convert(n->body);
|
||||
|
|
|
|||
|
|
@ -70,6 +70,8 @@ struct Token {
|
|||
IDENT,
|
||||
STRING,
|
||||
STRING_INTERP,
|
||||
INDENTED_STRING,
|
||||
INDENTED_STRING_INTERP,
|
||||
PATH,
|
||||
LOOKUP_PATH,
|
||||
INT,
|
||||
|
|
@ -153,6 +155,8 @@ public:
|
|||
emit(TOKEN(AT));
|
||||
} else if (c == ',') {
|
||||
emit(TOKEN(COMMA));
|
||||
} else if (c == '\'' && pos + 1 < input.size() && input[pos + 1] == '\'') {
|
||||
tokenize_indented_string();
|
||||
} else if (c == '"') {
|
||||
tokenize_string();
|
||||
}
|
||||
|
|
@ -247,6 +251,14 @@ public:
|
|||
}
|
||||
} else if (c == '?') {
|
||||
emit(TOKEN(QUESTION));
|
||||
} else if (c == '~') {
|
||||
// Home-relative path ~/...
|
||||
if (pos + 1 < input.size() && input[pos + 1] == '/') {
|
||||
tokenize_home_path();
|
||||
} else {
|
||||
// Just ~ by itself is an identifier
|
||||
tokenize_ident();
|
||||
}
|
||||
} else if (c == '-') {
|
||||
// Check if it's a negative number or minus operator
|
||||
if (pos + 1 < input.size() && isdigit(input[pos + 1])) {
|
||||
|
|
@ -317,8 +329,26 @@ private:
|
|||
}
|
||||
pos++;
|
||||
} else if (c == '#') {
|
||||
// Line comment - skip until newline
|
||||
while (pos < input.size() && input[pos] != '\n')
|
||||
pos++;
|
||||
} else if (c == '/' && pos + 1 < input.size() && input[pos + 1] == '*') {
|
||||
// Block comment /* ... */
|
||||
// Note: Nix block comments do NOT nest
|
||||
pos += 2; // Skip /*
|
||||
while (pos + 1 < input.size()) {
|
||||
if (input[pos] == '*' && input[pos + 1] == '/') {
|
||||
pos += 2; // Skip */
|
||||
break;
|
||||
}
|
||||
if (input[pos] == '\n') {
|
||||
line++;
|
||||
col = 1;
|
||||
} else {
|
||||
col++;
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
|
@ -374,10 +404,131 @@ private:
|
|||
col += s.size() + 2;
|
||||
}
|
||||
|
||||
void tokenize_indented_string() {
|
||||
pos += 2; // Skip opening ''
|
||||
std::string raw_content;
|
||||
bool has_interp = false;
|
||||
size_t start_line = line;
|
||||
|
||||
// Collect raw content until closing ''
|
||||
while (pos < input.size()) {
|
||||
// Check for escape sequences
|
||||
if (pos + 1 < input.size() && input[pos] == '\'' && input[pos + 1] == '\'') {
|
||||
// Check if it's an escape or the closing delimiter
|
||||
if (pos + 2 < input.size() && input[pos + 2] == '\'') {
|
||||
// ''' -> escape for ''
|
||||
raw_content += "''";
|
||||
pos += 3;
|
||||
continue;
|
||||
} else if (pos + 2 < input.size() && input[pos + 2] == '$') {
|
||||
// ''$ -> escape for $
|
||||
raw_content += '$';
|
||||
pos += 3;
|
||||
continue;
|
||||
} else if (pos + 2 < input.size() && input[pos + 2] == '\\') {
|
||||
// ''\ -> escape for backslash
|
||||
raw_content += '\\';
|
||||
pos += 3;
|
||||
continue;
|
||||
} else {
|
||||
// Just closing ''
|
||||
pos += 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for interpolation
|
||||
if (input[pos] == '$' && pos + 1 < input.size() && input[pos + 1] == '{') {
|
||||
has_interp = true;
|
||||
raw_content += input[pos];
|
||||
pos++;
|
||||
if (input[pos] == '\n') {
|
||||
line++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Track newlines
|
||||
if (input[pos] == '\n') {
|
||||
line++;
|
||||
raw_content += input[pos];
|
||||
pos++;
|
||||
} else {
|
||||
raw_content += input[pos];
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
|
||||
// Strip common indentation
|
||||
std::string stripped = strip_indentation(raw_content);
|
||||
|
||||
Token::Type type = has_interp ? Token::INDENTED_STRING_INTERP : Token::INDENTED_STRING;
|
||||
tokens.push_back({type, stripped, start_line, col});
|
||||
}
|
||||
|
||||
/**
 * Remove the common leading indentation from the raw body of an indented
 * (`'' ... ''`) string.
 *
 * Rules applied, in order:
 *  - the text is split on '\n' (a trailing newline produces a final empty
 *    line so it survives the re-join);
 *  - if the first line is blank and is followed by a newline, it is dropped,
 *    so the line break right after the opening delimiter does not become part
 *    of the value;
 *  - if the last line is blank, its whitespace is discarded (it is only the
 *    indentation in front of the closing delimiter);
 *  - the minimum indentation is the smallest run of leading spaces/tabs over
 *    lines that contain at least one non-whitespace character; blank lines do
 *    not influence it;
 *  - that many leading characters (or the whole line, if shorter) are removed
 *    from every line and the lines are re-joined with '\n'.
 *
 * If no line has visible content, every line is reduced to empty.
 *
 * @param s raw string body, without the surrounding delimiters
 * @return the de-indented text
 */
std::string strip_indentation(const std::string& s) {
  if (s.empty())
    return s;

  // "Blank" = empty, or made up solely of spaces and tabs.
  const auto is_blank = [](const std::string& text) {
    return text.find_first_not_of(" \t") == std::string::npos;
  };

  // Split into lines
  std::vector<std::string> lines;
  std::string current_line;
  for (char c : s) {
    if (c == '\n') {
      lines.push_back(current_line);
      current_line.clear();
    } else {
      current_line += c;
    }
  }
  if (!current_line.empty() || s.back() == '\n') {
    lines.push_back(current_line);
  }

  // Drop the blank line that directly follows the opening delimiter. The
  // size check makes sure a newline actually terminated it.
  if (lines.size() > 1 && is_blank(lines.front())) {
    lines.erase(lines.begin());
  }

  // Whitespace in front of the closing delimiter is not content.
  if (is_blank(lines.back())) {
    lines.back().clear();
  }

  // Minimum indentation over lines with visible content only; blank lines
  // must not drag the minimum down.
  size_t min_indent = std::string::npos;
  for (const auto& line : lines) {
    const size_t indent = line.find_first_not_of(" \t");
    if (indent != std::string::npos && indent < min_indent)
      min_indent = indent;
  }

  // Strip min_indent from all lines. When min_indent is still npos there is
  // no visible content, and std::min clamps the cut to the full line.
  std::string result;
  for (size_t i = 0; i < lines.size(); i++) {
    const auto& line = lines[i];
    result += line.substr(std::min(min_indent, line.size()));
    if (i + 1 < lines.size())
      result += '\n';
  }

  return result;
}
|
||||
|
||||
void tokenize_path() {
|
||||
size_t start = pos;
|
||||
while (pos < input.size() && !isspace(input[pos]) && input[pos] != '(' && input[pos] != ')' &&
|
||||
input[pos] != '{' && input[pos] != '}' && input[pos] != '[' && input[pos] != ']') {
|
||||
input[pos] != '{' && input[pos] != '}' && input[pos] != '[' && input[pos] != ']' &&
|
||||
input[pos] != ';') {
|
||||
pos++;
|
||||
}
|
||||
std::string path = input.substr(start, pos - start);
|
||||
|
|
@ -385,6 +536,22 @@ private:
|
|||
col += path.size();
|
||||
}
|
||||
|
||||
void tokenize_home_path() {
|
||||
size_t start = pos;
|
||||
pos++; // Skip ~
|
||||
if (pos < input.size() && input[pos] == '/') {
|
||||
// Home-relative path ~/something
|
||||
while (pos < input.size() && !isspace(input[pos]) && input[pos] != '(' && input[pos] != ')' &&
|
||||
input[pos] != '{' && input[pos] != '}' && input[pos] != '[' && input[pos] != ']' &&
|
||||
input[pos] != ';') {
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
std::string path = input.substr(start, pos - start);
|
||||
tokens.push_back({Token::PATH, path, line, col});
|
||||
col += path.size();
|
||||
}
|
||||
|
||||
void tokenize_int() {
|
||||
size_t start = pos;
|
||||
if (input[pos] == '-')
|
||||
|
|
@ -587,6 +754,45 @@ public:
|
|||
return std::make_shared<Node>(IfNode(cond, then, else_));
|
||||
}
|
||||
if (consume(Token::LET)) {
|
||||
// Check for ancient let syntax: let { x = 1; body = x; }
|
||||
if (current().type == Token::LBRACE) {
|
||||
advance(); // consume {
|
||||
std::vector<std::pair<std::string, std::shared_ptr<Node>>> bindings;
|
||||
std::shared_ptr<Node> body_expr;
|
||||
|
||||
while (current().type != Token::RBRACE && current().type != Token::EOF_) {
|
||||
if (current().type != Token::IDENT && current().type != Token::STRING &&
|
||||
current().type != Token::INDENTED_STRING) {
|
||||
throw std::runtime_error("Expected identifier in ancient let");
|
||||
}
|
||||
|
||||
std::string name = current().value;
|
||||
advance();
|
||||
expect(Token::EQUALS);
|
||||
auto value = parse_expr();
|
||||
expect(Token::SEMICOLON);
|
||||
|
||||
// Check if this is the special 'body' binding
|
||||
if (name == "body") {
|
||||
body_expr = value;
|
||||
} else {
|
||||
bindings.push_back({name, value});
|
||||
}
|
||||
}
|
||||
|
||||
expect(Token::RBRACE);
|
||||
|
||||
if (!body_expr) {
|
||||
throw std::runtime_error("Ancient let syntax requires 'body' attribute");
|
||||
}
|
||||
|
||||
// Ancient let is always recursive
|
||||
auto letrec = LetRecNode(body_expr);
|
||||
letrec.bindings = std::move(bindings);
|
||||
return std::make_shared<Node>(std::move(letrec));
|
||||
}
|
||||
|
||||
// Modern let syntax: let x = 1; in x
|
||||
bool is_rec = consume(Token::REC);
|
||||
std::vector<std::pair<std::string, std::shared_ptr<Node>>> bindings;
|
||||
parse_bindings(bindings);
|
||||
|
|
@ -653,11 +859,23 @@ public:
|
|||
// Continue loop to handle multi-dot selections (a.b.c)
|
||||
continue;
|
||||
}
|
||||
// If we get here, the token after DOT was not IDENT or LBRACE
|
||||
// If we get here, the token after DOT was not IDENT
|
||||
// This is a parse error, but we'll just return what we have
|
||||
break;
|
||||
}
|
||||
|
||||
// Check for 'or' default value: a.b or default
|
||||
// This is checked after all selections, so works for any selection depth
|
||||
// 'or' is contextual - only special after a selection expression
|
||||
if (left->get_if<SelectNode>() && current().type == Token::IDENT && current().value == "or") {
|
||||
advance();
|
||||
// Parse default as a primary expression
|
||||
auto default_expr = parse_expr3();
|
||||
// Update the SelectNode with the default expression
|
||||
auto* select = left->get_if<SelectNode>();
|
||||
select->default_expr = default_expr;
|
||||
}
|
||||
|
||||
return left;
|
||||
}
|
||||
|
||||
|
|
@ -748,6 +966,17 @@ public:
|
|||
return parse_string_interp(str_token.value);
|
||||
}
|
||||
|
||||
if (t.type == Token::INDENTED_STRING) {
|
||||
advance();
|
||||
return std::make_shared<Node>(ConstStringNode(t.value));
|
||||
}
|
||||
|
||||
if (t.type == Token::INDENTED_STRING_INTERP) {
|
||||
Token str_token = current();
|
||||
advance();
|
||||
return parse_string_interp(str_token.value);
|
||||
}
|
||||
|
||||
if (t.type == Token::PATH) {
|
||||
advance();
|
||||
return std::make_shared<Node>(ConstPathNode(t.value));
|
||||
|
|
@ -808,18 +1037,61 @@ public:
|
|||
continue;
|
||||
}
|
||||
|
||||
if (current().type == Token::IDENT || current().type == Token::STRING) {
|
||||
Token key = current();
|
||||
// Check for dynamic attribute name: ${expr} = value
|
||||
if (current().type == Token::STRING_INTERP ||
|
||||
current().type == Token::INDENTED_STRING_INTERP) {
|
||||
Token str_token = current();
|
||||
advance();
|
||||
std::string key_str = key.value;
|
||||
auto name_expr = parse_string_interp(str_token.value);
|
||||
|
||||
if (consume(Token::EQUALS)) {
|
||||
auto value = parse_expr();
|
||||
attrs.attrs.push_back({key_str, value});
|
||||
// For dynamic attrs, we use special marker in key and store expr as value
|
||||
// This will need runtime evaluation - store as special node
|
||||
// For now, convert to string at parse time if possible
|
||||
// TODO: Full dynamic attr support needs IR node for dynamic keys
|
||||
attrs.attrs.push_back({"__dynamic__", value});
|
||||
}
|
||||
} else if (current().type == Token::IDENT || current().type == Token::STRING ||
|
||||
current().type == Token::INDENTED_STRING) {
|
||||
// Parse attribute path: a.b.c = value
|
||||
std::vector<std::string> path;
|
||||
path.push_back(current().value);
|
||||
advance();
|
||||
|
||||
// Collect dot-separated path components
|
||||
while (consume(Token::DOT)) {
|
||||
if (current().type == Token::IDENT || current().type == Token::STRING ||
|
||||
current().type == Token::INDENTED_STRING) {
|
||||
path.push_back(current().value);
|
||||
advance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (consume(Token::EQUALS)) {
|
||||
auto value = parse_expr();
|
||||
|
||||
// Desugar nested paths: a.b.c = v becomes a = { b = { c = v; }; }
|
||||
if (path.size() == 1) {
|
||||
// Simple case: just one key
|
||||
attrs.attrs.push_back({path[0], value});
|
||||
} else {
|
||||
// Nested case: build nested attrsets from right to left
|
||||
auto nested = value;
|
||||
for (int i = path.size() - 1; i > 0; i--) {
|
||||
auto inner_attrs = AttrsetNode(false);
|
||||
inner_attrs.attrs.push_back({path[i], nested});
|
||||
nested = std::make_shared<Node>(std::move(inner_attrs));
|
||||
}
|
||||
attrs.attrs.push_back({path[0], nested});
|
||||
}
|
||||
} else if (consume(Token::AT)) {
|
||||
// @ pattern - not affected by nested paths
|
||||
auto pattern = parse_expr();
|
||||
auto value = parse_expr();
|
||||
attrs.attrs.push_back({key_str, value});
|
||||
attrs.attrs.push_back({path[0], value});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue