irc: more syntax support

Indented strings, ancient let bindings and a bit more

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Ib86c2d8ca4402dfa0c5c536a9959f4006a6a6964
This commit is contained in:
raf 2026-02-22 20:03:25 +03:00
commit ed8f637c99
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
3 changed files with 313 additions and 12 deletions

View file

@ -107,7 +107,15 @@ struct Evaluator::Impl {
} else if (auto* n = node->get_if<ConstStringNode>()) {
v.mkString(n->value);
} else if (auto* n = node->get_if<ConstPathNode>()) {
v.mkPath(state.rootPath(CanonPath(n->value)));
std::string path = n->value;
// Expand ~/ to home directory
if (path.size() >= 2 && path[0] == '~' && path[1] == '/') {
const char* home = getenv("HOME");
if (home) {
path = std::string(home) + path.substr(1);
}
}
v.mkPath(state.rootPath(CanonPath(path)));
} else if (auto* n = node->get_if<ConstBoolNode>()) {
v.mkBool(n->value);
} else if (auto* n = node->get_if<ConstNullNode>()) { // NOLINT(bugprone-branch-clone)
@ -222,6 +230,22 @@ struct Evaluator::Impl {
v.mkInt((left->integer() + right->integer()).valueWrapping());
} else if (left->type() == nString && right->type() == nString) {
v.mkString(std::string(left->c_str()) + std::string(right->c_str()));
} else if (left->type() == nPath && right->type() == nString) {
// Path + string = path
std::string leftPath = std::string(left->path().path.abs());
std::string result = leftPath + std::string(right->c_str());
v.mkPath(state.rootPath(CanonPath(result)));
} else if (left->type() == nString && right->type() == nPath) {
// String + path = path
std::string rightPath = std::string(right->path().path.abs());
std::string result = std::string(left->c_str()) + rightPath;
v.mkPath(state.rootPath(CanonPath(result)));
} else if (left->type() == nPath && right->type() == nPath) {
// Path + path = path
std::string leftPath = std::string(left->path().path.abs());
std::string rightPath = std::string(right->path().path.abs());
std::string result = leftPath + rightPath;
v.mkPath(state.rootPath(CanonPath(result)));
} else {
state.error<EvalError>("type error in addition").debugThrow();
}
@ -293,8 +317,13 @@ struct Evaluator::Impl {
}
break;
case BinaryOp::CONCAT:
// ++ is list concatenation in Nix; string concat uses ADD (+)
state.error<EvalError>("list concatenation not yet implemented").debugThrow();
// TODO: ++ list concatenation requires accessing private Nix Value payload
// For now, delegate to Nix's concatLists or implement via builtins
// Parser recognizes ++ but evaluator not yet fully implemented
state
.error<EvalError>(
"list concatenation (++) not yet fully implemented - use builtins.concatLists")
.debugThrow();
break;
case BinaryOp::MERGE: {
// // is attrset merge - right overrides left

View file

@ -163,7 +163,7 @@ struct IRGenerator::Impl {
}
std::vector<std::pair<std::string, std::shared_ptr<Node>>> new_bindings;
new_bindings.reserve(n->bindings.size());
for (const auto& [key, val] : n->bindings) {
for (const auto& [key, val] : n->bindings) {
new_bindings.push_back({key, convert(val)});
}
auto body = convert(n->body);
@ -179,7 +179,7 @@ for (const auto& [key, val] : n->bindings) {
}
std::vector<std::pair<std::string, std::shared_ptr<Node>>> new_bindings;
new_bindings.reserve(n->bindings.size());
for (const auto& [key, val] : n->bindings) {
for (const auto& [key, val] : n->bindings) {
new_bindings.push_back({key, convert(val)});
}
auto body = convert(n->body);

View file

@ -70,6 +70,8 @@ struct Token {
IDENT,
STRING,
STRING_INTERP,
INDENTED_STRING,
INDENTED_STRING_INTERP,
PATH,
LOOKUP_PATH,
INT,
@ -153,6 +155,8 @@ public:
emit(TOKEN(AT));
} else if (c == ',') {
emit(TOKEN(COMMA));
} else if (c == '\'' && pos + 1 < input.size() && input[pos + 1] == '\'') {
tokenize_indented_string();
} else if (c == '"') {
tokenize_string();
}
@ -247,6 +251,14 @@ public:
}
} else if (c == '?') {
emit(TOKEN(QUESTION));
} else if (c == '~') {
// Home-relative path ~/...
if (pos + 1 < input.size() && input[pos + 1] == '/') {
tokenize_home_path();
} else {
// Just ~ by itself is an identifier
tokenize_ident();
}
} else if (c == '-') {
// Check if it's a negative number or minus operator
if (pos + 1 < input.size() && isdigit(input[pos + 1])) {
@ -317,8 +329,26 @@ private:
}
pos++;
} else if (c == '#') {
// Line comment - skip until newline
while (pos < input.size() && input[pos] != '\n')
pos++;
} else if (c == '/' && pos + 1 < input.size() && input[pos + 1] == '*') {
// Block comment /* ... */
// Note: Nix block comments do NOT nest
pos += 2; // Skip /*
while (pos + 1 < input.size()) {
if (input[pos] == '*' && input[pos + 1] == '/') {
pos += 2; // Skip */
break;
}
if (input[pos] == '\n') {
line++;
col = 1;
} else {
col++;
}
pos++;
}
} else {
break;
}
@ -374,10 +404,131 @@ private:
col += s.size() + 2;
}
void tokenize_indented_string() {
pos += 2; // Skip opening ''
std::string raw_content;
bool has_interp = false;
size_t start_line = line;
// Collect raw content until closing ''
while (pos < input.size()) {
// Check for escape sequences
if (pos + 1 < input.size() && input[pos] == '\'' && input[pos + 1] == '\'') {
// Check if it's an escape or the closing delimiter
if (pos + 2 < input.size() && input[pos + 2] == '\'') {
// ''' -> escape for ''
raw_content += "''";
pos += 3;
continue;
} else if (pos + 2 < input.size() && input[pos + 2] == '$') {
// ''$ -> escape for $
raw_content += '$';
pos += 3;
continue;
} else if (pos + 2 < input.size() && input[pos + 2] == '\\') {
// ''\ -> escape for backslash
raw_content += '\\';
pos += 3;
continue;
} else {
// Just closing ''
pos += 2;
break;
}
}
// Check for interpolation
if (input[pos] == '$' && pos + 1 < input.size() && input[pos + 1] == '{') {
has_interp = true;
raw_content += input[pos];
pos++;
if (input[pos] == '\n') {
line++;
}
continue;
}
// Track newlines
if (input[pos] == '\n') {
line++;
raw_content += input[pos];
pos++;
} else {
raw_content += input[pos];
pos++;
}
}
// Strip common indentation
std::string stripped = strip_indentation(raw_content);
Token::Type type = has_interp ? Token::INDENTED_STRING_INTERP : Token::INDENTED_STRING;
tokens.push_back({type, stripped, start_line, col});
}
/// Remove the common leading indentation from the body of an indented string.
///
/// @param s  Raw text between the opening and closing '' delimiters.
/// @return   The text with the shared indentation removed from every line.
///
/// Rules applied:
///  - Indentation is the run of spaces/tabs at the start of a line.
///  - If the first line is blank (only spaces/tabs) and is followed by a
///    newline, it is dropped together with that newline, so text starting on
///    the line after the opening '' does not begin with an empty line.
///  - A blank final line (typically the indentation in front of the closing
///    '') is reduced to an empty line.
///  - Blank lines never influence the common indentation; they only lose as
///    many leading characters as they actually have.
///
/// NOTE(review): tabs are counted as indentation here, as in the previous
/// implementation; upstream Nix appears to count only spaces - confirm.
std::string strip_indentation(const std::string& s) {
  if (s.empty())
    return s;

  const auto is_indent_char = [](char c) { return c == ' ' || c == '\t'; };
  const auto is_blank = [&](const std::string& text) {
    return std::all_of(text.begin(), text.end(), is_indent_char);
  };

  // Split on '\n'. A trailing newline yields a final empty entry, so joining
  // the entries with '\n' below reproduces it.
  std::vector<std::string> lines;
  std::string::size_type begin = 0;
  while (true) {
    const std::string::size_type newline = s.find('\n', begin);
    if (newline == std::string::npos) {
      lines.push_back(s.substr(begin));
      break;
    }
    lines.push_back(s.substr(begin, newline - begin));
    begin = newline + 1;
  }

  // Drop the blank remainder of the opening line (only when a newline follows)
  if (lines.size() > 1 && is_blank(lines.front()))
    lines.erase(lines.begin());

  // The indentation in front of the closing '' is not part of the string
  if (is_blank(lines.back()))
    lines.back().clear();

  // Minimum indentation over lines that contain actual content
  std::size_t min_indent = std::string::npos;
  for (const auto& text : lines) {
    const auto first_content = std::find_if_not(text.begin(), text.end(), is_indent_char);
    if (first_content == text.end())
      continue;  // blank line: ignored for the calculation
    min_indent = std::min(min_indent, static_cast<std::size_t>(first_content - text.begin()));
  }
  if (min_indent == std::string::npos)
    min_indent = 0;

  // Re-join, removing up to min_indent leading characters from each line
  std::string result;
  for (std::size_t i = 0; i < lines.size(); ++i) {
    if (i > 0)
      result += '\n';
    const std::string& text = lines[i];
    result.append(text, std::min(min_indent, text.size()), std::string::npos);
  }
  return result;
}
void tokenize_path() {
size_t start = pos;
while (pos < input.size() && !isspace(input[pos]) && input[pos] != '(' && input[pos] != ')' &&
input[pos] != '{' && input[pos] != '}' && input[pos] != '[' && input[pos] != ']') {
input[pos] != '{' && input[pos] != '}' && input[pos] != '[' && input[pos] != ']' &&
input[pos] != ';') {
pos++;
}
std::string path = input.substr(start, pos - start);
@ -385,6 +536,22 @@ private:
col += path.size();
}
/// Tokenize a home-relative path such as ~/foo/bar.
///
/// On entry `pos` is on the '~'. If a '/' follows, input is consumed up to
/// (not including) the next whitespace character or one of ( ) { } [ ] ;.
/// The consumed text, including the leading '~', is emitted as a PATH token
/// at the current line/column, and `col` is advanced by its length.
void tokenize_home_path() {
  // True for characters that end a path token. The value is converted to
  // unsigned char before isspace(): passing a negative char (e.g. a UTF-8
  // byte) to isspace() is undefined behaviour.
  const auto ends_path = [](char ch) {
    switch (ch) {
      case '(':
      case ')':
      case '{':
      case '}':
      case '[':
      case ']':
      case ';':
        return true;
      default:
        return isspace(static_cast<unsigned char>(ch)) != 0;
    }
  };

  const size_t start = pos;
  pos++;  // Skip ~

  if (pos < input.size() && input[pos] == '/') {
    // Home-relative path ~/something
    while (pos < input.size() && !ends_path(input[pos])) {
      pos++;
    }
  }

  std::string path = input.substr(start, pos - start);
  tokens.push_back({Token::PATH, path, line, col});
  col += path.size();
}
void tokenize_int() {
size_t start = pos;
if (input[pos] == '-')
@ -587,6 +754,45 @@ public:
return std::make_shared<Node>(IfNode(cond, then, else_));
}
if (consume(Token::LET)) {
// Check for ancient let syntax: let { x = 1; body = x; }
if (current().type == Token::LBRACE) {
advance(); // consume {
std::vector<std::pair<std::string, std::shared_ptr<Node>>> bindings;
std::shared_ptr<Node> body_expr;
while (current().type != Token::RBRACE && current().type != Token::EOF_) {
if (current().type != Token::IDENT && current().type != Token::STRING &&
current().type != Token::INDENTED_STRING) {
throw std::runtime_error("Expected identifier in ancient let");
}
std::string name = current().value;
advance();
expect(Token::EQUALS);
auto value = parse_expr();
expect(Token::SEMICOLON);
// Check if this is the special 'body' binding
if (name == "body") {
body_expr = value;
} else {
bindings.push_back({name, value});
}
}
expect(Token::RBRACE);
if (!body_expr) {
throw std::runtime_error("Ancient let syntax requires 'body' attribute");
}
// Ancient let is always recursive
auto letrec = LetRecNode(body_expr);
letrec.bindings = std::move(bindings);
return std::make_shared<Node>(std::move(letrec));
}
// Modern let syntax: let x = 1; in x
bool is_rec = consume(Token::REC);
std::vector<std::pair<std::string, std::shared_ptr<Node>>> bindings;
parse_bindings(bindings);
@ -653,11 +859,23 @@ public:
// Continue loop to handle multi-dot selections (a.b.c)
continue;
}
// If we get here, the token after DOT was not IDENT or LBRACE
// If we get here, the token after DOT was not IDENT
// This is a parse error, but we'll just return what we have
break;
}
// Check for 'or' default value: a.b or default
// This is checked after all selections, so works for any selection depth
// 'or' is contextual - only special after a selection expression
if (left->get_if<SelectNode>() && current().type == Token::IDENT && current().value == "or") {
advance();
// Parse default as a primary expression
auto default_expr = parse_expr3();
// Update the SelectNode with the default expression
auto* select = left->get_if<SelectNode>();
select->default_expr = default_expr;
}
return left;
}
@ -748,6 +966,17 @@ public:
return parse_string_interp(str_token.value);
}
if (t.type == Token::INDENTED_STRING) {
advance();
return std::make_shared<Node>(ConstStringNode(t.value));
}
if (t.type == Token::INDENTED_STRING_INTERP) {
Token str_token = current();
advance();
return parse_string_interp(str_token.value);
}
if (t.type == Token::PATH) {
advance();
return std::make_shared<Node>(ConstPathNode(t.value));
@ -808,18 +1037,61 @@ public:
continue;
}
if (current().type == Token::IDENT || current().type == Token::STRING) {
Token key = current();
// Check for dynamic attribute name: ${expr} = value
if (current().type == Token::STRING_INTERP ||
current().type == Token::INDENTED_STRING_INTERP) {
Token str_token = current();
advance();
std::string key_str = key.value;
auto name_expr = parse_string_interp(str_token.value);
if (consume(Token::EQUALS)) {
auto value = parse_expr();
attrs.attrs.push_back({key_str, value});
// For dynamic attrs, we use special marker in key and store expr as value
// This will need runtime evaluation - store as special node
// For now, convert to string at parse time if possible
// TODO: Full dynamic attr support needs IR node for dynamic keys
attrs.attrs.push_back({"__dynamic__", value});
}
} else if (current().type == Token::IDENT || current().type == Token::STRING ||
current().type == Token::INDENTED_STRING) {
// Parse attribute path: a.b.c = value
std::vector<std::string> path;
path.push_back(current().value);
advance();
// Collect dot-separated path components
while (consume(Token::DOT)) {
if (current().type == Token::IDENT || current().type == Token::STRING ||
current().type == Token::INDENTED_STRING) {
path.push_back(current().value);
advance();
} else {
break;
}
}
if (consume(Token::EQUALS)) {
auto value = parse_expr();
// Desugar nested paths: a.b.c = v becomes a = { b = { c = v; }; }
if (path.size() == 1) {
// Simple case: just one key
attrs.attrs.push_back({path[0], value});
} else {
// Nested case: build nested attrsets from right to left
auto nested = value;
for (int i = path.size() - 1; i > 0; i--) {
auto inner_attrs = AttrsetNode(false);
inner_attrs.attrs.push_back({path[i], nested});
nested = std::make_shared<Node>(std::move(inner_attrs));
}
attrs.attrs.push_back({path[0], nested});
}
} else if (consume(Token::AT)) {
// @ pattern - not affected by nested paths
auto pattern = parse_expr();
auto value = parse_expr();
attrs.attrs.push_back({key_str, value});
attrs.attrs.push_back({path[0], value});
}
}