irc: split parser into lexer and parser components
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: I4e73459a02caff5335d690656fd6f1396a6a6964
This commit is contained in:
parent
feb247f64a
commit
0a5920adaf
3 changed files with 666 additions and 631 deletions
562
src/irc/lexer.cpp
Normal file
562
src/irc/lexer.cpp
Normal file
|
|
@ -0,0 +1,562 @@
|
|||
#include "lexer.h"
|
||||
#include <cctype>
|
||||
|
||||
namespace nix_irc {
|
||||
|
||||
// Stores a reference to `input` (no copy; caller must keep it alive) and
// starts scanning at offset 0, position line 1, column 1.
Lexer::Lexer(const std::string& input) : input(input), pos(0), line(1), col(1) {}
|
||||
|
||||
std::vector<Token> Lexer::tokenize() {
|
||||
#define TOKEN(t) \
|
||||
Token { \
|
||||
Token::t, "", line, col \
|
||||
}
|
||||
|
||||
while (pos < input.size()) {
|
||||
skip_whitespace();
|
||||
if (pos >= input.size())
|
||||
break;
|
||||
|
||||
char c = input[pos];
|
||||
|
||||
if (c == '(') {
|
||||
emit(TOKEN(LPAREN));
|
||||
} else if (c == ')') {
|
||||
emit(TOKEN(RPAREN));
|
||||
} else if (c == '{') {
|
||||
emit(TOKEN(LBRACE));
|
||||
} else if (c == '}') {
|
||||
emit(TOKEN(RBRACE));
|
||||
} else if (c == '[') {
|
||||
emit(TOKEN(LBRACKET));
|
||||
} else if (c == ']') {
|
||||
emit(TOKEN(RBRACKET));
|
||||
} else if (c == ';') {
|
||||
emit(TOKEN(SEMICOLON));
|
||||
} else if (c == ':') {
|
||||
emit(TOKEN(COLON));
|
||||
} else if (c == '@') {
|
||||
emit(TOKEN(AT));
|
||||
} else if (c == ',') {
|
||||
emit(TOKEN(COMMA));
|
||||
} else if (c == '\'' && pos + 1 < input.size() && input[pos + 1] == '\'') {
|
||||
tokenize_indented_string();
|
||||
} else if (c == '"') {
|
||||
tokenize_string();
|
||||
}
|
||||
// Two-char operators
|
||||
else if (c == '=' && pos + 1 < input.size() && input[pos + 1] == '=') {
|
||||
tokens.push_back(TOKEN(EQEQ));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
} else if (c == '=') {
|
||||
emit(TOKEN(EQUALS));
|
||||
} else if (c == '!' && pos + 1 < input.size() && input[pos + 1] == '=') {
|
||||
tokens.push_back(TOKEN(NE));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
} else if (c == '<' && pos + 1 < input.size() && input[pos + 1] == '=') {
|
||||
tokens.push_back(TOKEN(LE));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
} else if (c == '>' && pos + 1 < input.size() && input[pos + 1] == '=') {
|
||||
tokens.push_back(TOKEN(GE));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
} else if (c == '+' && pos + 1 < input.size() && input[pos + 1] == '+') {
|
||||
tokens.push_back(TOKEN(CONCAT));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
} else if (c == '/' && pos + 1 < input.size() && input[pos + 1] == '/') {
|
||||
tokens.push_back(TOKEN(MERGE));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
} else if (c == '&' && pos + 1 < input.size() && input[pos + 1] == '&') {
|
||||
tokens.push_back(TOKEN(AND));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
} else if (c == '|' && pos + 1 < input.size() && input[pos + 1] == '|') {
|
||||
tokens.push_back(TOKEN(OR));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
} else if (c == '-' && pos + 1 < input.size() && input[pos + 1] == '>') {
|
||||
tokens.push_back(TOKEN(IMPL));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
}
|
||||
// Single-char operators
|
||||
else if (c == '+') {
|
||||
emit(TOKEN(PLUS));
|
||||
} else if (c == '*') {
|
||||
emit(TOKEN(STAR));
|
||||
} else if (c == '/') {
|
||||
// Check if it's a path or division
|
||||
if (pos + 1 < input.size() && (isalnum(input[pos + 1]) || input[pos + 1] == '.')) {
|
||||
tokenize_path();
|
||||
} else {
|
||||
emit(TOKEN(SLASH));
|
||||
}
|
||||
} else if (c == '<') {
|
||||
// Check for lookup path <nixpkgs> vs comparison operator
|
||||
size_t end = pos + 1;
|
||||
bool is_lookup_path = false;
|
||||
|
||||
// Scan for valid lookup path characters until >
|
||||
while (end < input.size() && (isalnum(input[end]) || input[end] == '-' || input[end] == '_' ||
|
||||
input[end] == '/' || input[end] == '.')) {
|
||||
end++;
|
||||
}
|
||||
|
||||
// If we found > and there's content, it's a lookup path
|
||||
if (end < input.size() && input[end] == '>' && end > pos + 1) {
|
||||
std::string path = input.substr(pos + 1, end - pos - 1);
|
||||
tokens.push_back({Token::LOOKUP_PATH, path, line, col});
|
||||
pos = end + 1;
|
||||
col += (end - pos + 1);
|
||||
is_lookup_path = true;
|
||||
}
|
||||
|
||||
if (!is_lookup_path) {
|
||||
emit(TOKEN(LT));
|
||||
}
|
||||
} else if (c == '>') {
|
||||
emit(TOKEN(GT));
|
||||
} else if (c == '!') {
|
||||
emit(TOKEN(NOT));
|
||||
} else if (c == '.') {
|
||||
// Check for ellipsis (...)
|
||||
if (pos + 2 < input.size() && input[pos + 1] == '.' && input[pos + 2] == '.') {
|
||||
tokens.push_back(TOKEN(ELLIPSIS));
|
||||
pos += 3;
|
||||
col += 3;
|
||||
} else {
|
||||
emit(TOKEN(DOT));
|
||||
}
|
||||
} else if (c == '?') {
|
||||
emit(TOKEN(QUESTION));
|
||||
} else if (c == '~') {
|
||||
// Home-relative path ~/...
|
||||
if (pos + 1 < input.size() && input[pos + 1] == '/') {
|
||||
tokenize_home_path();
|
||||
} else {
|
||||
// Just ~ by itself is an identifier
|
||||
tokenize_ident();
|
||||
}
|
||||
} else if (c == '-') {
|
||||
// Check if it's a negative number or minus operator
|
||||
if (pos + 1 < input.size() && isdigit(input[pos + 1])) {
|
||||
// Check for negative float
|
||||
if (pos + 2 < input.size() && input[pos + 2] == '.') {
|
||||
tokenize_float();
|
||||
} else {
|
||||
tokenize_int();
|
||||
}
|
||||
} else {
|
||||
emit(TOKEN(MINUS));
|
||||
}
|
||||
} else if (isdigit(c)) {
|
||||
// Check if it's a float (digit followed by '.')
|
||||
if (pos + 1 < input.size() && input[pos + 1] == '.') {
|
||||
tokenize_float();
|
||||
} else {
|
||||
tokenize_int();
|
||||
}
|
||||
} else if (isalpha(c)) {
|
||||
// Check if it's a URI (contains ://) - look ahead
|
||||
size_t lookahead = pos;
|
||||
while (lookahead < input.size() &&
|
||||
(isalnum(input[lookahead]) || input[lookahead] == '_' || input[lookahead] == '-' ||
|
||||
input[lookahead] == '+' || input[lookahead] == '.'))
|
||||
lookahead++;
|
||||
std::string potential_scheme = input.substr(pos, lookahead - pos);
|
||||
if (lookahead + 2 < input.size() && input[lookahead] == ':' && input[lookahead + 1] == '/' &&
|
||||
input[lookahead + 2] == '/') {
|
||||
// It's a URI, consume the whole thing
|
||||
tokenize_uri();
|
||||
} else {
|
||||
tokenize_ident();
|
||||
}
|
||||
} else {
|
||||
pos++;
|
||||
col++;
|
||||
}
|
||||
}
|
||||
tokens.push_back({Token::EOF_, "", line, col});
|
||||
|
||||
#undef TOKEN
|
||||
return tokens;
|
||||
}
|
||||
|
||||
// Records a pre-built single-character token, then consumes that character
// by stepping both the input offset and the column.
void Lexer::emit(const Token& t) {
    tokens.push_back(t);
    ++pos;
    ++col;
}
|
||||
|
||||
// Advances past spaces, tabs, CR/LF, '#' line comments, and /* ... */ block
// comments, updating line/col for every newline crossed. Stops at the first
// byte that can start a token (or at end of input).
void Lexer::skip_whitespace() {
    while (pos < input.size()) {
        char c = input[pos];
        if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
            if (c == '\n') {
                line++;
                col = 1;
            } else {
                col++;
            }
            pos++;
        } else if (c == '#') {
            // Line comment - skip until newline
            // col is intentionally not advanced here: the terminating '\n'
            // is handled by the branch above, which resets col to 1.
            while (pos < input.size() && input[pos] != '\n')
                pos++;
        } else if (c == '/' && pos + 1 < input.size() && input[pos + 1] == '*') {
            // Block comment /* ... */
            // Note: Nix block comments do NOT nest
            pos += 2; // Skip /*
            // An unterminated comment simply runs to end of input (the loop
            // condition needs two bytes left to match the closing "*/").
            while (pos + 1 < input.size()) {
                if (input[pos] == '*' && input[pos + 1] == '/') {
                    pos += 2; // Skip */
                    break;
                }
                if (input[pos] == '\n') {
                    line++;
                    col = 1;
                } else {
                    col++;
                }
                pos++;
            }
        } else {
            break;
        }
    }
}
|
||||
|
||||
void Lexer::tokenize_string() {
|
||||
pos++;
|
||||
std::string s;
|
||||
bool has_interp = false;
|
||||
|
||||
while (pos < input.size() && input[pos] != '"') {
|
||||
if (input[pos] == '\\' && pos + 1 < input.size()) {
|
||||
pos++;
|
||||
switch (input[pos]) {
|
||||
case 'n':
|
||||
s += '\n';
|
||||
break;
|
||||
case 't':
|
||||
s += '\t';
|
||||
break;
|
||||
case 'r':
|
||||
s += '\r';
|
||||
break;
|
||||
case '"':
|
||||
s += '"';
|
||||
break;
|
||||
case '\\':
|
||||
s += '\\';
|
||||
break;
|
||||
case '$':
|
||||
s += '$';
|
||||
break; // Escaped $
|
||||
default:
|
||||
s += input[pos];
|
||||
break;
|
||||
}
|
||||
pos++;
|
||||
} else if (input[pos] == '$' && pos + 1 < input.size() && input[pos + 1] == '{') {
|
||||
// Found interpolation marker
|
||||
has_interp = true;
|
||||
s += input[pos]; // Keep $ in raw string
|
||||
pos++;
|
||||
} else {
|
||||
s += input[pos];
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
pos++;
|
||||
|
||||
Token::Type type = has_interp ? Token::STRING_INTERP : Token::STRING;
|
||||
tokens.push_back({type, s, line, col});
|
||||
col += s.size() + 2;
|
||||
}
|
||||
|
||||
// Lexes a Nix indented string ''...'' starting at the opening ''.
// Escapes handled here: '''  -> literal ''
//                       ''$  -> literal $ (suppresses interpolation)
//                       ''\n / ''\r / ''\t -> control character
//                       ''\<space-or-tab>  -> whitespace preserved from
//                         indentation stripping, marked with the \x1F\x1F
//                         sentinel consumed later by strip_indentation().
// A raw "${" flags the token as INDENTED_STRING_INTERP. The token reports
// the line the string started on; `col` is not advanced by this function.
void Lexer::tokenize_indented_string() {
    pos += 2; // Skip opening ''
    std::string raw_content;
    bool has_interp = false;
    size_t start_line = line;

    // Collect raw content until closing ''
    while (pos < input.size()) {
        // Check for escape sequences
        if (pos + 1 < input.size() && input[pos] == '\'' && input[pos + 1] == '\'') {
            // Check if it's an escape or the closing delimiter
            if (pos + 2 < input.size() && input[pos + 2] == '\'') {
                // ''' -> escape for ''
                raw_content += "''";
                pos += 3;
                continue;
            } else if (pos + 2 < input.size() && input[pos + 2] == '$') {
                // ''$ -> escape for $
                raw_content += '$';
                pos += 3;
                continue;
            } else if (pos + 2 < input.size() && input[pos + 2] == '\\') {
                // ''\ -> check what follows
                if (pos + 3 < input.size()) {
                    char next = input[pos + 3];
                    if (next == 'n') {
                        raw_content += '\n';
                        pos += 4;
                        continue;
                    } else if (next == 'r') {
                        raw_content += '\r';
                        pos += 4;
                        continue;
                    } else if (next == 't') {
                        raw_content += '\t';
                        pos += 4;
                        continue;
                    } else if (next == ' ' || next == '\t') {
                        // ''\ before whitespace - preserve the whitespace by prepending a marker
                        // We use a special escape sequence that won't appear in normal text
                        raw_content += "\x1F\x1F"; // Unit separator pair as marker for preserved whitespace
                        raw_content += next;
                        pos += 4;
                        continue;
                    }
                }
                // Default: literal backslash
                raw_content += '\\';
                pos += 3;
                continue;
            } else {
                // Just closing ''
                pos += 2;
                break;
            }
        }

        // Check for interpolation
        if (input[pos] == '$' && pos + 1 < input.size() && input[pos + 1] == '{') {
            has_interp = true;
            raw_content += input[pos]; // Keep $ so the parser can find ${...}
            pos++;
            // NOTE(review): input[pos] is now the '{' verified above, so this
            // newline check can never fire -- it looks like dead code.
            if (input[pos] == '\n') {
                line++;
            }
            continue;
        }

        // Track newlines
        if (input[pos] == '\n') {
            line++;
            raw_content += input[pos];
            pos++;
        } else {
            raw_content += input[pos];
            pos++;
        }
    }

    // Strip common indentation
    std::string stripped = strip_indentation(raw_content);

    Token::Type type = has_interp ? Token::INDENTED_STRING_INTERP : Token::INDENTED_STRING;
    tokens.push_back({type, stripped, start_line, col});
}
|
||||
|
||||
// Implements the indented-string ('' '') stripping rule: removes the minimum
// common leading whitespace (spaces/tabs) shared by all non-empty lines, and
// deletes the \x1F\x1F sentinels that tokenize_indented_string() inserted to
// protect whitespace escaped with ''\ from being stripped.
std::string Lexer::strip_indentation(const std::string& s) {
    if (s.empty())
        return s;

    // Split into lines
    std::vector<std::string> lines;
    std::string current_line;
    for (char c : s) {
        if (c == '\n') {
            lines.push_back(current_line);
            current_line.clear();
        } else {
            current_line += c;
        }
    }
    // A trailing fragment -- or a final '\n' -- still contributes one line.
    if (!current_line.empty() || (!s.empty() && s.back() == '\n')) {
        lines.push_back(current_line);
    }

    // Find minimum indentation (spaces/tabs at start of non-empty lines)
    // \x1F\x1F marker indicates preserved whitespace (from ''\ escape)
    size_t min_indent = std::string::npos;
    for (const auto& line : lines) {
        if (line.empty())
            continue; // Skip empty lines when calculating indentation
        // NOTE(review): lines containing only spaces/tabs are counted here
        // and can shrink min_indent; Nix itself ignores whitespace-only
        // lines when computing the common indentation -- confirm intent.
        size_t indent = 0;
        for (size_t i = 0; i < line.size(); i++) {
            char c = line[i];
            // If we hit the preserved whitespace marker, stop counting indentation
            if (c == '\x1F' && i + 1 < line.size() && line[i + 1] == '\x1F') {
                break;
            }
            if (c == ' ' || c == '\t')
                indent++;
            else
                break;
        }
        if (indent < min_indent)
            min_indent = indent;
    }

    // Every line was empty: nothing to strip.
    if (min_indent == std::string::npos)
        min_indent = 0;

    // Strip min_indent from all lines and remove \x1F\x1F markers
    std::string result;
    for (size_t i = 0; i < lines.size(); i++) {
        const auto& line = lines[i];
        if (line.empty()) {
            // Preserve empty lines
            if (i + 1 < lines.size())
                result += '\n';
        } else {
            // Strip indentation, being careful about \x1F\x1F markers
            size_t skip = 0;
            size_t pos = 0;
            while (skip < min_indent && pos < line.size()) {
                if (line[pos] == '\x1F' && pos + 1 < line.size() && line[pos + 1] == '\x1F') {
                    // Hit preserved whitespace marker - don't strip any more
                    break;
                }
                skip++;
                pos++;
            }

            // Add the rest of the line, removing \x1F\x1F markers
            for (size_t j = pos; j < line.size(); j++) {
                if (line[j] == '\x1F' && j + 1 < line.size() && line[j + 1] == '\x1F') {
                    j++; // Skip both marker bytes
                    continue;
                }
                result += line[j];
            }

            if (i + 1 < lines.size())
                result += '\n';
        }
    }

    return result;
}
|
||||
|
||||
void Lexer::tokenize_path() {
|
||||
size_t start = pos;
|
||||
while (pos < input.size() && !isspace(input[pos]) && input[pos] != '(' && input[pos] != ')' &&
|
||||
input[pos] != '{' && input[pos] != '}' && input[pos] != '[' && input[pos] != ']' &&
|
||||
input[pos] != ';') {
|
||||
pos++;
|
||||
}
|
||||
std::string path = input.substr(start, pos - start);
|
||||
tokens.push_back({Token::PATH, path, line, col});
|
||||
col += path.size();
|
||||
}
|
||||
|
||||
void Lexer::tokenize_home_path() {
|
||||
size_t start = pos;
|
||||
pos++; // Skip ~
|
||||
if (pos < input.size() && input[pos] == '/') {
|
||||
// Home-relative path ~/something
|
||||
while (pos < input.size() && !isspace(input[pos]) && input[pos] != '(' && input[pos] != ')' &&
|
||||
input[pos] != '{' && input[pos] != '}' && input[pos] != '[' && input[pos] != ']' &&
|
||||
input[pos] != ';') {
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
std::string path = input.substr(start, pos - start);
|
||||
tokens.push_back({Token::PATH, path, line, col});
|
||||
col += path.size();
|
||||
}
|
||||
|
||||
void Lexer::tokenize_int() {
|
||||
size_t start = pos;
|
||||
if (input[pos] == '-')
|
||||
pos++;
|
||||
while (pos < input.size() && isdigit(input[pos]))
|
||||
pos++;
|
||||
std::string num = input.substr(start, pos - start);
|
||||
tokens.push_back({Token::INT, num, line, col});
|
||||
col += num.size();
|
||||
}
|
||||
|
||||
void Lexer::tokenize_float() {
|
||||
size_t start = pos;
|
||||
if (input[pos] == '-')
|
||||
pos++;
|
||||
while (pos < input.size() && isdigit(input[pos]))
|
||||
pos++;
|
||||
if (pos < input.size() && input[pos] == '.') {
|
||||
pos++;
|
||||
while (pos < input.size() && isdigit(input[pos]))
|
||||
pos++;
|
||||
}
|
||||
std::string num = input.substr(start, pos - start);
|
||||
tokens.push_back({Token::FLOAT, num, line, col});
|
||||
col += num.size();
|
||||
}
|
||||
|
||||
void Lexer::tokenize_uri() {
|
||||
size_t start = pos;
|
||||
while (pos < input.size() && !isspace(input[pos]) && input[pos] != ')' && input[pos] != ']' &&
|
||||
input[pos] != ';') {
|
||||
pos++;
|
||||
}
|
||||
std::string uri = input.substr(start, pos - start);
|
||||
tokens.push_back({Token::URI, uri, line, col});
|
||||
col += uri.size();
|
||||
}
|
||||
|
||||
void Lexer::tokenize_ident() {
|
||||
size_t start = pos;
|
||||
// Note: Don't include '.' here - it's used for selection (a.b.c)
|
||||
// URIs are handled separately by checking for '://' pattern
|
||||
while (pos < input.size() && (isalnum(input[pos]) || input[pos] == '_' || input[pos] == '-'))
|
||||
pos++;
|
||||
std::string ident = input.substr(start, pos - start);
|
||||
|
||||
// Check if it's a URI (contains ://)
|
||||
size_t scheme_end = ident.find("://");
|
||||
if (scheme_end != std::string::npos && scheme_end > 0) {
|
||||
tokens.push_back({Token::URI, ident, line, col});
|
||||
col += ident.size();
|
||||
return;
|
||||
}
|
||||
|
||||
Token::Type type = Token::IDENT;
|
||||
if (ident == "let")
|
||||
type = Token::LET;
|
||||
else if (ident == "in")
|
||||
type = Token::IN;
|
||||
else if (ident == "rec")
|
||||
type = Token::REC;
|
||||
else if (ident == "if")
|
||||
type = Token::IF;
|
||||
else if (ident == "then")
|
||||
type = Token::THEN;
|
||||
else if (ident == "else")
|
||||
type = Token::ELSE;
|
||||
else if (ident == "assert")
|
||||
type = Token::ASSERT;
|
||||
else if (ident == "with")
|
||||
type = Token::WITH;
|
||||
else if (ident == "inherit")
|
||||
type = Token::INHERIT;
|
||||
else if (ident == "import")
|
||||
type = Token::IMPORT;
|
||||
else if (ident == "true")
|
||||
type = Token::BOOL;
|
||||
else if (ident == "false")
|
||||
type = Token::BOOL;
|
||||
|
||||
tokens.push_back({type, ident, line, col});
|
||||
col += ident.size();
|
||||
}
|
||||
|
||||
} // namespace nix_irc
|
||||
94
src/irc/lexer.h
Normal file
94
src/irc/lexer.h
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
#pragma once

#include <string>
#include <vector>

namespace nix_irc {

// One lexical unit of Nix source, tagged with the 1-based position where it
// starts.
struct Token {
    enum Type {
        LPAREN,
        RPAREN,
        LBRACE,
        RBRACE,
        LBRACKET,
        RBRACKET,
        IDENT,
        STRING,                 // "..." without interpolation
        STRING_INTERP,          // "..." containing ${...}
        INDENTED_STRING,        // ''...'' without interpolation
        INDENTED_STRING_INTERP, // ''...'' containing ${...}
        PATH,                   // /foo/bar or ~/foo
        LOOKUP_PATH,            // <nixpkgs> -- value holds the inner text
        INT,
        FLOAT,
        URI,
        BOOL,                   // "true" or "false" (spelling kept in value)
        // Keywords
        LET,
        IN,
        REC,
        IF,
        THEN,
        ELSE,
        ASSERT,
        WITH,
        INHERIT,
        IMPORT,
        // Punctuation
        DOT,
        SEMICOLON,
        COLON,
        EQUALS,
        AT,
        COMMA,
        QUESTION,
        ELLIPSIS,
        // Operators
        PLUS,
        MINUS,
        STAR,
        SLASH,
        CONCAT, // ++
        MERGE,  // //
        EQEQ,
        NE,
        LT,
        GT,
        LE,
        GE,
        AND,  // &&
        OR,   // ||
        IMPL, // ->
        NOT,
        EOF_  // Always the final token produced by Lexer::tokenize()
    } type;
    std::string value; // Literal text / identifier spelling ("" for pure operators)
    size_t line;       // 1-based line where the token starts
    size_t col;        // 1-based column where the token starts
};

// Converts Nix source text into a flat Token stream for the parser.
class Lexer {
public:
    // Borrows `input` by reference; the string must outlive the Lexer.
    // NOTE(review): consider marking this constructor `explicit`.
    Lexer(const std::string& input);
    // Scans the whole input; the returned stream always ends with EOF_.
    std::vector<Token> tokenize();

private:
    std::vector<Token> tokens;  // Accumulated output
    const std::string& input;   // Borrowed source text
    size_t pos;                 // Current byte offset into input
    size_t line;                // Current 1-based line
    size_t col;                 // Current 1-based column

    // Push token `t`, then consume one character.
    void emit(const Token& t);
    // Skip whitespace and both comment forms, tracking line/col.
    void skip_whitespace();
    void tokenize_string();
    void tokenize_indented_string();
    // Remove common leading whitespace from an indented-string body.
    std::string strip_indentation(const std::string& s);
    void tokenize_path();
    void tokenize_home_path();
    void tokenize_int();
    void tokenize_float();
    void tokenize_uri();
    void tokenize_ident();
};

} // namespace nix_irc
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
#include "parser.h"
|
||||
#include "lexer.h"
|
||||
#include <array>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
|
@ -59,628 +60,6 @@ static std::pair<std::string, std::string> run_command(const std::string& cmd) {
|
|||
return {result, ""};
|
||||
}
|
||||
|
||||
struct Token {
|
||||
enum Type {
|
||||
LPAREN,
|
||||
RPAREN,
|
||||
LBRACE,
|
||||
RBRACE,
|
||||
LBRACKET,
|
||||
RBRACKET,
|
||||
IDENT,
|
||||
STRING,
|
||||
STRING_INTERP,
|
||||
INDENTED_STRING,
|
||||
INDENTED_STRING_INTERP,
|
||||
PATH,
|
||||
LOOKUP_PATH,
|
||||
INT,
|
||||
FLOAT,
|
||||
URI,
|
||||
BOOL,
|
||||
LET,
|
||||
IN,
|
||||
REC,
|
||||
IF,
|
||||
THEN,
|
||||
ELSE,
|
||||
ASSERT,
|
||||
WITH,
|
||||
INHERIT,
|
||||
IMPORT,
|
||||
DOT,
|
||||
SEMICOLON,
|
||||
COLON,
|
||||
EQUALS,
|
||||
AT,
|
||||
COMMA,
|
||||
QUESTION,
|
||||
ELLIPSIS,
|
||||
// Operators
|
||||
PLUS,
|
||||
MINUS,
|
||||
STAR,
|
||||
SLASH,
|
||||
CONCAT,
|
||||
MERGE,
|
||||
EQEQ,
|
||||
NE,
|
||||
LT,
|
||||
GT,
|
||||
LE,
|
||||
GE,
|
||||
AND,
|
||||
OR,
|
||||
IMPL,
|
||||
NOT,
|
||||
EOF_
|
||||
} type;
|
||||
std::string value;
|
||||
size_t line;
|
||||
size_t col;
|
||||
};
|
||||
|
||||
class Lexer {
|
||||
public:
|
||||
Lexer(const std::string& input) : input(input), pos(0), line(1), col(1) {}
|
||||
|
||||
std::vector<Token> tokenize() {
|
||||
#define TOKEN(t) \
|
||||
Token { Token::t, "", line, col }
|
||||
|
||||
while (pos < input.size()) {
|
||||
skip_whitespace();
|
||||
if (pos >= input.size())
|
||||
break;
|
||||
|
||||
char c = input[pos];
|
||||
|
||||
if (c == '(') {
|
||||
emit(TOKEN(LPAREN));
|
||||
} else if (c == ')') {
|
||||
emit(TOKEN(RPAREN));
|
||||
} else if (c == '{') {
|
||||
emit(TOKEN(LBRACE));
|
||||
} else if (c == '}') {
|
||||
emit(TOKEN(RBRACE));
|
||||
} else if (c == '[') {
|
||||
emit(TOKEN(LBRACKET));
|
||||
} else if (c == ']') {
|
||||
emit(TOKEN(RBRACKET));
|
||||
} else if (c == ';') {
|
||||
emit(TOKEN(SEMICOLON));
|
||||
} else if (c == ':') {
|
||||
emit(TOKEN(COLON));
|
||||
} else if (c == '@') {
|
||||
emit(TOKEN(AT));
|
||||
} else if (c == ',') {
|
||||
emit(TOKEN(COMMA));
|
||||
} else if (c == '\'' && pos + 1 < input.size() && input[pos + 1] == '\'') {
|
||||
tokenize_indented_string();
|
||||
} else if (c == '"') {
|
||||
tokenize_string();
|
||||
}
|
||||
// Two-char operators
|
||||
else if (c == '=' && pos + 1 < input.size() && input[pos + 1] == '=') {
|
||||
tokens.push_back(TOKEN(EQEQ));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
} else if (c == '=') {
|
||||
emit(TOKEN(EQUALS));
|
||||
} else if (c == '!' && pos + 1 < input.size() && input[pos + 1] == '=') {
|
||||
tokens.push_back(TOKEN(NE));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
} else if (c == '<' && pos + 1 < input.size() && input[pos + 1] == '=') {
|
||||
tokens.push_back(TOKEN(LE));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
} else if (c == '>' && pos + 1 < input.size() && input[pos + 1] == '=') {
|
||||
tokens.push_back(TOKEN(GE));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
} else if (c == '+' && pos + 1 < input.size() && input[pos + 1] == '+') {
|
||||
tokens.push_back(TOKEN(CONCAT));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
} else if (c == '/' && pos + 1 < input.size() && input[pos + 1] == '/') {
|
||||
tokens.push_back(TOKEN(MERGE));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
} else if (c == '&' && pos + 1 < input.size() && input[pos + 1] == '&') {
|
||||
tokens.push_back(TOKEN(AND));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
} else if (c == '|' && pos + 1 < input.size() && input[pos + 1] == '|') {
|
||||
tokens.push_back(TOKEN(OR));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
} else if (c == '-' && pos + 1 < input.size() && input[pos + 1] == '>') {
|
||||
tokens.push_back(TOKEN(IMPL));
|
||||
pos += 2;
|
||||
col += 2;
|
||||
}
|
||||
// Single-char operators
|
||||
else if (c == '+') {
|
||||
emit(TOKEN(PLUS));
|
||||
} else if (c == '*') {
|
||||
emit(TOKEN(STAR));
|
||||
} else if (c == '/') {
|
||||
// Check if it's a path or division
|
||||
if (pos + 1 < input.size() && (isalnum(input[pos + 1]) || input[pos + 1] == '.')) {
|
||||
tokenize_path();
|
||||
} else {
|
||||
emit(TOKEN(SLASH));
|
||||
}
|
||||
} else if (c == '<') {
|
||||
// Check for lookup path <nixpkgs> vs comparison operator
|
||||
size_t end = pos + 1;
|
||||
bool is_lookup_path = false;
|
||||
|
||||
// Scan for valid lookup path characters until >
|
||||
while (end < input.size() &&
|
||||
(isalnum(input[end]) || input[end] == '-' || input[end] == '_' ||
|
||||
input[end] == '/' || input[end] == '.')) {
|
||||
end++;
|
||||
}
|
||||
|
||||
// If we found > and there's content, it's a lookup path
|
||||
if (end < input.size() && input[end] == '>' && end > pos + 1) {
|
||||
std::string path = input.substr(pos + 1, end - pos - 1);
|
||||
tokens.push_back({Token::LOOKUP_PATH, path, line, col});
|
||||
pos = end + 1;
|
||||
col += (end - pos + 1);
|
||||
is_lookup_path = true;
|
||||
}
|
||||
|
||||
if (!is_lookup_path) {
|
||||
emit(TOKEN(LT));
|
||||
}
|
||||
} else if (c == '>') {
|
||||
emit(TOKEN(GT));
|
||||
} else if (c == '!') {
|
||||
emit(TOKEN(NOT));
|
||||
} else if (c == '.') {
|
||||
// Check for ellipsis (...)
|
||||
if (pos + 2 < input.size() && input[pos + 1] == '.' && input[pos + 2] == '.') {
|
||||
tokens.push_back(TOKEN(ELLIPSIS));
|
||||
pos += 3;
|
||||
col += 3;
|
||||
} else {
|
||||
emit(TOKEN(DOT));
|
||||
}
|
||||
} else if (c == '?') {
|
||||
emit(TOKEN(QUESTION));
|
||||
} else if (c == '~') {
|
||||
// Home-relative path ~/...
|
||||
if (pos + 1 < input.size() && input[pos + 1] == '/') {
|
||||
tokenize_home_path();
|
||||
} else {
|
||||
// Just ~ by itself is an identifier
|
||||
tokenize_ident();
|
||||
}
|
||||
} else if (c == '-') {
|
||||
// Check if it's a negative number or minus operator
|
||||
if (pos + 1 < input.size() && isdigit(input[pos + 1])) {
|
||||
// Check for negative float
|
||||
if (pos + 2 < input.size() && input[pos + 2] == '.') {
|
||||
tokenize_float();
|
||||
} else {
|
||||
tokenize_int();
|
||||
}
|
||||
} else {
|
||||
emit(TOKEN(MINUS));
|
||||
}
|
||||
} else if (isdigit(c)) {
|
||||
// Check if it's a float (digit followed by '.')
|
||||
if (pos + 1 < input.size() && input[pos + 1] == '.') {
|
||||
tokenize_float();
|
||||
} else {
|
||||
tokenize_int();
|
||||
}
|
||||
} else if (isalpha(c)) {
|
||||
// Check if it's a URI (contains ://) - look ahead
|
||||
size_t lookahead = pos;
|
||||
while (lookahead < input.size() &&
|
||||
(isalnum(input[lookahead]) || input[lookahead] == '_' || input[lookahead] == '-' ||
|
||||
input[lookahead] == '+' || input[lookahead] == '.'))
|
||||
lookahead++;
|
||||
std::string potential_scheme = input.substr(pos, lookahead - pos);
|
||||
if (lookahead + 2 < input.size() && input[lookahead] == ':' &&
|
||||
input[lookahead + 1] == '/' && input[lookahead + 2] == '/') {
|
||||
// It's a URI, consume the whole thing
|
||||
tokenize_uri();
|
||||
} else {
|
||||
tokenize_ident();
|
||||
}
|
||||
} else {
|
||||
pos++;
|
||||
col++;
|
||||
}
|
||||
}
|
||||
tokens.push_back({Token::EOF_, "", line, col});
|
||||
|
||||
#undef TOKEN
|
||||
return tokens;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<Token> tokens;
|
||||
const std::string& input;
|
||||
size_t pos;
|
||||
size_t line;
|
||||
size_t col;
|
||||
|
||||
void emit(const Token& t) {
|
||||
tokens.push_back(t);
|
||||
pos++;
|
||||
col++;
|
||||
}
|
||||
|
||||
void skip_whitespace() {
|
||||
while (pos < input.size()) {
|
||||
char c = input[pos];
|
||||
if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
|
||||
if (c == '\n') {
|
||||
line++;
|
||||
col = 1;
|
||||
} else {
|
||||
col++;
|
||||
}
|
||||
pos++;
|
||||
} else if (c == '#') {
|
||||
// Line comment - skip until newline
|
||||
while (pos < input.size() && input[pos] != '\n')
|
||||
pos++;
|
||||
} else if (c == '/' && pos + 1 < input.size() && input[pos + 1] == '*') {
|
||||
// Block comment /* ... */
|
||||
// Note: Nix block comments do NOT nest
|
||||
pos += 2; // Skip /*
|
||||
while (pos + 1 < input.size()) {
|
||||
if (input[pos] == '*' && input[pos + 1] == '/') {
|
||||
pos += 2; // Skip */
|
||||
break;
|
||||
}
|
||||
if (input[pos] == '\n') {
|
||||
line++;
|
||||
col = 1;
|
||||
} else {
|
||||
col++;
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void tokenize_string() {
|
||||
pos++;
|
||||
std::string s;
|
||||
bool has_interp = false;
|
||||
|
||||
while (pos < input.size() && input[pos] != '"') {
|
||||
if (input[pos] == '\\' && pos + 1 < input.size()) {
|
||||
pos++;
|
||||
switch (input[pos]) {
|
||||
case 'n':
|
||||
s += '\n';
|
||||
break;
|
||||
case 't':
|
||||
s += '\t';
|
||||
break;
|
||||
case 'r':
|
||||
s += '\r';
|
||||
break;
|
||||
case '"':
|
||||
s += '"';
|
||||
break;
|
||||
case '\\':
|
||||
s += '\\';
|
||||
break;
|
||||
case '$':
|
||||
s += '$';
|
||||
break; // Escaped $
|
||||
default:
|
||||
s += input[pos];
|
||||
break;
|
||||
}
|
||||
pos++;
|
||||
} else if (input[pos] == '$' && pos + 1 < input.size() && input[pos + 1] == '{') {
|
||||
// Found interpolation marker
|
||||
has_interp = true;
|
||||
s += input[pos]; // Keep $ in raw string
|
||||
pos++;
|
||||
} else {
|
||||
s += input[pos];
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
pos++;
|
||||
|
||||
Token::Type type = has_interp ? Token::STRING_INTERP : Token::STRING;
|
||||
tokens.push_back({type, s, line, col});
|
||||
col += s.size() + 2;
|
||||
}
|
||||
|
||||
void tokenize_indented_string() {
|
||||
pos += 2; // Skip opening ''
|
||||
std::string raw_content;
|
||||
bool has_interp = false;
|
||||
size_t start_line = line;
|
||||
|
||||
// Collect raw content until closing ''
|
||||
while (pos < input.size()) {
|
||||
// Check for escape sequences
|
||||
if (pos + 1 < input.size() && input[pos] == '\'' && input[pos + 1] == '\'') {
|
||||
// Check if it's an escape or the closing delimiter
|
||||
if (pos + 2 < input.size() && input[pos + 2] == '\'') {
|
||||
// ''' -> escape for ''
|
||||
raw_content += "''";
|
||||
pos += 3;
|
||||
continue;
|
||||
} else if (pos + 2 < input.size() && input[pos + 2] == '$') {
|
||||
// ''$ -> escape for $
|
||||
raw_content += '$';
|
||||
pos += 3;
|
||||
continue;
|
||||
} else if (pos + 2 < input.size() && input[pos + 2] == '\\') {
|
||||
// ''\ -> check what follows
|
||||
if (pos + 3 < input.size()) {
|
||||
char next = input[pos + 3];
|
||||
if (next == 'n') {
|
||||
raw_content += '\n';
|
||||
pos += 4;
|
||||
continue;
|
||||
} else if (next == 'r') {
|
||||
raw_content += '\r';
|
||||
pos += 4;
|
||||
continue;
|
||||
} else if (next == 't') {
|
||||
raw_content += '\t';
|
||||
pos += 4;
|
||||
continue;
|
||||
} else if (next == ' ' || next == '\t') {
|
||||
// ''\ before whitespace - preserve the whitespace (mark it specially)
|
||||
raw_content += "\x01"; // Use control char as marker for preserved whitespace
|
||||
raw_content += next;
|
||||
pos += 4;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// Default: literal backslash
|
||||
raw_content += '\\';
|
||||
pos += 3;
|
||||
continue;
|
||||
} else {
|
||||
// Just closing ''
|
||||
pos += 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for interpolation
|
||||
if (input[pos] == '$' && pos + 1 < input.size() && input[pos + 1] == '{') {
|
||||
has_interp = true;
|
||||
raw_content += input[pos];
|
||||
pos++;
|
||||
if (input[pos] == '\n') {
|
||||
line++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Track newlines
|
||||
if (input[pos] == '\n') {
|
||||
line++;
|
||||
raw_content += input[pos];
|
||||
pos++;
|
||||
} else {
|
||||
raw_content += input[pos];
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
|
||||
// Strip common indentation
|
||||
std::string stripped = strip_indentation(raw_content);
|
||||
|
||||
Token::Type type = has_interp ? Token::INDENTED_STRING_INTERP : Token::INDENTED_STRING;
|
||||
tokens.push_back({type, stripped, start_line, col});
|
||||
}
|
||||
|
||||
// Removes the common leading indentation shared by every non-blank line of an
// indented ('' '') string body, and drops the \x01 markers that protect
// escaped whitespace (produced by the ''\ escape) from being stripped.
std::string strip_indentation(const std::string& s) {
    if (s.empty())
        return s;

    // Break the body into physical lines. A trailing '\n' contributes one
    // final empty line so the newline survives the re-join below.
    std::vector<std::string> rows;
    std::string row;
    for (char ch : s) {
        if (ch == '\n') {
            rows.push_back(row);
            row.clear();
        } else {
            row += ch;
        }
    }
    if (!row.empty() || s.back() == '\n')
        rows.push_back(row);

    // Smallest run of leading spaces/tabs over the non-empty rows. A \x01
    // marker terminates the countable indent for that row (it is neither a
    // space nor a tab, so the scan stops there naturally).
    // NOTE(review): rows consisting purely of whitespace still participate
    // in this minimum — confirm that matches the intended '' semantics.
    size_t common = std::string::npos;
    for (const auto& r : rows) {
        if (r.empty())
            continue; // blank rows do not constrain the indent
        size_t run = 0;
        while (run < r.size() && (r[run] == ' ' || r[run] == '\t'))
            run++;
        if (run < common)
            common = run;
    }
    if (common == std::string::npos)
        common = 0;

    // Reassemble: drop up to `common` leading characters per non-empty row
    // (stopping early at a preserved-whitespace marker), erase the markers
    // themselves, and re-join with '\n' (no newline after the final row).
    std::string out;
    for (size_t i = 0; i < rows.size(); i++) {
        const std::string& r = rows[i];
        if (!r.empty()) {
            size_t cursor = 0;
            while (cursor < common && cursor < r.size() && r[cursor] != '\x01')
                cursor++;
            for (size_t j = cursor; j < r.size(); j++) {
                if (r[j] != '\x01')
                    out += r[j];
            }
        }
        if (i + 1 < rows.size())
            out += '\n';
    }

    return out;
}
|
||||
|
||||
void tokenize_path() {
|
||||
size_t start = pos;
|
||||
while (pos < input.size() && !isspace(input[pos]) && input[pos] != '(' && input[pos] != ')' &&
|
||||
input[pos] != '{' && input[pos] != '}' && input[pos] != '[' && input[pos] != ']' &&
|
||||
input[pos] != ';') {
|
||||
pos++;
|
||||
}
|
||||
std::string path = input.substr(start, pos - start);
|
||||
tokens.push_back({Token::PATH, path, line, col});
|
||||
col += path.size();
|
||||
}
|
||||
|
||||
void tokenize_home_path() {
|
||||
size_t start = pos;
|
||||
pos++; // Skip ~
|
||||
if (pos < input.size() && input[pos] == '/') {
|
||||
// Home-relative path ~/something
|
||||
while (pos < input.size() && !isspace(input[pos]) && input[pos] != '(' && input[pos] != ')' &&
|
||||
input[pos] != '{' && input[pos] != '}' && input[pos] != '[' && input[pos] != ']' &&
|
||||
input[pos] != ';') {
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
std::string path = input.substr(start, pos - start);
|
||||
tokens.push_back({Token::PATH, path, line, col});
|
||||
col += path.size();
|
||||
}
|
||||
|
||||
void tokenize_int() {
|
||||
size_t start = pos;
|
||||
if (input[pos] == '-')
|
||||
pos++;
|
||||
while (pos < input.size() && isdigit(input[pos]))
|
||||
pos++;
|
||||
std::string num = input.substr(start, pos - start);
|
||||
tokens.push_back({Token::INT, num, line, col});
|
||||
col += num.size();
|
||||
}
|
||||
|
||||
void tokenize_float() {
|
||||
size_t start = pos;
|
||||
if (input[pos] == '-')
|
||||
pos++;
|
||||
while (pos < input.size() && isdigit(input[pos]))
|
||||
pos++;
|
||||
if (pos < input.size() && input[pos] == '.') {
|
||||
pos++;
|
||||
while (pos < input.size() && isdigit(input[pos]))
|
||||
pos++;
|
||||
}
|
||||
std::string num = input.substr(start, pos - start);
|
||||
tokens.push_back({Token::FLOAT, num, line, col});
|
||||
col += num.size();
|
||||
}
|
||||
|
||||
void tokenize_uri() {
|
||||
size_t start = pos;
|
||||
while (pos < input.size() && !isspace(input[pos]) && input[pos] != ')' && input[pos] != ']' &&
|
||||
input[pos] != ';') {
|
||||
pos++;
|
||||
}
|
||||
std::string uri = input.substr(start, pos - start);
|
||||
tokens.push_back({Token::URI, uri, line, col});
|
||||
col += uri.size();
|
||||
}
|
||||
|
||||
void tokenize_ident() {
|
||||
size_t start = pos;
|
||||
// Note: Don't include '.' here - it's used for selection (a.b.c)
|
||||
// URIs are handled separately by checking for '://' pattern
|
||||
while (pos < input.size() && (isalnum(input[pos]) || input[pos] == '_' || input[pos] == '-'))
|
||||
pos++;
|
||||
std::string ident = input.substr(start, pos - start);
|
||||
|
||||
// Check if it's a URI (contains ://)
|
||||
size_t scheme_end = ident.find("://");
|
||||
if (scheme_end != std::string::npos && scheme_end > 0) {
|
||||
tokens.push_back({Token::URI, ident, line, col});
|
||||
col += ident.size();
|
||||
return;
|
||||
}
|
||||
|
||||
Token::Type type = Token::IDENT;
|
||||
if (ident == "let")
|
||||
type = Token::LET;
|
||||
else if (ident == "in")
|
||||
type = Token::IN;
|
||||
else if (ident == "rec")
|
||||
type = Token::REC;
|
||||
else if (ident == "if")
|
||||
type = Token::IF;
|
||||
else if (ident == "then")
|
||||
type = Token::THEN;
|
||||
else if (ident == "else")
|
||||
type = Token::ELSE;
|
||||
else if (ident == "assert")
|
||||
type = Token::ASSERT;
|
||||
else if (ident == "with")
|
||||
type = Token::WITH;
|
||||
else if (ident == "inherit")
|
||||
type = Token::INHERIT;
|
||||
else if (ident == "import")
|
||||
type = Token::IMPORT;
|
||||
else if (ident == "true")
|
||||
type = Token::BOOL;
|
||||
else if (ident == "false")
|
||||
type = Token::BOOL;
|
||||
|
||||
tokens.push_back({type, ident, line, col});
|
||||
col += ident.size();
|
||||
}
|
||||
};
|
||||
|
||||
class Parser::Impl {
|
||||
public:
|
||||
std::vector<Token> tokens;
|
||||
|
|
@ -706,9 +85,9 @@ public:
|
|||
|
||||
bool expect(Token::Type type) {
|
||||
if (current().type != type) {
|
||||
std::cerr << "Expected token " << type << " but got " << current().type << " at "
|
||||
<< current().line << ":" << current().col << "\n";
|
||||
return false;
|
||||
throw std::runtime_error(
|
||||
"Expected token " + std::to_string(type) + " but got " + std::to_string(current().type) +
|
||||
" at " + std::to_string(current().line) + ":" + std::to_string(current().col));
|
||||
}
|
||||
advance();
|
||||
return true;
|
||||
|
|
@ -718,7 +97,7 @@ public:
|
|||
int get_precedence(Token::Type type) {
|
||||
switch (type) {
|
||||
case Token::MERGE:
|
||||
return 1; // Low precedence - binds loosely, but must be > 0 to be recognized as binary op
|
||||
return 1; // low precedence - binds loosely, but must be > 0 to be recognized as binary op
|
||||
case Token::OR:
|
||||
return 1;
|
||||
case Token::AND:
|
||||
|
|
@ -942,9 +321,10 @@ public:
|
|||
} else if (current().type == Token::IDENT || current().type == Token::INT ||
|
||||
current().type == Token::FLOAT || current().type == Token::BOOL ||
|
||||
current().type == Token::PATH || current().type == Token::LOOKUP_PATH ||
|
||||
current().type == Token::URI || current().type == Token::LBRACKET) {
|
||||
current().type == Token::URI || current().type == Token::LBRACKET ||
|
||||
current().type == Token::LBRACE) {
|
||||
// Juxtaposition application: f x
|
||||
// Parse the argument as a primary expression (which handles lists, etc.)
|
||||
// Parse the argument as a primary expression (which handles lists, attrsets, etc.)
|
||||
auto arg = parse_expr3();
|
||||
left = std::make_shared<Node>(AppNode(left, arg));
|
||||
} else {
|
||||
|
|
@ -1056,9 +436,8 @@ public:
|
|||
return std::make_shared<Node>(ConstBoolNode(t.value == "true"));
|
||||
}
|
||||
|
||||
std::cerr << "Unknown token: " << t.value << " (type " << t.type << ")\n";
|
||||
advance();
|
||||
return std::make_shared<Node>(ConstNullNode());
|
||||
throw std::runtime_error("Unknown token: " + t.value + " (type " + std::to_string(t.type) +
|
||||
")");
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> parse_attrs() {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue