irc/parser: fix lexer ownership, errors, and implication parsing
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: I12a6b52ec1c0edff605d02393eafde896a6a6964
This commit is contained in:
parent
760094a2b7
commit
b319ef6f3f
4 changed files with 165 additions and 15 deletions
|
|
@ -1,9 +1,10 @@
|
|||
#include "lexer.h"
|
||||
#include <cctype>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace nix_irc {
|
||||
|
||||
Lexer::Lexer(const std::string& input) : input(input), pos(0), line(1), col(1) {}
|
||||
// Takes ownership of the input text (moved into the member) and starts
// scanning at position 0, line 1, column 1.
Lexer::Lexer(std::string input) : input(std::move(input)), pos(0), line(1), col(1) {}
|
||||
|
||||
std::vector<Token> Lexer::tokenize() {
|
||||
#define TOKEN(t) \
|
||||
|
|
@ -109,9 +110,10 @@ std::vector<Token> Lexer::tokenize() {
|
|||
// If we found > and there's content, it's a lookup path
|
||||
if (end < input.size() && input[end] == '>' && end > pos + 1) {
|
||||
std::string path = input.substr(pos + 1, end - pos - 1);
|
||||
size_t consumed = end - pos + 1;
|
||||
tokens.push_back({Token::LOOKUP_PATH, path, line, col});
|
||||
pos = end + 1;
|
||||
col += (end - pos + 1);
|
||||
col += consumed;
|
||||
is_lookup_path = true;
|
||||
}
|
||||
|
||||
|
|
@ -123,8 +125,14 @@ std::vector<Token> Lexer::tokenize() {
|
|||
} else if (c == '!') {
|
||||
emit(TOKEN(NOT));
|
||||
} else if (c == '.') {
|
||||
// Relative paths: ./foo and ../foo
|
||||
if (pos + 1 < input.size() && input[pos + 1] == '/') {
|
||||
tokenize_path();
|
||||
} else if (pos + 2 < input.size() && input[pos + 1] == '.' && input[pos + 2] == '/') {
|
||||
tokenize_path();
|
||||
}
|
||||
// Check for ellipsis (...)
|
||||
if (pos + 2 < input.size() && input[pos + 1] == '.' && input[pos + 2] == '.') {
|
||||
else if (pos + 2 < input.size() && input[pos + 1] == '.' && input[pos + 2] == '.') {
|
||||
tokens.push_back(TOKEN(ELLIPSIS));
|
||||
pos += 3;
|
||||
col += 3;
|
||||
|
|
@ -176,8 +184,8 @@ std::vector<Token> Lexer::tokenize() {
|
|||
tokenize_ident();
|
||||
}
|
||||
} else {
|
||||
pos++;
|
||||
col++;
|
||||
throw std::runtime_error("Unexpected character '" + std::string(1, c) + "' at " +
|
||||
std::to_string(line) + ":" + std::to_string(col));
|
||||
}
|
||||
}
|
||||
tokens.push_back({Token::EOF_, "", line, col});
|
||||
|
|
@ -210,10 +218,16 @@ void Lexer::skip_whitespace() {
|
|||
} else if (c == '/' && pos + 1 < input.size() && input[pos + 1] == '*') {
|
||||
// Block comment /* ... */
|
||||
// Note: Nix block comments do NOT nest
|
||||
size_t start_line = line;
|
||||
size_t start_col = col;
|
||||
bool terminated = false;
|
||||
pos += 2; // Skip /*
|
||||
col += 2;
|
||||
while (pos + 1 < input.size()) {
|
||||
if (input[pos] == '*' && input[pos + 1] == '/') {
|
||||
pos += 2; // Skip */
|
||||
col += 2;
|
||||
terminated = true;
|
||||
break;
|
||||
}
|
||||
if (input[pos] == '\n') {
|
||||
|
|
@ -224,6 +238,10 @@ void Lexer::skip_whitespace() {
|
|||
}
|
||||
pos++;
|
||||
}
|
||||
if (!terminated) {
|
||||
throw std::runtime_error("Unterminated block comment at " + std::to_string(start_line) +
|
||||
":" + std::to_string(start_col));
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
|
@ -231,13 +249,17 @@ void Lexer::skip_whitespace() {
|
|||
}
|
||||
|
||||
void Lexer::tokenize_string() {
|
||||
size_t start_line = line;
|
||||
size_t start_col = col;
|
||||
pos++;
|
||||
col++;
|
||||
std::string s;
|
||||
bool has_interp = false;
|
||||
|
||||
while (pos < input.size() && input[pos] != '"') {
|
||||
if (input[pos] == '\\' && pos + 1 < input.size()) {
|
||||
pos++;
|
||||
col++;
|
||||
switch (input[pos]) {
|
||||
case 'n':
|
||||
s += '\n';
|
||||
|
|
@ -262,21 +284,35 @@ void Lexer::tokenize_string() {
|
|||
break;
|
||||
}
|
||||
pos++;
|
||||
col++;
|
||||
} else if (input[pos] == '$' && pos + 1 < input.size() && input[pos + 1] == '{') {
|
||||
// Found interpolation marker
|
||||
has_interp = true;
|
||||
s += input[pos]; // Keep $ in raw string
|
||||
pos++;
|
||||
col++;
|
||||
} else {
|
||||
if (input[pos] == '\n') {
|
||||
s += input[pos];
|
||||
pos++;
|
||||
line++;
|
||||
col = 1;
|
||||
continue;
|
||||
}
|
||||
s += input[pos];
|
||||
pos++;
|
||||
col++;
|
||||
}
|
||||
}
|
||||
if (pos >= input.size()) {
|
||||
throw std::runtime_error("Unterminated string at " + std::to_string(start_line) + ":" +
|
||||
std::to_string(start_col));
|
||||
}
|
||||
pos++;
|
||||
col++;
|
||||
|
||||
Token::Type type = has_interp ? Token::STRING_INTERP : Token::STRING;
|
||||
tokens.push_back({type, s, line, col});
|
||||
col += s.size() + 2;
|
||||
tokens.push_back({type, s, start_line, start_col});
|
||||
}
|
||||
|
||||
void Lexer::tokenize_indented_string() {
|
||||
|
|
|
|||
|
|
@ -68,12 +68,12 @@ struct Token {
|
|||
|
||||
class Lexer {
|
||||
public:
|
||||
Lexer(const std::string& input);
|
||||
explicit Lexer(std::string input);
|
||||
std::vector<Token> tokenize();
|
||||
|
||||
private:
|
||||
std::vector<Token> tokens;
|
||||
const std::string& input;
|
||||
std::string input;
|
||||
size_t pos;
|
||||
size_t line;
|
||||
size_t col;
|
||||
|
|
|
|||
|
|
@ -18,6 +18,116 @@ static std::string trim(const std::string& s) {
|
|||
return s.substr(start, end - start + 1);
|
||||
}
|
||||
|
||||
// Map a token type to a stable human-readable name for diagnostics.
// Note: EOF_ deliberately prints as "EOF" (the trailing underscore only
// avoids the C macro). Returns "UNKNOWN" for any value outside the enum.
static const char* token_type_name(Token::Type type) {
  switch (type) {
    case Token::LPAREN: return "LPAREN";
    case Token::RPAREN: return "RPAREN";
    case Token::LBRACE: return "LBRACE";
    case Token::RBRACE: return "RBRACE";
    case Token::LBRACKET: return "LBRACKET";
    case Token::RBRACKET: return "RBRACKET";
    case Token::IDENT: return "IDENT";
    case Token::STRING: return "STRING";
    case Token::STRING_INTERP: return "STRING_INTERP";
    case Token::INDENTED_STRING: return "INDENTED_STRING";
    case Token::INDENTED_STRING_INTERP: return "INDENTED_STRING_INTERP";
    case Token::PATH: return "PATH";
    case Token::LOOKUP_PATH: return "LOOKUP_PATH";
    case Token::INT: return "INT";
    case Token::FLOAT: return "FLOAT";
    case Token::URI: return "URI";
    case Token::BOOL: return "BOOL";
    case Token::LET: return "LET";
    case Token::IN: return "IN";
    case Token::REC: return "REC";
    case Token::IF: return "IF";
    case Token::THEN: return "THEN";
    case Token::ELSE: return "ELSE";
    case Token::ASSERT: return "ASSERT";
    case Token::WITH: return "WITH";
    case Token::INHERIT: return "INHERIT";
    case Token::IMPORT: return "IMPORT";
    case Token::DOT: return "DOT";
    case Token::SEMICOLON: return "SEMICOLON";
    case Token::COLON: return "COLON";
    case Token::EQUALS: return "EQUALS";
    case Token::AT: return "AT";
    case Token::COMMA: return "COMMA";
    case Token::QUESTION: return "QUESTION";
    case Token::ELLIPSIS: return "ELLIPSIS";
    case Token::PLUS: return "PLUS";
    case Token::MINUS: return "MINUS";
    case Token::STAR: return "STAR";
    case Token::SLASH: return "SLASH";
    case Token::CONCAT: return "CONCAT";
    case Token::MERGE: return "MERGE";
    case Token::EQEQ: return "EQEQ";
    case Token::NE: return "NE";
    case Token::LT: return "LT";
    case Token::GT: return "GT";
    case Token::LE: return "LE";
    case Token::GE: return "GE";
    case Token::AND: return "AND";
    case Token::OR: return "OR";
    case Token::IMPL: return "IMPL";
    case Token::NOT: return "NOT";
    case Token::EOF_: return "EOF";
  }
  return "UNKNOWN";
}
|
||||
|
||||
static std::string read_file(const std::string& path) {
|
||||
FILE* f = fopen(path.c_str(), "r");
|
||||
if (!f) {
|
||||
|
|
@ -85,14 +195,17 @@ public:
|
|||
|
||||
// Consume the current token if it matches `type`; otherwise throw with a
// readable token name (via token_type_name) and the token's line:col.
bool expect(Token::Type type) {
  const Token& tok = current();
  if (tok.type != type) {
    throw std::runtime_error("Expected token " + std::string(token_type_name(type)) +
                             " but got " + token_type_name(tok.type) + " at " +
                             std::to_string(tok.line) + ":" +
                             std::to_string(tok.col));
  }
  advance();
  return true;
}
|
||||
|
||||
// Implication (->) is the only right-associative binary operator in Nix,
// so `a -> b -> c` must parse as `a -> (b -> c)`.
bool is_right_associative(Token::Type type) { return type == Token::IMPL; }
|
||||
|
||||
// Get operator precedence (higher = tighter binding)
|
||||
int get_precedence(Token::Type type) {
|
||||
switch (type) {
|
||||
|
|
@ -263,7 +376,8 @@ public:
|
|||
Token op_token = current();
|
||||
advance();
|
||||
|
||||
auto right = parse_binary_op(prec + 1);
|
||||
int next_prec = is_right_associative(op_token.type) ? prec : prec + 1;
|
||||
auto right = parse_binary_op(next_prec);
|
||||
left = std::make_shared<Node>(BinaryOpNode(token_to_binop(op_token.type), left, right));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -16,8 +16,8 @@ BinaryOpNode::BinaryOpNode(BinaryOp o, std::shared_ptr<Node> l, std::shared_ptr<
|
|||
: op(o), left(std::move(l)), right(std::move(r)), line(ln) {}
|
||||
|
||||
// UnaryOpNode constructor
|
||||
UnaryOpNode::UnaryOpNode(UnaryOp o, std::shared_ptr<Node> operand, uint32_t l)
|
||||
: op(o), operand(std::move(operand)), line(l) {}
|
||||
// Build a unary-operator AST node; takes ownership of the operand subtree.
// (Parameter named operand_ptr so the init-list move cannot shadow the
// `operand` member.)
UnaryOpNode::UnaryOpNode(UnaryOp o, std::shared_ptr<Node> operand_ptr, uint32_t l)
    : op(o), operand(std::move(operand_ptr)), line(l) {}
|
||||
|
||||
// SelectNode constructor
|
||||
SelectNode::SelectNode(std::shared_ptr<Node> e, std::shared_ptr<Node> a, uint32_t l)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue