initial commit

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I1ad48ade1bc8234b7d7c9fe3d976a5be6a6a6964
This commit is contained in:
raf 2026-02-20 22:01:32 +03:00
commit 618a58b2b8
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
14 changed files with 2119 additions and 0 deletions

621
src/irc/parser.cpp Normal file
View file

@ -0,0 +1,621 @@
#include "parser.h"

#include <array>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
namespace nix_irc {
// Returns a copy of `s` without leading and trailing spaces, tabs, newlines
// and carriage returns. A string made only of those characters yields "".
static std::string trim(const std::string& s) {
    const auto is_blank = [](char ch) {
        return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
    };
    size_t lo = 0;
    size_t hi = s.size();
    // Shrink the [lo, hi) window from both sides until it starts and ends
    // on a non-blank character (or becomes empty).
    while (lo < hi && is_blank(s[lo])) {
        ++lo;
    }
    while (hi > lo && is_blank(s[hi - 1])) {
        --hi;
    }
    return s.substr(lo, hi - lo);
}
// Reads the whole file at `path` into a string.
//
// Throws std::runtime_error when the file cannot be opened, when its size
// cannot be determined, or when the read itself reports an error.
static std::string read_file(const std::string& path) {
    // unique_ptr closes the handle on every exit path, including exceptions
    // thrown while allocating the result string.
    std::unique_ptr<FILE, int (*)(FILE*)> file(fopen(path.c_str(), "r"), &fclose);
    if (!file) {
        throw std::runtime_error("Cannot open file: " + path);
    }
    if (fseek(file.get(), 0, SEEK_END) != 0) {
        throw std::runtime_error("Cannot read file: " + path);
    }
    // ftell reports failure as -1; never let that become a string length.
    const long size = ftell(file.get());
    if (size < 0 || fseek(file.get(), 0, SEEK_SET) != 0) {
        throw std::runtime_error("Cannot read file: " + path);
    }
    std::string content(static_cast<size_t>(size), '\0');
    const size_t got = fread(content.data(), 1, content.size(), file.get());
    if (got < content.size() && ferror(file.get())) {
        throw std::runtime_error("Cannot read file: " + path);
    }
    // A short read (e.g. newline translation in text mode) must not leave
    // trailing NUL padding in the returned source text.
    content.resize(got);
    return content;
}
// Runs `cmd` through popen() and returns {stdout, stderr}.
//
// Only the command's standard output is captured; the second element of the
// pair is always empty because popen(..., "r") does not expose stderr.
// NOTE(review): `cmd` is handed to the shell verbatim - callers must not
// build it from untrusted input.
//
// Throws std::runtime_error if the pipe cannot be created or if pclose()
// reports a non-zero status.
static std::pair<std::string, std::string> run_command(const std::string& cmd) {
    FILE* pipe = popen(cmd.c_str(), "r");
    if (!pipe) throw std::runtime_error("popen failed");

    std::string result;
    std::string error;
    std::array<char, 256> buffer;
    try {
        // fread + append(ptr, len) keeps embedded NUL bytes; reading with
        // fgets and appending a C string would cut the output at the first one.
        size_t got = 0;
        while ((got = fread(buffer.data(), 1, buffer.size(), pipe)) > 0) {
            result.append(buffer.data(), got);
        }
    } catch (...) {
        // Do not leak the pipe (and the child process) if appending throws.
        pclose(pipe);
        throw;
    }

    const int status = pclose(pipe);
    if (status != 0) {
        throw std::runtime_error("Command failed: " + cmd);
    }
    return {result, error};
}
// One lexical token plus the source position at which it starts.
struct Token {
    // Unscoped on purpose: the parser streams these values as plain integers
    // in its diagnostics, so the enumerator order must stay stable.
    enum Type {
        LPAREN, RPAREN, LBRACE, RBRACE, LBRACKET, RBRACKET,
        IDENT, STRING, PATH, INT, BOOL,
        LET, IN, REC, IF, THEN, ELSE, ASSERT, WITH,
        DOT, SEMICOLON, COLON, AT, COMMA,
        // Operators
        PLUS, MINUS, STAR, SLASH, CONCAT,
        EQEQ, NE, LT, GT, LE, GE,
        AND, OR, IMPL, NOT,
        EOF_
    } type;
    std::string value;  // lexeme text; empty for punctuation and operators
    size_t line;        // 1-based line of the token's first character
    size_t col;         // 1-based column of the token's first character
};

// Converts source text into a flat vector of Tokens terminated by EOF_.
//
// Characters that do not start any known token (a lone '=', '&', '|', or any
// other unrecognised byte) are skipped silently.
class Lexer {
public:
    // The input is copied, so the lexer stays valid even when it is
    // constructed from a temporary string.
    Lexer(const std::string& input) : input(input), pos(0), line(1), col(1) {}

    // Lexes the remaining input and returns every token produced so far,
    // followed by a final EOF_ token.
    std::vector<Token> tokenize() {
        while (pos < input.size()) {
            skip_whitespace();
            if (pos >= input.size()) break;

            const char c = input[pos];
            // '\0' stands in for "no next character"; none of the look-ahead
            // comparisons below can match it.
            const char next = (pos + 1 < input.size()) ? input[pos + 1] : '\0';

            switch (c) {
                case '(': emit(Token::LPAREN); break;
                case ')': emit(Token::RPAREN); break;
                case '{': emit(Token::LBRACE); break;
                case '}': emit(Token::RBRACE); break;
                case '[': emit(Token::LBRACKET); break;
                case ']': emit(Token::RBRACKET); break;
                case ';': emit(Token::SEMICOLON); break;
                case ':': emit(Token::COLON); break;
                case '@': emit(Token::AT); break;
                case ',': emit(Token::COMMA); break;
                case '.': emit(Token::DOT); break;
                case '*': emit(Token::STAR); break;
                case '"': tokenize_string(); break;

                // Characters that may start a two-character operator.
                case '=':
                    if (next == '=') emit(Token::EQEQ, 2);
                    else skip_char();
                    break;
                case '!':
                    if (next == '=') emit(Token::NE, 2);
                    else emit(Token::NOT);
                    break;
                case '<':
                    if (next == '=') emit(Token::LE, 2);
                    else emit(Token::LT);
                    break;
                case '>':
                    if (next == '=') emit(Token::GE, 2);
                    else emit(Token::GT);
                    break;
                case '+':
                    if (next == '+') emit(Token::CONCAT, 2);
                    else emit(Token::PLUS);
                    break;
                case '&':
                    if (next == '&') emit(Token::AND, 2);
                    else skip_char();
                    break;
                case '|':
                    if (next == '|') emit(Token::OR, 2);
                    else skip_char();
                    break;

                case '-':
                    if (next == '>') {
                        emit(Token::IMPL, 2);
                    } else if (is_digit(next) && !after_operand()) {
                        // "-5" is a negative literal only where a value may
                        // start; after an operand ("x -1", "1-2") the '-' is
                        // the binary minus operator.
                        tokenize_int();
                    } else {
                        emit(Token::MINUS);
                    }
                    break;

                case '/':
                    // '/' directly followed by an alphanumeric or '.' is
                    // always lexed as a path; division needs "a / b".
                    if (is_alnum(next) || next == '.') tokenize_path();
                    else emit(Token::SLASH);
                    break;

                default:
                    if (is_digit(c)) tokenize_int();
                    else if (is_alpha(c) || c == '_') tokenize_ident();
                    else skip_char();
                    break;
            }
        }
        tokens.push_back({Token::EOF_, "", line, col});
        return tokens;
    }

private:
    std::vector<Token> tokens;
    std::string input;
    size_t pos;
    size_t line;
    size_t col;

    // <cctype> classifiers have undefined behaviour for negative arguments,
    // which a plain char holds for any non-ASCII byte; go through
    // unsigned char first.
    static bool is_digit(char ch) { return isdigit(static_cast<unsigned char>(ch)) != 0; }
    static bool is_alpha(char ch) { return isalpha(static_cast<unsigned char>(ch)) != 0; }
    static bool is_alnum(char ch) { return isalnum(static_cast<unsigned char>(ch)) != 0; }
    static bool is_space(char ch) { return isspace(static_cast<unsigned char>(ch)) != 0; }

    // Appends a value-less token of `width` characters at the current position.
    void emit(Token::Type type, size_t width = 1) {
        tokens.push_back(Token{type, "", line, col});
        pos += width;
        col += width;
    }

    // Drops one character that does not belong to any token.
    void skip_char() {
        pos++;
        col++;
    }

    // Consumes one character, keeping line/col correct across newlines.
    void advance_char() {
        if (input[pos] == '\n') {
            line++;
            col = 1;
        } else {
            col++;
        }
        pos++;
    }

    // True when the most recent token can end an operand, i.e. a following
    // '-' must be the binary operator rather than the sign of a literal.
    bool after_operand() const {
        if (tokens.empty()) return false;
        switch (tokens.back().type) {
            case Token::IDENT:
            case Token::STRING:
            case Token::PATH:
            case Token::INT:
            case Token::BOOL:
            case Token::RPAREN:
            case Token::RBRACKET:
            case Token::RBRACE:
                return true;
            default:
                return false;
        }
    }

    // Skips blanks and '#' line comments.
    void skip_whitespace() {
        while (pos < input.size()) {
            const char c = input[pos];
            if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
                advance_char();
            } else if (c == '#') {
                // The terminating newline is left for the branch above.
                while (pos < input.size() && input[pos] != '\n') {
                    pos++;
                    col++;
                }
            } else {
                break;
            }
        }
    }

    // Lexes a double-quoted string starting at the opening quote. Handles the
    // escapes \n \t \r \" \\; any other escaped character stands for itself.
    // An unterminated string runs to the end of the input.
    void tokenize_string() {
        const size_t start_line = line;
        const size_t start_col = col;
        advance_char();  // opening quote
        std::string text;
        while (pos < input.size() && input[pos] != '"') {
            if (input[pos] == '\\' && pos + 1 < input.size()) {
                advance_char();  // the backslash
                switch (input[pos]) {
                    case 'n': text += '\n'; break;
                    case 't': text += '\t'; break;
                    case 'r': text += '\r'; break;
                    case '"': text += '"'; break;
                    case '\\': text += '\\'; break;
                    default: text += input[pos]; break;
                }
            } else {
                text += input[pos];
            }
            advance_char();
        }
        if (pos < input.size()) advance_char();  // closing quote
        tokens.push_back({Token::STRING, text, start_line, start_col});
    }

    // Lexes a path literal: everything up to whitespace, a bracket of any
    // kind, or a ';' / ',' separator.
    void tokenize_path() {
        const size_t start = pos;
        while (pos < input.size() && !is_space(input[pos]) &&
               input[pos] != '(' && input[pos] != ')' &&
               input[pos] != '{' && input[pos] != '}' &&
               input[pos] != '[' && input[pos] != ']' &&
               input[pos] != ';' && input[pos] != ',') {
            pos++;
        }
        std::string path = input.substr(start, pos - start);
        tokens.push_back({Token::PATH, path, line, col});
        col += path.size();
    }

    // Lexes an optionally '-'-prefixed run of decimal digits.
    void tokenize_int() {
        const size_t start = pos;
        if (input[pos] == '-') pos++;
        while (pos < input.size() && is_digit(input[pos])) pos++;
        std::string num = input.substr(start, pos - start);
        tokens.push_back({Token::INT, num, line, col});
        col += num.size();
    }

    // Maps reserved words to their token type; everything else is IDENT.
    static Token::Type classify_word(const std::string& word) {
        if (word == "let") return Token::LET;
        if (word == "in") return Token::IN;
        if (word == "rec") return Token::REC;
        if (word == "if") return Token::IF;
        if (word == "then") return Token::THEN;
        if (word == "else") return Token::ELSE;
        if (word == "assert") return Token::ASSERT;
        if (word == "with") return Token::WITH;
        if (word == "true" || word == "false") return Token::BOOL;
        return Token::IDENT;
    }

    // Lexes an identifier or keyword. '-' is allowed inside identifiers, so
    // "a-b" is a single IDENT.
    void tokenize_ident() {
        const size_t start = pos;
        while (pos < input.size() &&
               (is_alnum(input[pos]) || input[pos] == '_' || input[pos] == '-')) {
            pos++;
        }
        std::string ident = input.substr(start, pos - start);
        tokens.push_back({classify_word(ident), ident, line, col});
        col += ident.size();
    }
};
// Private implementation of Parser: a recursive-descent parser that walks the
// token vector produced by Lexer and builds Node trees.
//
// Syntax problems are reported on std::cerr and parsing carries on with a
// best-effort tree; the only exceptions thrown from here come from
// token_to_binop (std::runtime_error) and std::stoll on integer literals.
class Parser::Impl {
public:
    std::vector<Token> tokens;  // token stream being parsed
    size_t pos = 0;             // index of the current token in `tokens`
    std::string current_file;   // path given to Parser::parse

    // Current token, or a synthetic EOF_ token once `pos` is past the end.
    const Token& current() {
        if (pos < tokens.size()) return tokens[pos];
        static Token eof{Token::EOF_, "", 0, 0};
        return eof;
    }

    void advance() { pos++; }

    // Consumes the current token if it has the given type.
    bool consume(Token::Type type) {
        if (current().type == type) {
            advance();
            return true;
        }
        return false;
    }

    // Like consume(), but logs a diagnostic (and consumes nothing) on mismatch.
    bool expect(Token::Type type) {
        if (current().type != type) {
            std::cerr << "Expected token " << type << " but got " << current().type
                      << " at " << current().line << ":" << current().col << "\n";
            return false;
        }
        advance();
        return true;
    }

    // Binary operator precedence (higher = tighter binding, 0 = not a binary
    // operator). The ordering follows the Nix operator table: `->` binds
    // loosest, then `||`, `&&`, equality, comparison, `+`/`-`, `*`/`/`, and
    // `++` binds tightest.
    int get_precedence(Token::Type type) {
        switch (type) {
            case Token::IMPL: return 1;
            case Token::OR: return 2;
            case Token::AND: return 3;
            case Token::EQEQ: case Token::NE: return 4;
            case Token::LT: case Token::GT: case Token::LE: case Token::GE: return 5;
            case Token::PLUS: case Token::MINUS: return 6;
            case Token::STAR: case Token::SLASH: return 7;
            case Token::CONCAT: return 8;
            default: return 0;
        }
    }

    // `->` and `++` group to the right; every other binary operator here
    // groups to the left.
    bool is_right_assoc(Token::Type type) {
        return type == Token::IMPL || type == Token::CONCAT;
    }

    // Convert token type to binary operator.
    // Throws std::runtime_error for a token that is not a binary operator.
    BinaryOp token_to_binop(Token::Type type) {
        switch (type) {
            case Token::PLUS: return BinaryOp::ADD;
            case Token::MINUS: return BinaryOp::SUB;
            case Token::STAR: return BinaryOp::MUL;
            case Token::SLASH: return BinaryOp::DIV;
            case Token::CONCAT: return BinaryOp::CONCAT;
            case Token::EQEQ: return BinaryOp::EQ;
            case Token::NE: return BinaryOp::NE;
            case Token::LT: return BinaryOp::LT;
            case Token::GT: return BinaryOp::GT;
            case Token::LE: return BinaryOp::LE;
            case Token::GE: return BinaryOp::GE;
            case Token::AND: return BinaryOp::AND;
            case Token::OR: return BinaryOp::OR;
            case Token::IMPL: return BinaryOp::IMPL;
            default: throw std::runtime_error("Invalid binary operator");
        }
    }

    // Entry point: keyword-led forms (if / let / assert / with), otherwise an
    // operator expression.
    std::shared_ptr<Node> parse_expr() {
        if (consume(Token::IF)) {
            auto cond = parse_expr();
            expect(Token::THEN);
            auto then = parse_expr();
            expect(Token::ELSE);
            auto else_ = parse_expr();
            return std::make_shared<Node>(IfNode(cond, then, else_));
        }
        if (consume(Token::LET)) {
            bool is_rec = consume(Token::REC);
            std::vector<std::pair<std::string, std::shared_ptr<Node>>> bindings;
            parse_bindings(bindings);
            expect(Token::IN);
            auto body = parse_expr();
            if (is_rec) {
                auto letrec = LetRecNode(body);
                letrec.bindings = std::move(bindings);
                return std::make_shared<Node>(std::move(letrec));
            } else {
                auto let = LetNode(body);
                let.bindings = std::move(bindings);
                return std::make_shared<Node>(std::move(let));
            }
        }
        if (consume(Token::ASSERT)) {
            auto cond = parse_expr();
            expect(Token::SEMICOLON);
            auto body = parse_expr();
            return std::make_shared<Node>(AssertNode(cond, body));
        }
        if (consume(Token::WITH)) {
            auto attrs = parse_expr();
            expect(Token::SEMICOLON);
            auto body = parse_expr();
            return std::make_shared<Node>(WithNode(attrs, body));
        }
        return parse_expr1();
    }

    std::shared_ptr<Node> parse_expr1() {
        return parse_binary_op(0);
    }

    // Precedence climbing for binary operators.
    std::shared_ptr<Node> parse_binary_op(int min_prec) {
        auto left = parse_selection();
        while (true) {
            int prec = get_precedence(current().type);
            if (prec == 0 || prec < min_prec) break;
            Token op_token = current();
            advance();
            // Left-associative operators must not absorb another operator of
            // the same level on their right; right-associative ones must.
            const int next_min = is_right_assoc(op_token.type) ? prec : prec + 1;
            auto right = parse_binary_op(next_min);
            left = std::make_shared<Node>(BinaryOpNode(
                token_to_binop(op_token.type),
                left,
                right
            ));
        }
        return left;
    }

    // Attribute selection. `a.b.c` becomes SelectNode(SelectNode(a, "b"), "c"),
    // so every component of the path is kept.
    std::shared_ptr<Node> parse_selection() {
        auto left = parse_expr2();
        while (current().type == Token::DOT) {
            advance();
            Token name = current();
            if (name.type == Token::IDENT) {
                advance();
                auto attr = std::make_shared<Node>(ConstStringNode(name.value));
                left = std::make_shared<Node>(SelectNode(left, attr));
                continue;
            }
            if (consume(Token::LBRACE)) {
                auto result = std::make_shared<Node>(SelectNode(left, std::make_shared<Node>(ConstStringNode(name.value))));
                parse_expr_attrs(result);
                expect(Token::RBRACE);
                return result;
            }
            // '.' followed by something else: the dot has been consumed and
            // the expression parsed so far is returned as is.
            return left;
        }
        return left;
    }

    void parse_expr_attrs(std::shared_ptr<Node>&) {
        // Extended selection syntax (not implemented; consumes nothing).
    }

    // Postfix application: `f [arg]` and `f "string"` both build an AppNode.
    std::shared_ptr<Node> parse_expr2() {
        std::shared_ptr<Node> left = parse_expr3();
        while (true) {
            if (current().type == Token::LBRACKET) {
                advance();
                auto arg = parse_expr();
                expect(Token::RBRACKET);
                left = std::make_shared<Node>(AppNode(left, arg));
            } else if (current().type == Token::STRING) {
                Token s = current();
                advance();
                auto arg = std::make_shared<Node>(ConstStringNode(s.value));
                left = std::make_shared<Node>(AppNode(left, arg));
            } else {
                break;
            }
        }
        return left;
    }

    // Primary expressions and unary operators. Always consumes at least one
    // token, which is what keeps the callers' loops terminating.
    std::shared_ptr<Node> parse_expr3() {
        // Unary operators take a full selection/application as operand, so
        // `!cfg.enable` negates `cfg.enable` rather than selecting from `!cfg`.
        if (consume(Token::MINUS)) {
            auto operand = parse_selection();
            return std::make_shared<Node>(UnaryOpNode(UnaryOp::NEG, operand));
        }
        if (consume(Token::NOT)) {
            auto operand = parse_selection();
            return std::make_shared<Node>(UnaryOpNode(UnaryOp::NOT, operand));
        }
        if (consume(Token::LPAREN)) {
            auto expr = parse_expr();
            expect(Token::RPAREN);
            return expr;
        }
        if (consume(Token::LBRACE)) {
            return parse_attrs();
        }
        if (consume(Token::LBRACKET)) {
            return parse_list();
        }
        Token t = current();
        if (t.type == Token::IDENT) {
            advance();
            return std::make_shared<Node>(VarNode(0, t.value));
        }
        if (t.type == Token::INT) {
            advance();
            // std::stoll throws std::out_of_range for literals that do not
            // fit in long long.
            return std::make_shared<Node>(ConstIntNode(std::stoll(t.value)));
        }
        if (t.type == Token::STRING) {
            advance();
            return std::make_shared<Node>(ConstStringNode(t.value));
        }
        if (t.type == Token::PATH) {
            advance();
            return std::make_shared<Node>(ConstPathNode(t.value));
        }
        if (t.type == Token::BOOL) {
            advance();
            return std::make_shared<Node>(ConstBoolNode(t.value == "true"));
        }
        std::cerr << "Unknown token: " << t.value << " (type " << t.type << ")\n";
        advance();
        return std::make_shared<Node>(ConstNullNode());
    }

    // Parses the inside of `{ ... }` (the opening brace is already consumed).
    // Entries are `key : value` or `key @ pattern value`, separated by ',' or
    // ';'; a `rec` keyword marks the set as recursive.
    std::shared_ptr<Node> parse_attrs() {
        auto attrs = AttrsetNode(false);
        while (current().type != Token::RBRACE && current().type != Token::EOF_) {
            const size_t before = pos;
            if (consume(Token::REC)) {
                attrs.recursive = true;
                continue;
            }
            if (current().type == Token::IDENT || current().type == Token::STRING) {
                Token key = current();
                advance();
                std::string key_str = key.value;
                if (consume(Token::COLON)) {
                    auto value = parse_expr();
                    attrs.attrs.push_back({key_str, value});
                } else if (consume(Token::AT)) {
                    auto pattern = parse_expr();  // parsed, then discarded
                    (void)pattern;
                    auto value = parse_expr();
                    attrs.attrs.push_back({key_str, value});
                }
            }
            if (consume(Token::COMMA)) continue;
            if (consume(Token::SEMICOLON)) continue;
            if (pos == before) {
                // Nothing above accepted this token; skip it so the loop
                // cannot spin forever on input such as `{ 1 }`.
                std::cerr << "Unexpected token in attribute set: " << current().value
                          << " (type " << current().type << ") at "
                          << current().line << ":" << current().col << "\n";
                advance();
            }
        }
        expect(Token::RBRACE);
        return std::make_shared<Node>(std::move(attrs));
    }

    // Parses the inside of `[ ... ]` (the opening bracket is already consumed).
    // Elements are comma-separated and folded right-to-left into nested
    // `__list elem rest` applications ending in a null node.
    std::shared_ptr<Node> parse_list() {
        std::shared_ptr<Node> list = std::make_shared<Node>(ConstNullNode());
        if (consume(Token::RBRACKET)) {
            return list;
        }
        std::vector<std::shared_ptr<Node>> elements;
        while (current().type != Token::RBRACKET) {
            elements.push_back(parse_expr());
            if (!consume(Token::COMMA)) break;
        }
        expect(Token::RBRACKET);
        for (auto it = elements.rbegin(); it != elements.rend(); ++it) {
            list = std::make_shared<Node>(AppNode(
                std::make_shared<Node>(AppNode(
                    std::make_shared<Node>(VarNode(0, "__list")),
                    *it
                )),
                list
            ));
        }
        return list;
    }

    // Parses `name : value ;` / `name @ pattern value ;` bindings of a let.
    // A `{ ... }` in binding position is parsed and dropped.
    void parse_bindings(std::vector<std::pair<std::string, std::shared_ptr<Node>>>& bindings) {
        while (current().type == Token::IDENT || current().type == Token::LBRACE) {
            if (current().type == Token::LBRACE) {
                auto inherit = parse_expr();
                (void)inherit;
                continue;
            }
            Token key = current();
            expect(Token::IDENT);
            if (consume(Token::AT)) {
                auto pattern = parse_expr();  // parsed, then discarded
                (void)pattern;
                auto value = parse_expr();
                bindings.push_back({key.value, value});
            } else {
                expect(Token::COLON);
                auto value = parse_expr();
                bindings.push_back({key.value, value});
            }
            if (!consume(Token::SEMICOLON)) break;
        }
    }
};
Parser::Parser() : pImpl(std::make_unique<Impl>()) {}
Parser::~Parser() = default;
std::shared_ptr<Node> Parser::parse(const std::string& source, const std::string& path) {
pImpl->current_file = path;
Lexer lexer(source);
pImpl->tokens = lexer.tokenize();
pImpl->pos = 0;
return pImpl->parse_expr();
}
// Loads the file at `path` via read_file and parses its contents, using
// `path` as the file name.
std::shared_ptr<Node> Parser::parse_file(const std::string& path) {
    const std::string text = read_file(path);
    return parse(text, path);
}
}