initial commit

Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: I1ad48ade1bc8234b7d7c9fe3d976a5be6a6a6964
2026-02-20 22:01:32 +03:00 · 2026-02-20 22:01:32 +03:00 · 618a58b2b8
commit 618a58b2b8
14 changed files with 2119 additions and 0 deletions
--- a/src/irc/parser.cpp
+++ b/src/irc/parser.cpp
@@ -0,0 +1,621 @@
+#include "parser.h"
+
+#include <array>
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+#include <regex>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace nix_irc {
+
+// Returns `s` without its leading and trailing blanks (space, tab, LF, CR).
+// The result is empty when `s` is empty or contains nothing but blanks.
+static std::string trim(const std::string& s) {
+    static const char* const kBlanks = " \t\n\r";
+
+    const std::string::size_type first = s.find_first_not_of(kBlanks);
+    if (first == std::string::npos) {
+        return std::string();
+    }
+
+    // At least one non-blank character exists, so this search cannot fail.
+    const std::string::size_type last = s.find_last_not_of(kBlanks);
+    return std::string(s, first, last - first + 1);
+}
+
+// Reads the whole file at `path` and returns its bytes as a string.
+//
+// The file is opened in binary mode so that the size reported by ftell()
+// agrees with the number of bytes fread() delivers; the returned string is
+// trimmed to the number of bytes actually read.
+//
+// Throws std::runtime_error when the file cannot be opened, when its size
+// cannot be determined (e.g. the stream is not seekable), or when reading
+// fails.
+static std::string read_file(const std::string& path) {
+    // Closes the stream on every exit path, including exceptions.
+    struct FileCloser {
+        void operator()(FILE* f) const noexcept { fclose(f); }
+    };
+
+    std::unique_ptr<FILE, FileCloser> file(fopen(path.c_str(), "rb"));
+    if (!file) {
+        throw std::runtime_error("Cannot open file: " + path);
+    }
+
+    if (fseek(file.get(), 0, SEEK_END) != 0) {
+        throw std::runtime_error("Cannot read file: " + path);
+    }
+    // ftell() returns -1 on failure; converting that to size_t would request
+    // an enormous buffer, so it is rejected explicitly.
+    const long size = ftell(file.get());
+    if (size < 0 || fseek(file.get(), 0, SEEK_SET) != 0) {
+        throw std::runtime_error("Cannot read file: " + path);
+    }
+
+    std::string content(static_cast<size_t>(size), '\0');
+    const size_t got = content.empty()
+        ? 0
+        : fread(&content[0], 1, content.size(), file.get());
+    if (got != content.size() && ferror(file.get())) {
+        throw std::runtime_error("Cannot read file: " + path);
+    }
+
+    // A short read without an error (the file shrank meanwhile) must not
+    // leave trailing NUL padding in the result.
+    content.resize(got);
+    return content;
+}
+
+// Runs `cmd` through popen() and returns {captured stdout, ""}.
+//
+// Only the command's standard output is read; nothing is captured from its
+// standard error, so the second element of the pair is always empty.
+// Output is appended as C strings, so it is cut at the first NUL byte of
+// each chunk read.
+//
+// Throws std::runtime_error("popen failed") when the pipe cannot be created
+// and std::runtime_error("Command failed: <cmd>") when pclose() reports a
+// non-zero status.
+//
+// NOTE(review): `cmd` is handed to the shell verbatim; callers must not build
+// it from untrusted input without quoting.
+static std::pair<std::string, std::string> run_command(const std::string& cmd) {
+    // Guarantees the pipe is closed if appending to `result` throws.
+    struct PipeCloser {
+        void operator()(FILE* p) const noexcept { pclose(p); }
+    };
+
+    std::unique_ptr<FILE, PipeCloser> pipe(popen(cmd.c_str(), "r"));
+    if (!pipe) throw std::runtime_error("popen failed");
+
+    std::string result;
+    std::array<char, 256> buffer;
+    while (fgets(buffer.data(), static_cast<int>(buffer.size()), pipe.get()) != nullptr) {
+        result += buffer.data();
+    }
+
+    // Release before closing by hand: the exit status is needed here and the
+    // deleter must not close the pipe a second time.
+    const int status = pclose(pipe.release());
+    if (status != 0) {
+        throw std::runtime_error("Command failed: " + cmd);
+    }
+    return {std::move(result), std::string()};
+}
+
+// One lexical token plus the source position recorded for it by the lexer.
+struct Token {
+    // The enumerator order is observable: the parser's diagnostics print the
+    // numeric value of a token type, so new kinds must not be inserted in the
+    // middle without accepting that those numbers change.
+    enum Type {
+        // Brackets
+        LPAREN,
+        RPAREN,
+        LBRACE,
+        RBRACE,
+        LBRACKET,
+        RBRACKET,
+        // Names and literals
+        IDENT,
+        STRING,
+        PATH,
+        INT,
+        BOOL,
+        // Keywords
+        LET,
+        IN,
+        REC,
+        IF,
+        THEN,
+        ELSE,
+        ASSERT,
+        WITH,
+        // Punctuation
+        DOT,
+        SEMICOLON,
+        COLON,
+        AT,
+        COMMA,
+        // Arithmetic and list operators
+        PLUS,
+        MINUS,
+        STAR,
+        SLASH,
+        CONCAT,
+        // Comparison operators
+        EQEQ,
+        NE,
+        LT,
+        GT,
+        LE,
+        GE,
+        // Logical operators
+        AND,
+        OR,
+        IMPL,
+        NOT,
+        // End of input
+        EOF_
+    };
+
+    Type type;          // kind of token
+    std::string value;  // token text; left empty by the lexer for punctuation and operators
+    size_t line;        // line number stored by the lexer (it starts counting at 1)
+    size_t col;         // column number stored by the lexer (it starts counting at 1)
+};
+
+// Turns source text into a flat vector of Tokens.
+//
+// The lexer stores a reference to the input, not a copy, so the string passed
+// to the constructor must stay alive for as long as the Lexer is used.
+class Lexer {
+public:
+    explicit Lexer(const std::string& input) : input(input), pos(0), line(1), col(1) {}
+
+    // Scans the rest of the input and returns every token found, followed by
+    // a terminating EOF_ token. Whitespace and '#' line comments are skipped.
+    // Characters that start no known token (for example a lone '=', '&' or
+    // '|') are dropped without a diagnostic.
+    std::vector<Token> tokenize() {
+        while (true) {
+            skip_whitespace();
+            if (pos >= input.size()) break;
+
+            const char c = input[pos];
+            // '\0' stands in for "no next character"; it satisfies none of
+            // the lookahead tests below.
+            const char next = (pos + 1 < input.size()) ? input[pos + 1] : '\0';
+
+            Token::Type type = Token::EOF_;
+            if (c == '"') {
+                tokenize_string();
+            } else if (two_char_operator(c, next, type)) {
+                add(type, 2);
+            } else if (c == '/') {
+                // '/' directly followed by a letter, digit or '.' starts a
+                // path; otherwise it is the division operator.
+                if (is_alnum(next) || next == '.') {
+                    tokenize_path();
+                } else {
+                    add(Token::SLASH, 1);
+                }
+            } else if (c == '-') {
+                // "-<digit>" is a negative literal only where an operand may
+                // begin. After something that ends an operand ("5-3", "x -1",
+                // ")-2") the '-' is the binary minus operator.
+                if (is_digit(next) && !previous_ends_operand()) {
+                    tokenize_int();
+                } else {
+                    add(Token::MINUS, 1);
+                }
+            } else if (is_digit(c)) {
+                tokenize_int();
+            } else if (is_alpha(c) || c == '_') {
+                tokenize_ident();
+            } else if (single_char_token(c, type)) {
+                add(type, 1);
+            } else {
+                // Unknown character: skipped silently.
+                ++pos;
+                ++col;
+            }
+        }
+        tokens.push_back(Token{Token::EOF_, "", line, col});
+        return tokens;
+    }
+
+private:
+    std::vector<Token> tokens;
+    const std::string& input;
+    size_t pos;
+    size_t line;
+    size_t col;
+
+    // <cctype> classifiers have undefined behaviour for negative arguments,
+    // which plain `char` produces for non-ASCII bytes; go through unsigned char.
+    static bool is_digit(char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; }
+    static bool is_alpha(char c) { return std::isalpha(static_cast<unsigned char>(c)) != 0; }
+    static bool is_alnum(char c) { return std::isalnum(static_cast<unsigned char>(c)) != 0; }
+    static bool is_space(char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; }
+
+    // Maps a two-character operator starting with `c` to its token type.
+    // Returns false (leaving `out` untouched) when `c next` is not one.
+    static bool two_char_operator(char c, char next, Token::Type& out) {
+        switch (c) {
+            case '=': if (next == '=') { out = Token::EQEQ; return true; } break;
+            case '!': if (next == '=') { out = Token::NE; return true; } break;
+            case '<': if (next == '=') { out = Token::LE; return true; } break;
+            case '>': if (next == '=') { out = Token::GE; return true; } break;
+            case '+': if (next == '+') { out = Token::CONCAT; return true; } break;
+            case '&': if (next == '&') { out = Token::AND; return true; } break;
+            case '|': if (next == '|') { out = Token::OR; return true; } break;
+            case '-': if (next == '>') { out = Token::IMPL; return true; } break;
+            default: break;
+        }
+        return false;
+    }
+
+    // Maps a single punctuation/operator character to its token type.
+    // '/', '-' and '"' are not listed because they need lookahead or their
+    // own scanner.
+    static bool single_char_token(char c, Token::Type& out) {
+        switch (c) {
+            case '(': out = Token::LPAREN; return true;
+            case ')': out = Token::RPAREN; return true;
+            case '{': out = Token::LBRACE; return true;
+            case '}': out = Token::RBRACE; return true;
+            case '[': out = Token::LBRACKET; return true;
+            case ']': out = Token::RBRACKET; return true;
+            case ';': out = Token::SEMICOLON; return true;
+            case ':': out = Token::COLON; return true;
+            case '@': out = Token::AT; return true;
+            case ',': out = Token::COMMA; return true;
+            case '+': out = Token::PLUS; return true;
+            case '*': out = Token::STAR; return true;
+            case '<': out = Token::LT; return true;
+            case '>': out = Token::GT; return true;
+            case '!': out = Token::NOT; return true;
+            case '.': out = Token::DOT; return true;
+            default: return false;
+        }
+    }
+
+    // True when the most recent token can be the end of an operand, i.e. a
+    // following '-' must be read as subtraction rather than a sign.
+    bool previous_ends_operand() const {
+        if (tokens.empty()) return false;
+        switch (tokens.back().type) {
+            case Token::IDENT:
+            case Token::STRING:
+            case Token::PATH:
+            case Token::INT:
+            case Token::BOOL:
+            case Token::RPAREN:
+            case Token::RBRACKET:
+            case Token::RBRACE:
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    // Appends a value-less token at the current position and consumes
+    // `width` characters (none of which is a newline).
+    void add(Token::Type type, size_t width) {
+        tokens.push_back(Token{type, "", line, col});
+        pos += width;
+        col += width;
+    }
+
+    // Consumes one character, keeping line/col in step with pos.
+    void step() {
+        if (input[pos] == '\n') {
+            ++line;
+            col = 1;
+        } else {
+            ++col;
+        }
+        ++pos;
+    }
+
+    // Skips blanks and '#' comments (which run to the end of the line).
+    void skip_whitespace() {
+        while (pos < input.size()) {
+            const char c = input[pos];
+            if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
+                step();
+            } else if (c == '#') {
+                while (pos < input.size() && input[pos] != '\n') step();
+            } else {
+                break;
+            }
+        }
+    }
+
+    // Scans a double-quoted string starting at the opening quote. The escapes
+    // \n \t \r \" \\ are translated; any other escaped character stands for
+    // itself. The token carries the position of the opening quote. An
+    // unterminated string ends at end of input.
+    void tokenize_string() {
+        const size_t start_line = line;
+        const size_t start_col = col;
+        step();  // opening quote
+
+        std::string s;
+        while (pos < input.size() && input[pos] != '"') {
+            if (input[pos] == '\\' && pos + 1 < input.size()) {
+                step();  // backslash
+                switch (input[pos]) {
+                    case 'n': s += '\n'; break;
+                    case 't': s += '\t'; break;
+                    case 'r': s += '\r'; break;
+                    case '"': s += '"'; break;
+                    case '\\': s += '\\'; break;
+                    default: s += input[pos]; break;
+                }
+            } else {
+                s += input[pos];
+            }
+            step();  // also counts newlines inside the string
+        }
+        // Only consume a closing quote that is really there, so pos never
+        // moves past the end of the input.
+        if (pos < input.size()) step();
+
+        tokens.push_back(Token{Token::STRING, s, start_line, start_col});
+    }
+
+    // Scans a path. It runs until whitespace, a bracket of any kind, or a
+    // ';' / ',' separator, so "/a/b;" yields the path "/a/b" and leaves the
+    // ';' for the next token.
+    void tokenize_path() {
+        const size_t start = pos;
+        while (pos < input.size() && !is_space(input[pos]) &&
+               input[pos] != '(' && input[pos] != ')' &&
+               input[pos] != '{' && input[pos] != '}' &&
+               input[pos] != '[' && input[pos] != ']' &&
+               input[pos] != ';' && input[pos] != ',') {
+            pos++;
+        }
+        std::string path = input.substr(start, pos - start);
+        tokens.push_back(Token{Token::PATH, path, line, col});
+        col += path.size();
+    }
+
+    // Scans an optional leading '-' followed by decimal digits.
+    void tokenize_int() {
+        const size_t start = pos;
+        if (input[pos] == '-') pos++;
+        while (pos < input.size() && is_digit(input[pos])) pos++;
+        std::string num = input.substr(start, pos - start);
+        tokens.push_back(Token{Token::INT, num, line, col});
+        col += num.size();
+    }
+
+    // Scans an identifier ([A-Za-z0-9_-] after the first character) and
+    // reclassifies it when it spells a keyword or a boolean literal.
+    void tokenize_ident() {
+        const size_t start = pos;
+        while (pos < input.size() &&
+               (is_alnum(input[pos]) || input[pos] == '_' || input[pos] == '-')) {
+            pos++;
+        }
+        std::string ident = input.substr(start, pos - start);
+
+        Token::Type type = Token::IDENT;
+        if (ident == "let") type = Token::LET;
+        else if (ident == "in") type = Token::IN;
+        else if (ident == "rec") type = Token::REC;
+        else if (ident == "if") type = Token::IF;
+        else if (ident == "then") type = Token::THEN;
+        else if (ident == "else") type = Token::ELSE;
+        else if (ident == "assert") type = Token::ASSERT;
+        else if (ident == "with") type = Token::WITH;
+        else if (ident == "true" || ident == "false") type = Token::BOOL;
+
+        tokens.push_back(Token{type, ident, line, col});
+        col += ident.size();
+    }
+};
+
+// Recursive-descent parser state and grammar rules behind Parser.
+//
+// Errors are reported on std::cerr and parsing continues on a best-effort
+// basis; the only exception thrown by the rules themselves is the
+// std::runtime_error from token_to_binop().
+class Parser::Impl {
+public:
+    std::vector<Token> tokens;  // token stream being parsed
+    size_t pos = 0;             // index of the current token
+    std::string current_file;   // set by Parser::parse; not read by the rules below
+
+    // Returns the token at `pos`, or a synthetic EOF_ token once `pos` has
+    // run past the end of the stream.
+    const Token& current() {
+        if (pos < tokens.size()) return tokens[pos];
+        static const Token eof{Token::EOF_, "", 0, 0};
+        return eof;
+    }
+
+    void advance() { pos++; }
+
+    // Consumes the current token if it has the given type.
+    bool consume(Token::Type type) {
+        if (current().type != type) return false;
+        advance();
+        return true;
+    }
+
+    // Like consume(), but a mismatch is reported on stderr. On mismatch the
+    // token is left in place and false is returned.
+    bool expect(Token::Type type) {
+        if (current().type != type) {
+            std::cerr << "Expected token " << type << " but got " << current().type
+                      << " at " << current().line << ":" << current().col << "\n";
+            return false;
+        }
+        advance();
+        return true;
+    }
+
+    // Get operator precedence (higher = tighter binding); 0 means "not a
+    // binary operator".
+    // NOTE(review): this table differs from Nix, where `->` binds weakest
+    // and `++` binds tighter than `*` and `/`. Confirm which is intended.
+    int get_precedence(Token::Type type) {
+        switch (type) {
+            case Token::OR: return 1;
+            case Token::AND: return 2;
+            case Token::IMPL: return 3;
+            case Token::EQEQ: case Token::NE: return 4;
+            case Token::LT: case Token::GT: case Token::LE: case Token::GE: return 5;
+            case Token::CONCAT: return 6;
+            case Token::PLUS: case Token::MINUS: return 7;
+            case Token::STAR: case Token::SLASH: return 8;
+            default: return 0;
+        }
+    }
+
+    // Convert token type to binary operator.
+    // Throws std::runtime_error for a token that is not a binary operator.
+    BinaryOp token_to_binop(Token::Type type) {
+        switch (type) {
+            case Token::PLUS: return BinaryOp::ADD;
+            case Token::MINUS: return BinaryOp::SUB;
+            case Token::STAR: return BinaryOp::MUL;
+            case Token::SLASH: return BinaryOp::DIV;
+            case Token::CONCAT: return BinaryOp::CONCAT;
+            case Token::EQEQ: return BinaryOp::EQ;
+            case Token::NE: return BinaryOp::NE;
+            case Token::LT: return BinaryOp::LT;
+            case Token::GT: return BinaryOp::GT;
+            case Token::LE: return BinaryOp::LE;
+            case Token::GE: return BinaryOp::GE;
+            case Token::AND: return BinaryOp::AND;
+            case Token::OR: return BinaryOp::OR;
+            case Token::IMPL: return BinaryOp::IMPL;
+            default: throw std::runtime_error("Invalid binary operator");
+        }
+    }
+
+    // expr := "if" expr "then" expr "else" expr
+    //       | "let" ["rec"] bindings "in" expr
+    //       | "assert" expr ";" expr
+    //       | "with" expr ";" expr
+    //       | binary-operator expression
+    std::shared_ptr<Node> parse_expr() {
+        if (consume(Token::IF)) {
+            auto cond = parse_expr();
+            expect(Token::THEN);
+            auto then = parse_expr();
+            expect(Token::ELSE);
+            auto else_ = parse_expr();
+            return std::make_shared<Node>(IfNode(cond, then, else_));
+        }
+        if (consume(Token::LET)) {
+            const bool is_rec = consume(Token::REC);
+            std::vector<std::pair<std::string, std::shared_ptr<Node>>> bindings;
+            parse_bindings(bindings);
+            expect(Token::IN);
+            auto body = parse_expr();
+
+            if (is_rec) {
+                auto letrec = LetRecNode(body);
+                letrec.bindings = std::move(bindings);
+                return std::make_shared<Node>(std::move(letrec));
+            }
+            auto let = LetNode(body);
+            let.bindings = std::move(bindings);
+            return std::make_shared<Node>(std::move(let));
+        }
+        if (consume(Token::ASSERT)) {
+            auto cond = parse_expr();
+            expect(Token::SEMICOLON);
+            auto body = parse_expr();
+            return std::make_shared<Node>(AssertNode(cond, body));
+        }
+        if (consume(Token::WITH)) {
+            auto attrs = parse_expr();
+            expect(Token::SEMICOLON);
+            auto body = parse_expr();
+            return std::make_shared<Node>(WithNode(attrs, body));
+        }
+
+        return parse_expr1();
+    }
+
+    // Entry point of the binary-operator level (accepts any precedence).
+    std::shared_ptr<Node> parse_expr1() {
+        return parse_binary_op(0);
+    }
+
+    // Precedence climbing for binary operators. The right operand is parsed
+    // with `prec + 1`, so every operator groups to the left.
+    std::shared_ptr<Node> parse_binary_op(int min_prec) {
+        auto left = parse_selection();
+
+        while (true) {
+            const Token::Type op = current().type;
+            const int prec = get_precedence(op);
+            if (prec == 0 || prec < min_prec) break;
+
+            advance();
+
+            auto right = parse_binary_op(prec + 1);
+            left = std::make_shared<Node>(BinaryOpNode(
+                token_to_binop(op),
+                left,
+                right
+            ));
+        }
+
+        return left;
+    }
+
+    // Attribute selection: `e.a.b.c` becomes left-nested SelectNodes,
+    // Select(Select(Select(e, "a"), "b"), "c"), with each attribute name
+    // stored as a ConstStringNode.
+    //
+    // `e.{ ... }` is accepted as a stub of an extended selection syntax (see
+    // parse_expr_attrs). A '.' followed by anything else is reported through
+    // expect() and the expression parsed so far is returned.
+    std::shared_ptr<Node> parse_selection() {
+        auto left = parse_expr2();
+
+        while (consume(Token::DOT)) {
+            // Copied, not referenced: the value is needed after advance().
+            const Token name = current();
+
+            if (name.type == Token::IDENT) {
+                advance();
+                auto attr = std::make_shared<Node>(ConstStringNode(name.value));
+                left = std::make_shared<Node>(SelectNode(left, attr));
+                continue;
+            }
+
+            if (consume(Token::LBRACE)) {
+                auto result = std::make_shared<Node>(
+                    SelectNode(left, std::make_shared<Node>(ConstStringNode(name.value))));
+                parse_expr_attrs(result);
+                expect(Token::RBRACE);
+                return result;
+            }
+
+            // '.' without an attribute name: report it, leave the offending
+            // token in place, and stop selecting.
+            expect(Token::IDENT);
+            return left;
+        }
+
+        return left;
+    }
+
+    // Placeholder for the body of `e.{ ... }`; currently consumes nothing.
+    void parse_expr_attrs(std::shared_ptr<Node>&) {
+        // Extended selection syntax
+    }
+
+    // Postfix application: `f [expr]` and `f "string"` each build
+    // AppNode(f, arg), applied repeatedly from left to right.
+    std::shared_ptr<Node> parse_expr2() {
+        std::shared_ptr<Node> left = parse_expr3();
+
+        while (true) {
+            if (consume(Token::LBRACKET)) {
+                auto arg = parse_expr();
+                expect(Token::RBRACKET);
+                left = std::make_shared<Node>(AppNode(left, arg));
+            } else if (current().type == Token::STRING) {
+                auto arg = std::make_shared<Node>(ConstStringNode(current().value));
+                advance();
+                left = std::make_shared<Node>(AppNode(left, arg));
+            } else {
+                break;
+            }
+        }
+
+        return left;
+    }
+
+    // Primary expressions: unary '-' and '!', parenthesised expressions,
+    // attribute sets, lists, identifiers and literals. Any other token is
+    // reported, skipped, and replaced by a ConstNullNode.
+    std::shared_ptr<Node> parse_expr3() {
+        // Handle unary operators
+        if (consume(Token::MINUS)) {
+            auto operand = parse_expr3();
+            return std::make_shared<Node>(UnaryOpNode(UnaryOp::NEG, operand));
+        }
+
+        if (consume(Token::NOT)) {
+            auto operand = parse_expr3();
+            return std::make_shared<Node>(UnaryOpNode(UnaryOp::NOT, operand));
+        }
+
+        if (consume(Token::LPAREN)) {
+            auto expr = parse_expr();
+            expect(Token::RPAREN);
+            return expr;
+        }
+
+        if (consume(Token::LBRACE)) {
+            return parse_attrs();
+        }
+
+        if (consume(Token::LBRACKET)) {
+            return parse_list();
+        }
+
+        // Copied so the token stays usable after advance().
+        const Token t = current();
+
+        switch (t.type) {
+            case Token::IDENT:
+                advance();
+                // NOTE(review): the meaning of the leading 0 is defined by
+                // VarNode, which is not visible in this file.
+                return std::make_shared<Node>(VarNode(0, t.value));
+            case Token::INT:
+                advance();
+                // std::stoll throws std::out_of_range for literals that do
+                // not fit in long long.
+                return std::make_shared<Node>(ConstIntNode(std::stoll(t.value)));
+            case Token::STRING:
+                advance();
+                return std::make_shared<Node>(ConstStringNode(t.value));
+            case Token::PATH:
+                advance();
+                return std::make_shared<Node>(ConstPathNode(t.value));
+            case Token::BOOL:
+                advance();
+                return std::make_shared<Node>(ConstBoolNode(t.value == "true"));
+            default:
+                break;
+        }
+
+        std::cerr << "Unknown token: " << t.value << " (type " << t.type << ")\n";
+        advance();
+        return std::make_shared<Node>(ConstNullNode());
+    }
+
+    // Parses the inside of `{ ... }`; the opening brace is already consumed.
+    //
+    // Entries are `key : value` or `key @ pattern value` (the pattern is
+    // parsed and discarded), separated by ',' or ';'. A `rec` keyword inside
+    // the braces marks the set as recursive. A key followed by neither ':'
+    // nor '@' is dropped.
+    std::shared_ptr<Node> parse_attrs() {
+        auto attrs = AttrsetNode(false);
+
+        while (current().type != Token::RBRACE && current().type != Token::EOF_) {
+            const size_t iteration_start = pos;
+
+            if (consume(Token::REC)) {
+                attrs.recursive = true;
+                continue;
+            }
+
+            if (current().type == Token::IDENT || current().type == Token::STRING) {
+                const std::string key_str = current().value;
+                advance();
+
+                if (consume(Token::COLON)) {
+                    auto value = parse_expr();
+                    attrs.attrs.push_back({key_str, value});
+                } else if (consume(Token::AT)) {
+                    parse_expr();  // pattern: parsed for its tokens, result unused
+                    auto value = parse_expr();
+                    attrs.attrs.push_back({key_str, value});
+                }
+            }
+
+            if (consume(Token::COMMA)) continue;
+            if (consume(Token::SEMICOLON)) continue;
+
+            // No rule consumed anything (e.g. `{ 5 }`): without skipping the
+            // token this loop would never terminate.
+            if (pos == iteration_start) {
+                std::cerr << "Unknown token: " << current().value
+                          << " (type " << current().type << ")\n";
+                advance();
+            }
+        }
+
+        expect(Token::RBRACE);
+        return std::make_shared<Node>(std::move(attrs));
+    }
+
+    // Parses the inside of `[ ... ]`; the opening bracket is already
+    // consumed. Elements are separated by ','.
+    //
+    // The list is encoded as nested applications of a variable named
+    // "__list": App(App(__list, e1), App(App(__list, e2), ... null)).
+    // An empty list is a ConstNullNode.
+    std::shared_ptr<Node> parse_list() {
+        std::shared_ptr<Node> list = std::make_shared<Node>(ConstNullNode());
+
+        if (consume(Token::RBRACKET)) {
+            return list;
+        }
+
+        std::vector<std::shared_ptr<Node>> elements;
+        while (current().type != Token::RBRACKET) {
+            elements.push_back(parse_expr());
+            if (!consume(Token::COMMA)) break;
+        }
+        expect(Token::RBRACKET);
+
+        // Build from the back so the first element ends up outermost.
+        for (auto it = elements.rbegin(); it != elements.rend(); ++it) {
+            list = std::make_shared<Node>(AppNode(
+                std::make_shared<Node>(AppNode(
+                    std::make_shared<Node>(VarNode(0, "__list")),
+                    *it
+                )),
+                list
+            ));
+        }
+
+        return list;
+    }
+
+    // Parses the bindings of a `let`, appending (name, value) pairs to
+    // `bindings`. Each binding is `name : value` or `name @ pattern value`
+    // and must be followed by ';' for the loop to continue. A `{ ... }` in
+    // binding position is parsed and discarded.
+    void parse_bindings(std::vector<std::pair<std::string, std::shared_ptr<Node>>>& bindings) {
+        while (current().type == Token::IDENT || current().type == Token::LBRACE) {
+            if (current().type == Token::LBRACE) {
+                parse_expr();  // result intentionally unused
+                continue;
+            }
+
+            const std::string key = current().value;
+            expect(Token::IDENT);
+
+            if (consume(Token::AT)) {
+                parse_expr();  // pattern: parsed for its tokens, result unused
+                auto value = parse_expr();
+                bindings.push_back({key, value});
+            } else {
+                expect(Token::COLON);
+                auto value = parse_expr();
+                bindings.push_back({key, value});
+            }
+
+            if (!consume(Token::SEMICOLON)) break;
+        }
+    }
+};
+
+// Creates the parser together with its private implementation object.
+Parser::Parser()
+    : pImpl{std::make_unique<Impl>()} {}
+
+// Defaulted in this file, after the definition of Impl.
+Parser::~Parser() = default;
+
+// Tokenizes `source` and parses it as one expression, returning the root of
+// the resulting tree. `path` is only recorded as the current file name; the
+// file itself is not opened here.
+std::shared_ptr<Node> Parser::parse(const std::string& source, const std::string& path) {
+    Impl& impl = *pImpl;
+    impl.current_file = path;
+
+    // Replace whatever an earlier call left behind and rewind to the start.
+    impl.tokens = Lexer(source).tokenize();
+    impl.pos = 0;
+
+    return impl.parse_expr();
+}
+
+// Loads the file at `path` and parses its contents, using `path` as the
+// file name handed to parse(). Exceptions thrown by read_file() propagate.
+std::shared_ptr<Node> Parser::parse_file(const std::string& path) {
+    return parse(read_file(path), path);
+}
+
+}