cst: expand SyntaxKind enum to include additional nftables syntax variants

2025-06-02 12:32:33 +03:00 · 2025-06-02 12:32:33 +03:00 · f7f77a7e19
commit f7f77a7e19
parent 92735f3eee
2 changed files with 304 additions and 259 deletions
--- a/src/cst.rs
+++ b/src/cst.rs
@ -327,7 +327,120 @@ impl SyntaxKind {
        match raw.0 {
            0 => SyntaxKind::Root,
            1 => SyntaxKind::Table,
-            // ... other variants ...
+            2 => SyntaxKind::Chain,
+            3 => SyntaxKind::Rule,
+            4 => SyntaxKind::Set,
+            5 => SyntaxKind::Map,
+            6 => SyntaxKind::Element,
+            7 => SyntaxKind::Expression,
+            8 => SyntaxKind::BinaryExpr,
+            9 => SyntaxKind::UnaryExpr,
+            10 => SyntaxKind::CallExpr,
+            11 => SyntaxKind::SetExpr,
+            12 => SyntaxKind::RangeExpr,
+            13 => SyntaxKind::Statement,
+            14 => SyntaxKind::IncludeStmt,
+            15 => SyntaxKind::DefineStmt,
+            16 => SyntaxKind::FlushStmt,
+            17 => SyntaxKind::AddStmt,
+            18 => SyntaxKind::DeleteStmt,
+            19 => SyntaxKind::Identifier,
+            20 => SyntaxKind::StringLiteral,
+            21 => SyntaxKind::NumberLiteral,
+            22 => SyntaxKind::IpAddress,
+            23 => SyntaxKind::Ipv6Address,
+            24 => SyntaxKind::MacAddress,
+            25 => SyntaxKind::TableKw,
+            26 => SyntaxKind::ChainKw,
+            27 => SyntaxKind::RuleKw,
+            28 => SyntaxKind::SetKw,
+            29 => SyntaxKind::MapKw,
+            30 => SyntaxKind::ElementKw,
+            31 => SyntaxKind::IncludeKw,
+            32 => SyntaxKind::DefineKw,
+            33 => SyntaxKind::FlushKw,
+            34 => SyntaxKind::AddKw,
+            35 => SyntaxKind::DeleteKw,
+            36 => SyntaxKind::InsertKw,
+            37 => SyntaxKind::ReplaceKw,
+            38 => SyntaxKind::FilterKw,
+            39 => SyntaxKind::NatKw,
+            40 => SyntaxKind::RouteKw,
+            41 => SyntaxKind::InputKw,
+            42 => SyntaxKind::OutputKw,
+            43 => SyntaxKind::ForwardKw,
+            44 => SyntaxKind::PreroutingKw,
+            45 => SyntaxKind::PostroutingKw,
+            46 => SyntaxKind::IpKw,
+            47 => SyntaxKind::Ip6Kw,
+            48 => SyntaxKind::InetKw,
+            49 => SyntaxKind::ArpKw,
+            50 => SyntaxKind::BridgeKw,
+            51 => SyntaxKind::NetdevKw,
+            52 => SyntaxKind::TcpKw,
+            53 => SyntaxKind::UdpKw,
+            54 => SyntaxKind::IcmpKw,
+            55 => SyntaxKind::Icmpv6Kw,
+            56 => SyntaxKind::SportKw,
+            57 => SyntaxKind::DportKw,
+            58 => SyntaxKind::SaddrKw,
+            59 => SyntaxKind::DaddrKw,
+            60 => SyntaxKind::ProtocolKw,
+            61 => SyntaxKind::NexthdrKw,
+            62 => SyntaxKind::TypeKw,
+            63 => SyntaxKind::HookKw,
+            64 => SyntaxKind::PriorityKw,
+            65 => SyntaxKind::PolicyKw,
+            66 => SyntaxKind::IifnameKw,
+            67 => SyntaxKind::OifnameKw,
+            68 => SyntaxKind::CtKw,
+            69 => SyntaxKind::StateKw,
+            70 => SyntaxKind::AcceptKw,
+            71 => SyntaxKind::DropKw,
+            72 => SyntaxKind::RejectKw,
+            73 => SyntaxKind::ReturnKw,
+            74 => SyntaxKind::JumpKw,
+            75 => SyntaxKind::GotoKw,
+            76 => SyntaxKind::ContinueKw,
+            77 => SyntaxKind::LogKw,
+            78 => SyntaxKind::CommentKw,
+            79 => SyntaxKind::EstablishedKw,
+            80 => SyntaxKind::RelatedKw,
+            81 => SyntaxKind::NewKw,
+            82 => SyntaxKind::InvalidKw,
+            83 => SyntaxKind::EqOp,
+            84 => SyntaxKind::NeOp,
+            85 => SyntaxKind::LeOp,
+            86 => SyntaxKind::GeOp,
+            87 => SyntaxKind::LtOp,
+            88 => SyntaxKind::GtOp,
+            89 => SyntaxKind::LeftBrace,
+            90 => SyntaxKind::RightBrace,
+            91 => SyntaxKind::LeftParen,
+            92 => SyntaxKind::RightParen,
+            93 => SyntaxKind::LeftBracket,
+            94 => SyntaxKind::RightBracket,
+            95 => SyntaxKind::Comma,
+            96 => SyntaxKind::Semicolon,
+            97 => SyntaxKind::Colon,
+            98 => SyntaxKind::Assign,
+            99 => SyntaxKind::Dash,
+            100 => SyntaxKind::Slash,
+            101 => SyntaxKind::Dot,
+            102 => SyntaxKind::Whitespace,
+            103 => SyntaxKind::Newline,
+            104 => SyntaxKind::Comment,
+            105 => SyntaxKind::Shebang,
+            106 => SyntaxKind::Error,
+            107 => SyntaxKind::VmapKw,
+            108 => SyntaxKind::NdRouterAdvertKw,
+            109 => SyntaxKind::NdNeighborSolicitKw,
+            110 => SyntaxKind::NdNeighborAdvertKw,
+            111 => SyntaxKind::EchoRequestKw,
+            112 => SyntaxKind::DestUnreachableKw,
+            113 => SyntaxKind::RouterAdvertisementKw,
+            114 => SyntaxKind::TimeExceededKw,
+            115 => SyntaxKind::ParameterProblemKw,
            116 => SyntaxKind::PacketTooBigKw,
            _ => SyntaxKind::Error, // Fallback to Error for invalid values
        }
--- a/src/main.rs
+++ b/src/main.rs
@ -34,20 +34,6 @@ enum FormatterError {
        message: String,
        suggestion: Option<String>,
    },
-    #[error("Unsupported nftables syntax at line {line}, column {column}: {feature}")]
-    UnsupportedSyntax {
-        line: usize,
-        column: usize,
-        feature: String,
-        suggestion: Option<String>,
-    },
-    #[error("Invalid nftables syntax at line {line}, column {column}: {message}")]
-    InvalidSyntax {
-        line: usize,
-        column: usize,
-        message: String,
-        suggestion: Option<String>,
-    },
    #[error("IO error: {0}")]
    Io(#[from] io::Error),
 }
@ -289,7 +275,7 @@ fn process_single_file_format(
        let mut parser = NftablesParser::new(tokens.clone());
        parser
            .parse()
-            .map_err(|e| analyze_parse_error(&source, &tokens, &e.to_string()))?
+            .map_err(|e| convert_parse_error_to_formatter_error(&e, &source, &tokens))?
    };

    if debug {
@ -475,77 +461,203 @@ fn process_single_file_lint(
    Ok(())
 }

-/// Intelligent error analysis to categorize parse errors and provide location information
-fn analyze_parse_error(source: &str, tokens: &[Token], error: &str) -> FormatterError {
-    // Convert line/column position from token ranges
-    let lines: Vec<&str> = source.lines().collect();
+/// Convert parser errors to formatter errors with proper location information
+fn convert_parse_error_to_formatter_error(
+    error: &crate::parser::ParseError,
+    source: &str,
+    tokens: &[Token],
+) -> FormatterError {
+    use crate::parser::ParseError;

-    // Look for common error patterns and provide specific messages
-    if error.contains("unexpected token") || error.contains("expected") {
-        // Try to find the problematic token
-        if let Some(error_token) = find_error_token(tokens) {
-            let (line, column) = position_from_range(&error_token.range, source);
-
-            // Analyze the specific token to categorize the error
-            match categorize_syntax_error(&error_token, source, &lines) {
-                ErrorCategory::UnsupportedSyntax {
-                    feature,
-                    suggestion,
-                } => FormatterError::UnsupportedSyntax {
-                    line,
-                    column,
-                    feature,
-                    suggestion,
-                },
-                ErrorCategory::InvalidSyntax {
-                    message,
-                    suggestion,
-                } => FormatterError::InvalidSyntax {
-                    line,
-                    column,
-                    message,
-                    suggestion,
-                },
-                ErrorCategory::SyntaxError {
-                    message,
-                    suggestion,
-                } => FormatterError::SyntaxError {
-                    line,
-                    column,
-                    message,
-                    suggestion,
-                },
+    match error {
+        ParseError::UnexpectedToken {
+            line,
+            column,
+            expected,
+            found,
+        } => FormatterError::SyntaxError {
+            line: *line,
+            column: *column,
+            message: format!("Expected {}, found '{}'", expected, found),
+            suggestion: None,
+        },
+        ParseError::MissingToken { expected } => {
+            // Try to find current position from last token
+            let (line, column) = if let Some(last_token) = tokens.last() {
+                position_from_range(&last_token.range, source)
+            } else {
+                (1, 1)
+            };
+            FormatterError::SyntaxError {
+                line,
+                column,
+                message: format!("Missing token: expected {}", expected),
+                suggestion: None,
+            }
+        }
+        ParseError::InvalidExpression { message } => {
+            // Try to find the current token position
+            let (line, column) = find_current_parse_position(tokens, source);
+            FormatterError::SyntaxError {
+                line,
+                column,
+                message: format!("Invalid expression: {}", message),
+                suggestion: None,
+            }
+        }
+        ParseError::InvalidStatement { message } => {
+            let (line, column) = find_current_parse_position(tokens, source);
+            FormatterError::SyntaxError {
+                line,
+                column,
+                message: format!("Invalid statement: {}", message),
+                suggestion: None,
+            }
+        }
+        ParseError::SemanticError { message } => {
+            let (line, column) = find_current_parse_position(tokens, source);
+            FormatterError::SyntaxError {
+                line,
+                column,
+                message: format!("Semantic error: {}", message),
+                suggestion: None,
+            }
+        }
+        ParseError::LexError(lex_error) => {
+            // Convert lexical errors to formatter errors with location
+            convert_lex_error_to_formatter_error(lex_error, source)
+        }
+        ParseError::AnyhowError(anyhow_error) => {
+            // For anyhow errors, try to extract location from error message and context
+            let error_msg = anyhow_error.to_string();
+            let (line, column) = find_error_location_from_context(&error_msg, tokens, source);
+            let suggestion = generate_suggestion_for_error(&error_msg);
+
+            FormatterError::SyntaxError {
+                line,
+                column,
+                message: error_msg,
+                suggestion,
            }
-        } else {
-            // Fallback to generic parse error
-            FormatterError::ParseError(error.to_string())
        }
-    } else {
-        FormatterError::ParseError(error.to_string())
    }
 }

-#[derive(Debug)]
-enum ErrorCategory {
-    UnsupportedSyntax {
-        feature: String,
-        suggestion: Option<String>,
-    },
-    InvalidSyntax {
-        message: String,
-        suggestion: Option<String>,
-    },
-    SyntaxError {
-        message: String,
-        suggestion: Option<String>,
-    },
+/// Locate the line/column of the most recent meaningful token.
+///
+/// Scans `tokens` from the end, ignoring newline and line-comment tokens, and
+/// converts the range of the first token that remains with
+/// `position_from_range`. Returns `(1, 1)` when no such token exists.
+fn find_current_parse_position(tokens: &[Token], source: &str) -> (usize, usize) {
+    let last_meaningful = tokens
+        .iter()
+        .rev()
+        .find(|tok| !matches!(tok.kind, TokenKind::Newline | TokenKind::CommentLine(_)));
+
+    match last_meaningful {
+        Some(tok) => position_from_range(&tok.range, source),
+        None => (1, 1), // fallback
+    }
 }

-/// Find the first error token in the token stream
-fn find_error_token(tokens: &[Token]) -> Option<&Token> {
-    tokens
-        .iter()
-        .find(|token| matches!(token.kind, TokenKind::Error))
+/// Translate a lexer failure into a located `FormatterError::SyntaxError`.
+///
+/// Every `LexError` variant supplies a `position`, which is turned into a
+/// line/column pair via `offset_to_line_column`; the variant decides the
+/// message text and whether a suggestion is attached.
+fn convert_lex_error_to_formatter_error(
+    lex_error: &crate::lexer::LexError,
+    source: &str,
+) -> FormatterError {
+    use crate::lexer::LexError;
+
+    // Gather the per-variant pieces first so the position lookup and the
+    // error construction are each written only once.
+    let (offset, message, suggestion) = match lex_error {
+        LexError::InvalidToken { position, text } => {
+            (*position, format!("Invalid token: '{}'", text), None)
+        }
+        LexError::UnterminatedString { position } => (
+            *position,
+            "Unterminated string literal".to_string(),
+            Some("Add closing quote".to_string()),
+        ),
+        LexError::InvalidNumber { position, text } => (
+            *position,
+            format!("Invalid number: '{}'", text),
+            Some("Check number format".to_string()),
+        ),
+    };
+
+    let (line, column) = offset_to_line_column(offset, source);
+    FormatterError::SyntaxError {
+        line,
+        column,
+        message,
+        suggestion,
+    }
+}
+
+/// Map a byte offset within `source` to a 1-based `(line, column)` pair.
+///
+/// Every character that starts before `offset` is consumed: a `'\n'` moves to
+/// the next line and resets the column to 1, any other character advances the
+/// column by one. Columns therefore count characters, not bytes, and an
+/// offset past the end of `source` yields the position just after the last
+/// character.
+fn offset_to_line_column(offset: usize, source: &str) -> (usize, usize) {
+    source
+        .char_indices()
+        .take_while(|&(idx, _)| idx < offset)
+        .fold((1, 1), |(line, column), (_, ch)| {
+            if ch == '\n' {
+                (line + 1, 1)
+            } else {
+                (line, column + 1)
+            }
+        })
+}
+
+/// Estimate where an error occurred using hints embedded in its message.
+///
+/// When the message contains "Expected string or identifier, got:", the text
+/// following "got: " is extracted and the first token in `tokens` whose text
+/// equals it provides the location. In every other case the position of the
+/// last meaningful token is returned instead.
+fn find_error_location_from_context(
+    error_msg: &str,
+    tokens: &[Token],
+    source: &str,
+) -> (usize, usize) {
+    // Only this message shape names the offending token's text.
+    let named_text = if error_msg.contains("Expected string or identifier, got:") {
+        extract_token_from_error_message(error_msg)
+    } else {
+        None
+    };
+
+    named_text
+        .and_then(|wanted| tokens.iter().find(|tok| tok.text == wanted))
+        .map(|tok| position_from_range(&tok.range, source))
+        // Fallback to finding last meaningful token
+        .unwrap_or_else(|| find_current_parse_position(tokens, source))
+}
+
+/// Extract the problematic token from an error message.
+///
+/// Handles messages like "Expected string or identifier, got: {". Returns the
+/// trimmed text that follows the first "got: " marker (up to the next marker,
+/// if the message contains more than one), or `None` when the marker is
+/// absent.
+fn extract_token_from_error_message(error_msg: &str) -> Option<String> {
+    error_msg
+        .split("got: ")
+        .nth(1)
+        .map(|got_part| got_part.trim().to_string())
+}
+
+/// Produce a human-readable hint for a parse error message, if one applies.
+///
+/// Messages containing "Expected string or identifier" get a quoting hint;
+/// other messages containing both "Expected" and "got:" get a generic syntax
+/// hint; anything else yields `None`.
+fn generate_suggestion_for_error(error_msg: &str) -> Option<String> {
+    // Pick the static hint first; allocate only once a hint was chosen.
+    let hint: Option<&str> = if error_msg.contains("Expected string or identifier") {
+        Some("Check if you're missing quotes around a string value or have an unexpected character")
+    } else if error_msg.contains("Expected") && error_msg.contains("got:") {
+        Some("Check syntax and ensure proper nftables structure")
+    } else {
+        None
+    };
+
+    hint.map(str::to_string)
 }

 /// Convert TextRange to line/column position
@ -566,186 +678,6 @@ fn position_from_range(range: &text_size::TextRange, source: &str) -> (usize, us
    (1, 1) // fallback
 }

-/// Categorize syntax errors based on token content and context
-fn categorize_syntax_error(token: &Token, source: &str, lines: &[&str]) -> ErrorCategory {
-    let token_text = &token.text;
-    let (line_num, _) = position_from_range(&token.range, source);
-    let line_content = lines.get(line_num.saturating_sub(1)).unwrap_or(&"");
-
-    // Check for unsupported nftables features
-    if is_unsupported_feature(token_text, line_content) {
-        let (feature, suggestion) = classify_unsupported_feature(token_text, line_content);
-        return ErrorCategory::UnsupportedSyntax {
-            feature,
-            suggestion,
-        };
-    }
-
-    // Check for invalid but supported syntax
-    if is_invalid_syntax(token_text, line_content) {
-        let (message, suggestion) = classify_invalid_syntax(token_text, line_content);
-        return ErrorCategory::InvalidSyntax {
-            message,
-            suggestion,
-        };
-    }
-
-    // Default to syntax error
-    ErrorCategory::SyntaxError {
-        message: format!("Unexpected token '{}'", token_text),
-        suggestion: suggest_correction(token_text, line_content),
-    }
-}
-
-/// Check if the token represents an unsupported nftables feature
-fn is_unsupported_feature(token_text: &str, line_content: &str) -> bool {
-    // List of advanced nftables features that might not be fully supported yet
-    let unsupported_keywords = [
-        "quota", "limit", "counter", "meter", "socket", "fib", "rt", "ipsec", "tunnel", "comp",
-        "dccp", "sctp", "gre", "esp", "ah", "vlan", "arp", "rateest", "osf", "netdev", "meta",
-        "exthdr", "payload", "lookup", "dynset", "flow", "hash", "jhash", "symhash", "crc32",
-    ];
-
-    unsupported_keywords
-        .iter()
-        .any(|&keyword| token_text.contains(keyword) || line_content.contains(keyword))
-}
-
-/// Check if the syntax is invalid (malformed but within supported features)
-fn is_invalid_syntax(token_text: &str, line_content: &str) -> bool {
-    // Check for common syntax mistakes
-    if token_text.contains("..") || token_text.contains("::") {
-        return true; // Double operators usually indicate mistakes
-    }
-
-    // Check for malformed addresses or ranges
-    if token_text.contains("/") && !is_valid_cidr(token_text) {
-        return true;
-    }
-
-    // Check for malformed brackets/braces
-    let open_braces = line_content.matches('{').count();
-    let close_braces = line_content.matches('}').count();
-    if open_braces != close_braces {
-        return true;
-    }
-
-    false
-}
-
-/// Classify unsupported feature and provide suggestion
-fn classify_unsupported_feature(token_text: &str, line_content: &str) -> (String, Option<String>) {
-    let feature = if token_text.contains("quota") {
-        (
-            "quota management".to_string(),
-            Some("Use explicit rule counting instead".to_string()),
-        )
-    } else if token_text.contains("limit") {
-        (
-            "rate limiting".to_string(),
-            Some("Consider using simpler rule-based rate limiting".to_string()),
-        )
-    } else if token_text.contains("counter") {
-        (
-            "packet counters".to_string(),
-            Some("Use rule-level statistics instead".to_string()),
-        )
-    } else if line_content.contains("meta") {
-        (
-            "meta expressions".to_string(),
-            Some("Use explicit protocol matching instead".to_string()),
-        )
-    } else {
-        (format!("advanced feature '{}'", token_text), None)
-    };
-
-    feature
-}
-
-/// Classify invalid syntax and provide suggestion
-fn classify_invalid_syntax(token_text: &str, line_content: &str) -> (String, Option<String>) {
-    if token_text.contains("/") && !is_valid_cidr(token_text) {
-        return (
-            "Invalid CIDR notation".to_string(),
-            Some("Use format like '192.168.1.0/24' or '::1/128'".to_string()),
-        );
-    }
-
-    if token_text.contains("..") {
-        return (
-            "Invalid range operator".to_string(),
-            Some("Use '-' for ranges like '1000-2000'".to_string()),
-        );
-    }
-
-    if line_content.contains('{') && !line_content.contains('}') {
-        return (
-            "Unmatched opening brace".to_string(),
-            Some("Ensure all '{' have matching '}'".to_string()),
-        );
-    }
-
-    (
-        format!("Malformed token '{}'", token_text),
-        Some("Check nftables syntax documentation".to_string()),
-    )
-}
-
-/// Suggest correction for common typos
-fn suggest_correction(token_text: &str, line_content: &str) -> Option<String> {
-    // Common typos and their corrections
-    let corrections = [
-        ("tabel", "table"),
-        ("cahin", "chain"),
-        ("accpet", "accept"),
-        ("rejct", "reject"),
-        ("prtocol", "protocol"),
-        ("addres", "address"),
-        ("pririty", "priority"),
-        ("poicy", "policy"),
-    ];
-
-    for (typo, correction) in &corrections {
-        if token_text.contains(typo) {
-            return Some(format!("Did you mean '{}'?", correction));
-        }
-    }
-
-    // Context-based suggestions
-    if line_content.contains("type") && line_content.contains("hook") {
-        if !line_content.contains("filter")
-            && !line_content.contains("nat")
-            && !line_content.contains("route")
-        {
-            return Some("Chain type should be 'filter', 'nat', or 'route'".to_string());
-        }
-    }
-
-    None
-}
-
-/// Validate CIDR notation
-fn is_valid_cidr(text: &str) -> bool {
-    if let Some(slash_pos) = text.find('/') {
-        let (addr, prefix) = text.split_at(slash_pos);
-        let prefix = &prefix[1..]; // Remove the '/'
-
-        // Check if prefix is a valid number
-        if let Ok(prefix_len) = prefix.parse::<u8>() {
-            // Basic validation - IPv4 should be <= 32, IPv6 <= 128
-            if addr.contains(':') {
-                prefix_len <= 128 // IPv6
-            } else {
-                prefix_len <= 32 // IPv4
-            }
-        } else {
-            false
-        }
-    } else {
-        false
-    }
-}
-
 fn main() -> Result<()> {
    let args = Args::parse();