import type { AnalysisResult, DiffBackendConfig, EngineBackend, WebhookEvent } from '../types.js';
import { fetchPRFiles } from '../github.js';
import { getLogger } from '../logger.js';

// NOTE: all patterns below are tested against the PR file's `filename`, which is
// assumed to be a repo-relative path with no leading slash (e.g. `src/foo.ts`).

/**
 * Test files: a `test`, `tests` or `spec` path token delimited by anything that is
 * not a letter or digit. `_` counts as a delimiter here (unlike with `\b`), so
 * `foo_test.go`, `test_foo.py`, `tests/unit/a.py`, `a.spec.ts` and `__tests__/a.ts`
 * all match, while `latest.ts` and `contest/` do not.
 */
const TEST_FILE_PATTERN = /(?:^|[^a-z0-9])(?:tests?|spec)(?![a-z0-9])/i;

/** Lockfiles, minified assets and vendored directories (at the repo root or nested). */
const GENERATED_FILE_PATTERN =
  /\b(package-lock|yarn\.lock|pnpm-lock|Cargo\.lock|go\.sum|composer\.lock|Gemfile\.lock|poetry\.lock|flake\.lock)\b|\.min\.(js|css)$|(^|\/)vendor\//i;

/** Config/infra files by extension, `.*rc` / `.*ignore` dotfiles in any directory, build files and `.github/`. */
const CONFIG_FILE_PATTERN =
  /\.(ya?ml|toml|ini|env(\.\w+)?|json)$|(^|\/)\.[\w-]+(rc|ignore)$|Makefile$|Dockerfile$|^\.github\//i;

/**
 * Sensitive areas: the listed keywords (singular or plural) as a path token
 * delimited by non-alphanumerics, plus `.sql` files. Matches e.g.
 * `db/migrations/001.rb`, `config/secrets.yml` and `user_auth.ts`.
 */
const RISKY_FILE_PATTERN =
  /(?:^|[^a-z0-9])(?:migrations?|schemas?|seeds?|secrets?|credentials?|auth|permissions?|rbac)(?![a-z0-9])|\.sql(?![a-z0-9])/i;

/** Documentation by extension, or well-known doc file names at the repo root. */
const DOC_FILE_PATTERN = /\.(md|mdx|txt|rst|adoc)$|^(README|CHANGELOG|LICENSE|CONTRIBUTING)/i;

/** The per-file fields this module reads from a PR file listing. */
type ChangedFile = { filename: string; additions: number; deletions: number; changes: number };

/** One heuristic observation about the diff and the weight it contributes to scoring. */
type Signal = { name: string; positive: boolean; weight: number };

/**
 * Sorts changed files into exclusive buckets, checked in priority order
 * generated > tests > docs > config > src, and additionally collects every file
 * that matches the risky pattern.
 *
 * @param files - Changed files of the PR.
 * @returns The five exclusive buckets plus the non-exclusive `risky` list.
 */
function categorizeFiles(files: ChangedFile[]) {
  const src: ChangedFile[] = [];
  const tests: ChangedFile[] = [];
  const generated: ChangedFile[] = [];
  const config: ChangedFile[] = [];
  const docs: ChangedFile[] = [];
  const risky: ChangedFile[] = [];

  for (const f of files) {
    if (GENERATED_FILE_PATTERN.test(f.filename)) {
      generated.push(f);
    } else if (TEST_FILE_PATTERN.test(f.filename)) {
      tests.push(f);
    } else if (DOC_FILE_PATTERN.test(f.filename)) {
      docs.push(f);
    } else if (CONFIG_FILE_PATTERN.test(f.filename)) {
      config.push(f);
    } else {
      src.push(f);
    }

    // risky is non-exclusive - a file can be both src and risky
    if (RISKY_FILE_PATTERN.test(f.filename)) {
      risky.push(f);
    }
  }

  return { src, tests, generated, config, docs, risky };
}

/**
 * Maps the number of changed lines to a size signal.
 *
 * A positive `maxChanges` that is exceeded always yields the "exceeds limit"
 * signal, even when the limit is below the built-in 50/200/500 tiers; otherwise
 * the tiers apply and 201-500 lines produce no signal.
 *
 * @param totalChanges - Additions plus deletions across non-generated files.
 * @param maxChanges - Configured line limit.
 * @returns The size signal, or `undefined` for a medium-sized PR.
 */
function sizeSignal(totalChanges: number, maxChanges: number): Signal | undefined {
  const overLimit: Signal = {
    name: `very large PR (${totalChanges} lines, exceeds limit)`,
    positive: false,
    weight: 1.5,
  };

  // Check the configured limit first so that limits below 500 are honoured.
  if (maxChanges > 0 && totalChanges > maxChanges) return overLimit;
  if (totalChanges <= 50) return { name: 'tiny PR', positive: true, weight: 1.2 };
  if (totalChanges <= 200) return { name: 'small PR', positive: true, weight: 1 };
  if (totalChanges <= 500) return undefined; // medium - no signal either way
  if (totalChanges <= maxChanges) {
    return { name: `large PR (${totalChanges} lines)`, positive: false, weight: 0.8 };
  }
  // Only reachable when the limit is not a positive number; more than 500 lines
  // is then still reported as over the limit.
  return overLimit;
}

/** Sums the weights of all signals with the given polarity. */
function sumWeights(signals: Signal[], positive: boolean): number {
  return signals.reduce((sum, s) => (s.positive === positive ? sum + s.weight : sum), 0);
}

/**
 * Engine backend that scores a pull request purely from the shape of its diff:
 * size, focus, test/doc/config mix, churn, sensitive paths and generated noise.
 */
export class DiffBackend implements EngineBackend {
  name = 'diff';

  constructor(private readonly config: DiffBackendConfig) {}

  /**
   * Analyzes a webhook event.
   *
   * Non-PR events, fetch failures (logged as a warning) and empty diffs short-circuit
   * to a neutral result. Otherwise weighted positive/negative signals are collected;
   * the impact is the side whose total weight exceeds the other by more than 10%,
   * else neutral.
   *
   * @param event - Incoming webhook event.
   * @returns Impact, a confidence (0 when there are no signals, otherwise between
   *   0.25 and 0.85) and a human-readable reasoning string.
   */
  async analyze(event: WebhookEvent): Promise<AnalysisResult> {
    if (event.type !== 'pull_request') {
      return { impact: 'neutral', confidence: 0, reasoning: 'Not a PR.' };
    }

    let files: Awaited<ReturnType<typeof fetchPRFiles>>;
    try {
      files = await fetchPRFiles(event.owner, event.repo, event.number);
    } catch (err) {
      getLogger().warn(
        `Failed to fetch PR files for ${event.owner}/${event.repo}#${event.number}`,
        err
      );
      return { impact: 'neutral', confidence: 0, reasoning: 'Could not fetch PR diff.' };
    }

    if (files.length === 0) {
      return { impact: 'neutral', confidence: 0.1, reasoning: 'Empty diff.' };
    }

    const { src, tests, generated, config, docs, risky } = categorizeFiles(files);

    // Exclude generated files from change counts
    const meaningful = files.filter((f) => !GENERATED_FILE_PATTERN.test(f.filename));
    const totalAdditions = meaningful.reduce((s, f) => s + f.additions, 0);
    const totalDeletions = meaningful.reduce((s, f) => s + f.deletions, 0);
    const totalChanges = totalAdditions + totalDeletions;

    const signals: Signal[] = [];

    // --- Size signals ---
    const size = sizeSignal(totalChanges, this.config.maxChanges);
    if (size) {
      signals.push(size);
    }

    // --- Focus signals ---
    if (src.length <= 3 && src.length > 0) {
      signals.push({ name: 'tightly focused', positive: true, weight: 1.2 });
    } else if (meaningful.length <= 10) {
      signals.push({ name: 'focused changeset', positive: true, weight: 0.8 });
    } else if (meaningful.length > 30) {
      signals.push({
        name: `sprawling changeset (${meaningful.length} files)`,
        positive: false,
        weight: 1.2,
      });
    } else if (meaningful.length > 20) {
      signals.push({
        name: `broad changeset (${meaningful.length} files)`,
        positive: false,
        weight: 0.6,
      });
    }

    // --- Test coverage ---
    if (tests.length > 0 && src.length > 0) {
      const testRatio = tests.length / src.length;
      if (testRatio >= 0.5) {
        signals.push({ name: 'good test coverage in diff', positive: true, weight: 1.5 });
      } else {
        signals.push({ name: 'includes tests', positive: true, weight: 1 });
      }
    } else if (tests.length > 0 && src.length === 0) {
      signals.push({ name: 'test-only change', positive: true, weight: 1.2 });
    } else if (this.config.requireTests && src.length > 0 && totalChanges > 50) {
      signals.push({ name: 'no test changes for non-trivial PR', positive: false, weight: 1.3 });
    }

    // --- Net deletion ---
    if (totalDeletions > totalAdditions && totalDeletions > 10) {
      // Guard the divisor so a pure-deletion PR does not divide by zero.
      const ratio = totalDeletions / Math.max(totalAdditions, 1);
      if (ratio > 3) {
        signals.push({ name: 'significant code removal', positive: true, weight: 1.3 });
      } else {
        signals.push({ name: 'net code removal', positive: true, weight: 1 });
      }
    }

    // --- Churn detection (files with high add+delete suggesting rewrites) ---
    const highChurnFiles = src.filter(
      (f) =>
        f.additions > 50 &&
        f.deletions > 50 &&
        Math.min(f.additions, f.deletions) / Math.max(f.additions, f.deletions) > 0.6
    );
    if (highChurnFiles.length >= 3) {
      signals.push({
        name: `high churn in ${highChurnFiles.length} files (possible refactor)`,
        positive: false,
        weight: 0.5,
      });
    }

    // --- Risky files ---
    if (risky.length > 0) {
      signals.push({
        name: `touches sensitive files (${risky.map((f) => f.filename.split('/').pop()).join(', ')})`,
        positive: false,
        weight: 0.7,
      });
    }

    // --- Documentation ---
    if (docs.length > 0 && src.length > 0) {
      signals.push({ name: 'includes docs updates', positive: true, weight: 0.6 });
    } else if (docs.length > 0 && src.length === 0) {
      signals.push({ name: 'docs-only change', positive: true, weight: 1 });
    }

    // --- Config-only ---
    if (config.length > 0 && src.length === 0 && tests.length === 0) {
      signals.push({ name: 'config/infra only', positive: true, weight: 0.8 });
    }

    // --- Generated file noise ---
    if (generated.length > 0) {
      const genChanges = generated.reduce((s, f) => s + f.changes, 0);
      if (genChanges > totalChanges * 2) {
        signals.push({ name: 'dominated by generated file changes', positive: false, weight: 0.4 });
      }
    }

    // --- Scoring with weights ---
    const positiveWeight = sumWeights(signals, true);
    const negativeWeight = sumWeights(signals, false);

    let impact: AnalysisResult['impact'];
    if (positiveWeight > negativeWeight * 1.1) {
      impact = 'positive';
    } else if (negativeWeight > positiveWeight * 1.1) {
      impact = 'negative';
    } else {
      impact = 'neutral';
    }

    // Confidence grows with how one-sided the signals are; the divisor is floored
    // at 1 so a single light signal cannot claim full one-sidedness.
    const totalSignalWeight = positiveWeight + negativeWeight;
    const confidence =
      signals.length > 0
        ? Math.min(
            1,
            (Math.abs(positiveWeight - negativeWeight) / Math.max(totalSignalWeight, 1)) * 0.6 +
              0.25
          )
        : 0;

    // Build reasoning
    const breakdown: string[] = [];
    if (src.length > 0) breakdown.push(`${src.length} source`);
    if (tests.length > 0) breakdown.push(`${tests.length} test`);
    if (config.length > 0) breakdown.push(`${config.length} config`);
    if (docs.length > 0) breakdown.push(`${docs.length} docs`);
    if (generated.length > 0) breakdown.push(`${generated.length} generated`);
    const fileSummary = `${meaningful.length} files (${breakdown.join(', ')})`;

    const reasoning =
      signals.length > 0
        ? `Diff: ${signals.map((s) => `${s.positive ? '+' : '-'} ${s.name}`).join(', ')}. ${totalAdditions}+ ${totalDeletions}- across ${fileSummary}.`
        : 'No diff signals.';

    return { impact, confidence, reasoning };
  }
}