troutbot/src/engine/diff.ts
NotAShelf 374408834b
treewide: make less webhook-centric
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Ifab58fcb523549ca9cb83dc8467be51e6a6a6964
2026-02-01 15:36:56 +03:00

227 lines
8.1 KiB
TypeScript

import type { AnalysisResult, DiffBackendConfig, EngineBackend, WebhookEvent } from '../types.js';
import { fetchPRFiles } from '../github.js';
import { getLogger } from '../logger.js';
/**
 * Matches paths that look like test files: `test`, `tests`, `spec` or `specs`
 * delimited on both sides by a non-alphanumeric character (or a string edge).
 *
 * Explicit delimiters are used instead of `\b` because `\b` treats `_` as a
 * word character, which would miss names such as `handler_test.go`,
 * `test_utils.py` or `user_spec.rb`.
 */
const TEST_FILE_PATTERN = /(^|[^a-z0-9])(tests?|specs?)([^a-z0-9]|$)/i;

/**
 * Matches lockfiles, minified JS/CSS bundles and anything under a `vendor/`
 * directory. The `vendor/` alternative also matches at the start of the path,
 * so a top-level `vendor/` tree is recognised when paths carry no leading slash.
 */
const GENERATED_FILE_PATTERN =
  /\b(package-lock|yarn\.lock|pnpm-lock|Cargo\.lock|go\.sum|composer\.lock|Gemfile\.lock|poetry\.lock|flake\.lock)\b|\.min\.(js|css)$|(^|\/)vendor\//i;

/**
 * Matches configuration / infrastructure files: common config extensions,
 * top-level dot-files ending in `rc` or `ignore`, `Makefile`, `Dockerfile`,
 * and anything under a top-level `.github/` directory.
 */
const CONFIG_FILE_PATTERN =
  /\.(ya?ml|toml|ini|env(\.\w+)?|json)$|^\.[\w-]+(rc|ignore)$|Makefile$|Dockerfile$|^\.github\//i;

/**
 * Matches paths that hint at sensitive content (migrations, schemas, seeds,
 * secrets, credentials, auth, permissions, RBAC, SQL files).
 *
 * Keywords are delimited by non-alphanumeric characters rather than `\b` so
 * that underscore-separated names (`user_auth.ts`) and plural directories
 * (`migrations/`) are caught, while substrings of longer words (`author.ts`)
 * are not.
 */
const RISKY_FILE_PATTERN =
  /(^|[^a-z0-9])(migrations?|schemas?|seeds?|secrets?|credentials?|auth|permissions?|rbac)([^a-z0-9]|$)|\.sql([^a-z0-9]|$)/i;

/**
 * Matches documentation: common prose extensions, or a path that starts with
 * README / CHANGELOG / LICENSE / CONTRIBUTING.
 */
const DOC_FILE_PATTERN = /\.(md|mdx|txt|rst|adoc)$|^(README|CHANGELOG|LICENSE|CONTRIBUTING)/i;
/**
 * Sorts changed files into buckets by filename.
 *
 * Each file lands in exactly one primary bucket, chosen by the first pattern
 * that matches in this order: generated, tests, docs, config; anything else
 * is treated as source. Independently of that, a file whose name matches the
 * risky pattern is also appended to `risky`, so `risky` may overlap with any
 * primary bucket.
 *
 * @param files - Changed files with their per-file line statistics.
 * @returns The six buckets; input order is preserved within each bucket.
 */
function categorizeFiles(
  files: { filename: string; additions: number; deletions: number; changes: number }[]
) {
  type FileList = typeof files;

  const src: FileList = [];
  const tests: FileList = [];
  const generated: FileList = [];
  const config: FileList = [];
  const docs: FileList = [];
  const risky: FileList = [];

  // Resolve the single primary bucket for a path; first match wins.
  const primaryBucketFor = (path: string): FileList => {
    if (GENERATED_FILE_PATTERN.test(path)) return generated;
    if (TEST_FILE_PATTERN.test(path)) return tests;
    if (DOC_FILE_PATTERN.test(path)) return docs;
    if (CONFIG_FILE_PATTERN.test(path)) return config;
    return src;
  };

  files.forEach((entry) => {
    primaryBucketFor(entry.filename).push(entry);

    // Risk is a non-exclusive tag layered on top of the primary bucket.
    if (RISKY_FILE_PATTERN.test(entry.filename)) {
      risky.push(entry);
    }
  });

  return { src, tests, generated, config, docs, risky };
}
/**
 * Engine backend that rates a pull request from the shape of its diff alone:
 * size, file spread, presence of tests/docs, net deletions, churn, sensitive
 * filenames and generated-file noise. Each observation becomes a weighted
 * positive or negative signal; the balance of weights decides the impact.
 */
export class DiffBackend implements EngineBackend {
  name = 'diff';

  constructor(private readonly config: DiffBackendConfig) {}

  /**
   * Analyzes a pull-request event.
   *
   * @param event - Incoming event; anything whose `type` is not
   *   `'pull_request'` yields a neutral, zero-confidence result.
   * @returns Impact, a confidence in `[0, 1]`, and a human-readable reasoning
   *   string. Failure to fetch the file list is logged and reported as a
   *   neutral, zero-confidence result rather than thrown.
   */
  async analyze(event: WebhookEvent): Promise<AnalysisResult> {
    if (event.type !== 'pull_request') {
      return { impact: 'neutral', confidence: 0, reasoning: 'Not a PR.' };
    }

    let files;
    try {
      files = await fetchPRFiles(event.owner, event.repo, event.number);
    } catch (err) {
      getLogger().warn(
        `Failed to fetch PR files for ${event.owner}/${event.repo}#${event.number}`,
        err
      );
      return { impact: 'neutral', confidence: 0, reasoning: 'Could not fetch PR diff.' };
    }

    if (files.length === 0) {
      return { impact: 'neutral', confidence: 0.1, reasoning: 'Empty diff.' };
    }

    const { src, tests, generated, config, docs, risky } = categorizeFiles(files);

    // Exclude generated files from change counts
    const meaningful = files.filter((f) => !GENERATED_FILE_PATTERN.test(f.filename));
    const totalAdditions = meaningful.reduce((s, f) => s + f.additions, 0);
    const totalDeletions = meaningful.reduce((s, f) => s + f.deletions, 0);
    const totalChanges = totalAdditions + totalDeletions;

    const signals: { name: string; positive: boolean; weight: number }[] = [];

    // --- Size signals ---
    // The configured limit is checked first so that a limit below the fixed
    // 50/200/500 tiers is still honoured. For limits >= 500 the outcome is the
    // same as testing the tiers first.
    if (totalChanges > this.config.maxChanges) {
      signals.push({
        name: `very large PR (${totalChanges} lines, exceeds limit)`,
        positive: false,
        weight: 1.5,
      });
    } else if (totalChanges <= 50) {
      signals.push({ name: 'tiny PR', positive: true, weight: 1.2 });
    } else if (totalChanges <= 200) {
      signals.push({ name: 'small PR', positive: true, weight: 1 });
    } else if (totalChanges > 500) {
      signals.push({ name: `large PR (${totalChanges} lines)`, positive: false, weight: 0.8 });
    }
    // 201-500 lines is "medium": no signal either way.

    // --- Focus signals ---
    if (src.length <= 3 && src.length > 0) {
      signals.push({ name: 'tightly focused', positive: true, weight: 1.2 });
    } else if (meaningful.length <= 10) {
      signals.push({ name: 'focused changeset', positive: true, weight: 0.8 });
    } else if (meaningful.length > 30) {
      signals.push({
        name: `sprawling changeset (${meaningful.length} files)`,
        positive: false,
        weight: 1.2,
      });
    } else if (meaningful.length > 20) {
      signals.push({
        name: `broad changeset (${meaningful.length} files)`,
        positive: false,
        weight: 0.6,
      });
    }

    // --- Test coverage ---
    if (tests.length > 0 && src.length > 0) {
      // Ratio of test files to source files touched, not of lines.
      const testRatio = tests.length / src.length;
      if (testRatio >= 0.5) {
        signals.push({ name: 'good test coverage in diff', positive: true, weight: 1.5 });
      } else {
        signals.push({ name: 'includes tests', positive: true, weight: 1 });
      }
    } else if (tests.length > 0 && src.length === 0) {
      signals.push({ name: 'test-only change', positive: true, weight: 1.2 });
    } else if (this.config.requireTests && src.length > 0 && totalChanges > 50) {
      signals.push({ name: 'no test changes for non-trivial PR', positive: false, weight: 1.3 });
    }

    // --- Net deletion ---
    if (totalDeletions > totalAdditions && totalDeletions > 10) {
      // Math.max guards the division when nothing was added.
      const ratio = totalDeletions / Math.max(totalAdditions, 1);
      if (ratio > 3) {
        signals.push({ name: 'significant code removal', positive: true, weight: 1.3 });
      } else {
        signals.push({ name: 'net code removal', positive: true, weight: 1 });
      }
    }

    // --- Churn detection (files with high add+delete suggesting rewrites) ---
    // A file counts as high-churn when both sides exceed 50 lines and the
    // smaller side is more than 60% of the larger one.
    const highChurnFiles = src.filter(
      (f) =>
        f.additions > 50 &&
        f.deletions > 50 &&
        Math.min(f.additions, f.deletions) / Math.max(f.additions, f.deletions) > 0.6
    );
    if (highChurnFiles.length >= 3) {
      signals.push({
        name: `high churn in ${highChurnFiles.length} files (possible refactor)`,
        positive: false,
        weight: 0.5,
      });
    }

    // --- Risky files ---
    if (risky.length > 0) {
      signals.push({
        // Only the basename of each risky file is listed.
        name: `touches sensitive files (${risky.map((f) => f.filename.split('/').pop()).join(', ')})`,
        positive: false,
        weight: 0.7,
      });
    }

    // --- Documentation ---
    if (docs.length > 0 && src.length > 0) {
      signals.push({ name: 'includes docs updates', positive: true, weight: 0.6 });
    } else if (docs.length > 0 && src.length === 0) {
      signals.push({ name: 'docs-only change', positive: true, weight: 1 });
    }

    // --- Config-only ---
    if (config.length > 0 && src.length === 0 && tests.length === 0) {
      signals.push({ name: 'config/infra only', positive: true, weight: 0.8 });
    }

    // --- Generated file noise ---
    if (generated.length > 0) {
      const genChanges = generated.reduce((s, f) => s + f.changes, 0);
      if (genChanges > totalChanges * 2) {
        signals.push({ name: 'dominated by generated file changes', positive: false, weight: 0.4 });
      }
    }

    // --- Scoring with weights ---
    const positiveWeight = signals.filter((s) => s.positive).reduce((s, x) => s + x.weight, 0);
    const negativeWeight = signals.filter((s) => !s.positive).reduce((s, x) => s + x.weight, 0);

    // One side must outweigh the other by more than 10% to move off neutral.
    let impact: AnalysisResult['impact'];
    if (positiveWeight > negativeWeight * 1.1) {
      impact = 'positive';
    } else if (negativeWeight > positiveWeight * 1.1) {
      impact = 'negative';
    } else {
      impact = 'neutral';
    }

    // Confidence is 0.25 plus up to 0.6 scaled by how lopsided the weights
    // are; the denominator is floored at 1 so a lone light signal cannot
    // produce full lopsidedness.
    const totalSignalWeight = positiveWeight + negativeWeight;
    const confidence =
      signals.length > 0
        ? Math.min(
            1,
            (Math.abs(positiveWeight - negativeWeight) / Math.max(totalSignalWeight, 1)) * 0.6 +
              0.25
          )
        : 0;

    // Build reasoning
    // The breakdown covers exactly the non-generated files counted in
    // `meaningful`; generated files are reported separately because they are
    // excluded from both the file count and the line totals.
    const breakdown: string[] = [];
    if (src.length > 0) breakdown.push(`${src.length} source`);
    if (tests.length > 0) breakdown.push(`${tests.length} test`);
    if (config.length > 0) breakdown.push(`${config.length} config`);
    if (docs.length > 0) breakdown.push(`${docs.length} docs`);
    const breakdownText = breakdown.length > 0 ? ` (${breakdown.join(', ')})` : '';
    const generatedNote = generated.length > 0 ? `, ${generated.length} generated excluded` : '';
    const fileSummary = `${meaningful.length} files${breakdownText}${generatedNote}`;

    const reasoning =
      signals.length > 0
        ? `Diff: ${signals.map((s) => `${s.positive ? '+' : '-'} ${s.name}`).join(', ')}. ${totalAdditions}+ ${totalDeletions}- across ${fileSummary}.`
        : 'No diff signals.';

    return { impact, confidence, reasoning };
  }
}