Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: Ifab58fcb523549ca9cb83dc8467be51e6a6a6964
227 lines
8.1 KiB
TypeScript
227 lines
8.1 KiB
TypeScript
import type { AnalysisResult, DiffBackendConfig, EngineBackend, WebhookEvent } from '../types.js';
|
|
import { fetchPRFiles } from '../github.js';
|
|
import { getLogger } from '../logger.js';
|
|
|
|
/**
 * Matches paths that look like test code: a `test`, `tests` or `spec` path
 * component or name fragment delimited by anything that is not a letter or
 * digit (so `/`, `.`, `-` and `_` all count as separators).
 *
 * `\b` is deliberately not used here: it treats `_` as a word character, which
 * made names such as `foo_test.go`, `test_utils.py` and `__tests__` siblings
 * like `tests/` slip through unrecognised.
 */
const TEST_FILE_PATTERN = /(?:^|[^a-z0-9])(?:tests?|spec)(?:[^a-z0-9]|$)/i;

/**
 * Matches lockfiles, minified JS/CSS bundles and anything under a `vendor/`
 * directory, including a `vendor/` directory at the repository root.
 */
const GENERATED_FILE_PATTERN =
  /\b(package-lock|yarn\.lock|pnpm-lock|Cargo\.lock|go\.sum|composer\.lock|Gemfile\.lock|poetry\.lock|flake\.lock)\b|\.min\.(js|css)$|(?:^|\/)vendor\//i;

/**
 * Matches configuration and infrastructure files: common config extensions,
 * root-level dotfiles ending in `rc` or `ignore`, `Makefile`, `Dockerfile`, and
 * anything under `.github/`.
 */
const CONFIG_FILE_PATTERN =
  /\.(ya?ml|toml|ini|env(\.\w+)?|json)$|^\.[\w-]+(rc|ignore)$|Makefile$|Dockerfile$|^\.github\//i;

/**
 * Matches paths whose name suggests sensitive content (migrations, schemas,
 * seeds, secrets, credentials, auth, permissions, RBAC) or a `.sql` file.
 *
 * Keywords may be singular or plural and are delimited by any character that
 * is not a letter or digit, so `db/migrations/001.ts` and `user_auth.ts` match
 * while `author.ts` and `oauth.ts` do not.
 */
const RISKY_FILE_PATTERN =
  /(?:^|[^a-z0-9])(?:migrations?|schemas?|seeds?|secrets?|credentials?|auth|permissions?|rbac)(?:[^a-z0-9]|$)|\.sql(?:[^a-z0-9]|$)/i;

/**
 * Matches documentation: common prose extensions, or a path that starts with
 * one of the conventional top-level document names.
 */
const DOC_FILE_PATTERN = /\.(md|mdx|txt|rst|adoc)$|^(README|CHANGELOG|LICENSE|CONTRIBUTING)/i;
|
|
|
|
/**
 * Sorts changed files into buckets by filename.
 *
 * Each file lands in exactly one of `generated`, `tests`, `docs`, `config` or
 * `src`; the patterns are tried in that order and the first match wins, with
 * `src` as the fallback. Independently of that, any file whose name matches
 * the risky pattern is also appended to `risky`.
 *
 * @param files - Changed files with their per-file line counts.
 * @returns The six buckets, each preserving the input order.
 */
function categorizeFiles(
  files: { filename: string; additions: number; deletions: number; changes: number }[]
) {
  const src: typeof files = [];
  const tests: typeof files = [];
  const generated: typeof files = [];
  const config: typeof files = [];
  const docs: typeof files = [];
  const risky: typeof files = [];

  // Ordered by precedence: the first pattern that matches claims the file.
  const exclusiveRules: [RegExp, typeof files][] = [
    [GENERATED_FILE_PATTERN, generated],
    [TEST_FILE_PATTERN, tests],
    [DOC_FILE_PATTERN, docs],
    [CONFIG_FILE_PATTERN, config],
  ];

  files.forEach((file) => {
    const path = file.filename;
    const matchedRule = exclusiveRules.find(([pattern]) => pattern.test(path));
    const bucket = matchedRule ? matchedRule[1] : src;
    bucket.push(file);

    // Risk is an overlay, not a bucket of its own: the file stays where it is
    // and is additionally flagged.
    if (RISKY_FILE_PATTERN.test(path)) {
      risky.push(file);
    }
  });

  return { src, tests, generated, config, docs, risky };
}
|
|
|
|
/**
 * Engine backend that rates a pull request from the shape of its diff alone:
 * how large it is, how many files it touches, whether tests and docs moved
 * with the code, and whether sensitive-looking paths are involved.
 *
 * Each observation becomes a weighted positive or negative signal; the
 * weighted sums decide the impact and the confidence.
 */
export class DiffBackend implements EngineBackend {
  name = 'diff';

  constructor(private readonly config: DiffBackendConfig) {}

  /**
   * Analyzes a webhook event and returns a heuristic verdict.
   *
   * Non-PR events, a failed file fetch, and an empty file list all yield a
   * neutral result without scoring. The fetch failure is logged as a warning
   * and not rethrown.
   *
   * @param event - The incoming webhook event.
   * @returns Impact, a confidence in `[0, 1]`, and a human-readable summary of
   *   the signals that fired.
   */
  async analyze(event: WebhookEvent): Promise<AnalysisResult> {
    if (event.type !== 'pull_request') {
      return { impact: 'neutral', confidence: 0, reasoning: 'Not a PR.' };
    }

    let files;
    try {
      files = await fetchPRFiles(event.owner, event.repo, event.number);
    } catch (err) {
      getLogger().warn(
        `Failed to fetch PR files for ${event.owner}/${event.repo}#${event.number}`,
        err
      );
      return { impact: 'neutral', confidence: 0, reasoning: 'Could not fetch PR diff.' };
    }

    if (files.length === 0) {
      return { impact: 'neutral', confidence: 0.1, reasoning: 'Empty diff.' };
    }

    const { src, tests, generated, config, docs, risky } = categorizeFiles(files);

    // Exclude generated files from change counts
    const meaningful = files.filter((f) => !GENERATED_FILE_PATTERN.test(f.filename));
    const totalAdditions = meaningful.reduce((s, f) => s + f.additions, 0);
    const totalDeletions = meaningful.reduce((s, f) => s + f.deletions, 0);
    const totalChanges = totalAdditions + totalDeletions;

    const signals: { name: string; positive: boolean; weight: number }[] = [];

    // --- Size signals ---
    // The configured limit is checked first so that it is honored even when it
    // is set below the built-in 50/200/500 thresholds. For limits of 500 or
    // more this ordering yields the same signals as checking it last.
    if (totalChanges > this.config.maxChanges) {
      signals.push({
        name: `very large PR (${totalChanges} lines, exceeds limit)`,
        positive: false,
        weight: 1.5,
      });
    } else if (totalChanges <= 50) {
      signals.push({ name: 'tiny PR', positive: true, weight: 1.2 });
    } else if (totalChanges <= 200) {
      signals.push({ name: 'small PR', positive: true, weight: 1 });
    } else if (totalChanges > 500) {
      signals.push({ name: `large PR (${totalChanges} lines)`, positive: false, weight: 0.8 });
    }
    // 201-500 lines: medium - no signal either way

    // --- Focus signals ---
    if (src.length <= 3 && src.length > 0) {
      signals.push({ name: 'tightly focused', positive: true, weight: 1.2 });
    } else if (meaningful.length <= 10) {
      signals.push({ name: 'focused changeset', positive: true, weight: 0.8 });
    } else if (meaningful.length > 30) {
      signals.push({
        name: `sprawling changeset (${meaningful.length} files)`,
        positive: false,
        weight: 1.2,
      });
    } else if (meaningful.length > 20) {
      signals.push({
        name: `broad changeset (${meaningful.length} files)`,
        positive: false,
        weight: 0.6,
      });
    }

    // --- Test coverage ---
    if (tests.length > 0 && src.length > 0) {
      // Ratio of test files to source files touched, not line coverage.
      const testRatio = tests.length / src.length;
      if (testRatio >= 0.5) {
        signals.push({ name: 'good test coverage in diff', positive: true, weight: 1.5 });
      } else {
        signals.push({ name: 'includes tests', positive: true, weight: 1 });
      }
    } else if (tests.length > 0 && src.length === 0) {
      signals.push({ name: 'test-only change', positive: true, weight: 1.2 });
    } else if (this.config.requireTests && src.length > 0 && totalChanges > 50) {
      signals.push({ name: 'no test changes for non-trivial PR', positive: false, weight: 1.3 });
    }

    // --- Net deletion ---
    if (totalDeletions > totalAdditions && totalDeletions > 10) {
      // Clamp the denominator so a pure-deletion PR does not divide by zero.
      const ratio = totalDeletions / Math.max(totalAdditions, 1);
      if (ratio > 3) {
        signals.push({ name: 'significant code removal', positive: true, weight: 1.3 });
      } else {
        signals.push({ name: 'net code removal', positive: true, weight: 1 });
      }
    }

    // --- Churn detection (files with high add+delete suggesting rewrites) ---
    // A source file counts as high-churn when both sides exceed 50 lines and
    // the smaller side is more than 60% of the larger one.
    const highChurnFiles = src.filter(
      (f) =>
        f.additions > 50 &&
        f.deletions > 50 &&
        Math.min(f.additions, f.deletions) / Math.max(f.additions, f.deletions) > 0.6
    );
    if (highChurnFiles.length >= 3) {
      signals.push({
        name: `high churn in ${highChurnFiles.length} files (possible refactor)`,
        positive: false,
        weight: 0.5,
      });
    }

    // --- Risky files ---
    if (risky.length > 0) {
      signals.push({
        name: `touches sensitive files (${risky.map((f) => f.filename.split('/').pop()).join(', ')})`,
        positive: false,
        weight: 0.7,
      });
    }

    // --- Documentation ---
    if (docs.length > 0 && src.length > 0) {
      signals.push({ name: 'includes docs updates', positive: true, weight: 0.6 });
    } else if (docs.length > 0 && src.length === 0) {
      signals.push({ name: 'docs-only change', positive: true, weight: 1 });
    }

    // --- Config-only ---
    if (config.length > 0 && src.length === 0 && tests.length === 0) {
      signals.push({ name: 'config/infra only', positive: true, weight: 0.8 });
    }

    // --- Generated file noise ---
    if (generated.length > 0) {
      const genChanges = generated.reduce((s, f) => s + f.changes, 0);
      if (genChanges > totalChanges * 2) {
        signals.push({ name: 'dominated by generated file changes', positive: false, weight: 0.4 });
      }
    }

    // --- Scoring with weights ---
    const positiveWeight = signals.filter((s) => s.positive).reduce((s, x) => s + x.weight, 0);
    const negativeWeight = signals.filter((s) => !s.positive).reduce((s, x) => s + x.weight, 0);

    // One side must outweigh the other by more than 10% to avoid a neutral call.
    let impact: AnalysisResult['impact'];
    if (positiveWeight > negativeWeight * 1.1) {
      impact = 'positive';
    } else if (negativeWeight > positiveWeight * 1.1) {
      impact = 'negative';
    } else {
      impact = 'neutral';
    }

    // Confidence is 0.25 plus up to 0.6 scaled by how one-sided the signals
    // are, capped at 1; it is 0 when no signal fired.
    const totalSignalWeight = positiveWeight + negativeWeight;
    const confidence =
      signals.length > 0
        ? Math.min(
            1,
            (Math.abs(positiveWeight - negativeWeight) / Math.max(totalSignalWeight, 1)) * 0.6 +
              0.25
          )
        : 0;

    // Build reasoning
    // The breakdown lists only the files that were counted, so its parts add
    // up to the reported total; generated files are mentioned separately
    // because they are excluded from both the file and line counts.
    const breakdown: string[] = [];
    if (src.length > 0) breakdown.push(`${src.length} source`);
    if (tests.length > 0) breakdown.push(`${tests.length} test`);
    if (config.length > 0) breakdown.push(`${config.length} config`);
    if (docs.length > 0) breakdown.push(`${docs.length} docs`);
    const countedSummary =
      breakdown.length > 0
        ? `${meaningful.length} files (${breakdown.join(', ')})`
        : `${meaningful.length} files`;
    const fileSummary =
      generated.length > 0
        ? `${countedSummary}, excluding ${generated.length} generated`
        : countedSummary;

    const reasoning =
      signals.length > 0
        ? `Diff: ${signals.map((s) => `${s.positive ? '+' : '-'} ${s.name}`).join(', ')}. ${totalAdditions}+ ${totalDeletions}- across ${fileSummary}.`
        : 'No diff signals.';

    return { impact, confidence, reasoning };
  }
}
|