initial commit
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: Ic08e7c4b5b4f4072de9e2f9a701e977b6a6a6964
This commit is contained in:
commit
f8db097ba9
21 changed files with 4924 additions and 0 deletions
202
src/engine/diff.ts
Normal file
202
src/engine/diff.ts
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
import type { AnalysisResult, DiffBackendConfig, EngineBackend, WebhookEvent } from '../types.js';
|
||||
import { fetchPRFiles } from '../github.js';
|
||||
import { getLogger } from '../logger.js';
|
||||
|
||||
/**
 * Matches paths that look like test code: a `test`, `tests`, `spec` or `specs`
 * token delimited by any non-alphanumeric character (or the start/end of the
 * path). Underscore counts as a delimiter, so `foo_test.go`, `test_api.py`,
 * `user_spec.rb`, `src/__tests__/a.ts` and `foo.test.ts` all match, while
 * `latest.ts` or `inspector.ts` do not.
 *
 * A plain `\b` word boundary is deliberately not used here: `\b` treats `_` as
 * a word character, so it never fires between a letter and an underscore.
 */
const TEST_FILE_PATTERN = /(^|[^a-z0-9])(tests?|specs?)([^a-z0-9]|$)/i;

/** Matches lockfiles, minified JS/CSS bundles and anything under a `/vendor/` directory. */
const GENERATED_FILE_PATTERN =
  /\b(package-lock|yarn\.lock|pnpm-lock|Cargo\.lock|go\.sum|composer\.lock|Gemfile\.lock|poetry\.lock|flake\.lock)\b|\.min\.(js|css)$|\/vendor\//i;

/**
 * Matches configuration / infrastructure files: YAML, TOML, INI, env and JSON
 * extensions, root-level `.*rc` / `.*ignore` dotfiles, `Makefile`, `Dockerfile`
 * and everything under `.github/`.
 */
const CONFIG_FILE_PATTERN =
  /\.(ya?ml|toml|ini|env(\.\w+)?|json)$|^\.[\w-]+(rc|ignore)$|Makefile$|Dockerfile$|^\.github\//i;

/**
 * Matches paths containing a sensitive keyword (migrations, schemas, secrets,
 * auth, permissions, ...) or a `.sql` extension.
 *
 * Note: the keywords are delimited with `\b`, which treats `_` as a word
 * character, so `src/auth.ts` matches but `user_auth.ts` and `migrations/` do not.
 */
const RISKY_FILE_PATTERN =
  /\b(migration|schema|seed|secret|credential|auth|permission|rbac|\.sql)\b/i;

/** Matches documentation by extension, or paths that start with a well-known doc file name. */
const DOC_FILE_PATTERN = /\.(md|mdx|txt|rst|adoc)$|^(README|CHANGELOG|LICENSE|CONTRIBUTING)/i;
|
||||
|
||||
/**
 * Sorts changed files into buckets by matching each filename against the
 * module-level classification patterns.
 *
 * Every file lands in exactly one primary bucket, chosen by the first pattern
 * that matches in this order: generated, tests, docs, config; anything left
 * over is treated as source. Independently of that, a file whose name matches
 * the risky pattern is also appended to `risky`, so `risky` overlaps the
 * primary buckets.
 *
 * @param files - Changed files with their per-file line counts.
 * @returns The six buckets; each preserves the input order of its files.
 */
function categorizeFiles(files: { filename: string; additions: number; deletions: number; changes: number }[]) {
  const buckets: Record<'src' | 'tests' | 'generated' | 'config' | 'docs' | 'risky', typeof files> = {
    src: [],
    tests: [],
    generated: [],
    config: [],
    docs: [],
    risky: [],
  };

  // First matching pattern wins; the order here defines classification priority.
  const primaryBucketFor = (path: string): typeof files => {
    if (GENERATED_FILE_PATTERN.test(path)) return buckets.generated;
    if (TEST_FILE_PATTERN.test(path)) return buckets.tests;
    if (DOC_FILE_PATTERN.test(path)) return buckets.docs;
    if (CONFIG_FILE_PATTERN.test(path)) return buckets.config;
    return buckets.src;
  };

  files.forEach((file) => {
    primaryBucketFor(file.filename).push(file);

    // Non-exclusive: a risky file is recorded here in addition to its primary bucket.
    if (RISKY_FILE_PATTERN.test(file.filename)) {
      buckets.risky.push(file);
    }
  });

  return buckets;
}
|
||||
|
||||
/** One heuristic observation about a diff, weighted for scoring. */
interface DiffSignal {
  name: string;
  positive: boolean;
  weight: number;
}

/**
 * Engine backend that rates a pull request purely from the shape of its diff:
 * size, number of files touched, presence of tests and docs, net deletions,
 * churn, sensitive paths and generated-file noise.
 */
export class DiffBackend implements EngineBackend {
  name = 'diff';

  constructor(private readonly config: DiffBackendConfig) {}

  /**
   * Analyzes a webhook event and returns a weighted verdict on the PR diff.
   *
   * Non-PR events, fetch failures and empty diffs short-circuit to a neutral
   * result. Otherwise each heuristic contributes a positive or negative
   * weighted signal; the impact is whichever side outweighs the other by more
   * than 10%, and the confidence grows with the margin between the two sides
   * (between 0.25 and 0.85 whenever at least one signal fired, 0 otherwise).
   *
   * @param event - Incoming webhook event; only `pull_request` events are scored.
   * @returns Impact, confidence and a human-readable reasoning string.
   *          Fetch errors are logged and reported as a neutral result rather than thrown.
   */
  async analyze(event: WebhookEvent): Promise<AnalysisResult> {
    if (event.type !== 'pull_request') {
      return { impact: 'neutral', confidence: 0, reasoning: 'Not a PR.' };
    }

    let files: Awaited<ReturnType<typeof fetchPRFiles>>;
    try {
      files = await fetchPRFiles(event.owner, event.repo, event.number);
    } catch (err) {
      getLogger().warn(
        `Failed to fetch PR files for ${event.owner}/${event.repo}#${event.number}`,
        err
      );
      return { impact: 'neutral', confidence: 0, reasoning: 'Could not fetch PR diff.' };
    }

    if (files.length === 0) {
      return { impact: 'neutral', confidence: 0.1, reasoning: 'Empty diff.' };
    }

    const { src, tests, generated, config, docs, risky } = categorizeFiles(files);

    // Exclude generated files from change counts
    const meaningful = files.filter((f) => !GENERATED_FILE_PATTERN.test(f.filename));
    const totalAdditions = meaningful.reduce((sum, f) => sum + f.additions, 0);
    const totalDeletions = meaningful.reduce((sum, f) => sum + f.deletions, 0);
    const totalChanges = totalAdditions + totalDeletions;

    const signals: DiffSignal[] = [];
    const addSignal = (name: string, positive: boolean, weight: number): void => {
      signals.push({ name, positive, weight });
    };

    // --- Size signals ---
    // The configured limit is checked first so that a limit below the fixed
    // tiers (50 / 200 / 500) is still enforced instead of being shadowed by them.
    if (totalChanges > this.config.maxChanges) {
      addSignal(`very large PR (${totalChanges} lines, exceeds limit)`, false, 1.5);
    } else if (totalChanges <= 50) {
      addSignal('tiny PR', true, 1.2);
    } else if (totalChanges <= 200) {
      addSignal('small PR', true, 1);
    } else if (totalChanges > 500) {
      addSignal(`large PR (${totalChanges} lines)`, false, 0.8);
    }
    // 201-500 lines: medium - no signal either way

    // --- Focus signals ---
    if (src.length > 0 && src.length <= 3) {
      addSignal('tightly focused', true, 1.2);
    } else if (meaningful.length <= 10) {
      addSignal('focused changeset', true, 0.8);
    } else if (meaningful.length > 30) {
      addSignal(`sprawling changeset (${meaningful.length} files)`, false, 1.2);
    } else if (meaningful.length > 20) {
      addSignal(`broad changeset (${meaningful.length} files)`, false, 0.6);
    }

    // --- Test coverage ---
    if (tests.length > 0 && src.length > 0) {
      // Ratio of test files to source files touched, not of lines.
      const testRatio = tests.length / src.length;
      if (testRatio >= 0.5) {
        addSignal('good test coverage in diff', true, 1.5);
      } else {
        addSignal('includes tests', true, 1);
      }
    } else if (tests.length > 0 && src.length === 0) {
      addSignal('test-only change', true, 1.2);
    } else if (this.config.requireTests && src.length > 0 && totalChanges > 50) {
      addSignal('no test changes for non-trivial PR', false, 1.3);
    }

    // --- Net deletion ---
    if (totalDeletions > totalAdditions && totalDeletions > 10) {
      // Math.max guards against division by zero for pure-deletion PRs.
      const ratio = totalDeletions / Math.max(totalAdditions, 1);
      if (ratio > 3) {
        addSignal('significant code removal', true, 1.3);
      } else {
        addSignal('net code removal', true, 1);
      }
    }

    // --- Churn detection (files with high add+delete suggesting rewrites) ---
    const highChurnFiles = src.filter(
      (f) =>
        f.additions > 50 &&
        f.deletions > 50 &&
        Math.min(f.additions, f.deletions) / Math.max(f.additions, f.deletions) > 0.6
    );
    if (highChurnFiles.length >= 3) {
      addSignal(`high churn in ${highChurnFiles.length} files (possible refactor)`, false, 0.5);
    }

    // --- Risky files ---
    if (risky.length > 0) {
      // Only the basename of each risky file is listed.
      const riskyNames = risky.map((f) => f.filename.split('/').pop()).join(', ');
      addSignal(`touches sensitive files (${riskyNames})`, false, 0.7);
    }

    // --- Documentation ---
    if (docs.length > 0 && src.length > 0) {
      addSignal('includes docs updates', true, 0.6);
    } else if (docs.length > 0 && src.length === 0) {
      addSignal('docs-only change', true, 1);
    }

    // --- Config-only ---
    if (config.length > 0 && src.length === 0 && tests.length === 0) {
      addSignal('config/infra only', true, 0.8);
    }

    // --- Generated file noise ---
    if (generated.length > 0) {
      const genChanges = generated.reduce((sum, f) => sum + f.changes, 0);
      if (genChanges > totalChanges * 2) {
        addSignal('dominated by generated file changes', false, 0.4);
      }
    }

    // --- Scoring with weights ---
    let positiveWeight = 0;
    let negativeWeight = 0;
    for (const signal of signals) {
      if (signal.positive) {
        positiveWeight += signal.weight;
      } else {
        negativeWeight += signal.weight;
      }
    }

    // One side must outweigh the other by more than 10%; otherwise the verdict is neutral.
    let impact: AnalysisResult['impact'];
    if (positiveWeight > negativeWeight * 1.1) {
      impact = 'positive';
    } else if (negativeWeight > positiveWeight * 1.1) {
      impact = 'negative';
    } else {
      impact = 'neutral';
    }

    const totalSignalWeight = positiveWeight + negativeWeight;
    const confidence =
      signals.length > 0
        ? Math.min(1, (Math.abs(positiveWeight - negativeWeight) / Math.max(totalSignalWeight, 1)) * 0.6 + 0.25)
        : 0;

    // Build reasoning
    const breakdown: string[] = [];
    if (src.length > 0) breakdown.push(`${src.length} source`);
    if (tests.length > 0) breakdown.push(`${tests.length} test`);
    if (config.length > 0) breakdown.push(`${config.length} config`);
    if (docs.length > 0) breakdown.push(`${docs.length} docs`);
    if (generated.length > 0) breakdown.push(`${generated.length} generated`);
    const fileSummary = `${meaningful.length} files (${breakdown.join(', ')})`;

    const reasoning =
      signals.length > 0
        ? `Diff: ${signals.map((s) => `${s.positive ? '+' : '-'} ${s.name}`).join(', ')}. ${totalAdditions}+ ${totalDeletions}- across ${fileSummary}.`
        : 'No diff signals.';

    return { impact, confidence, reasoning };
  }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue