diff --git a/src/engine/context.ts b/src/engine/context.ts
new file mode 100644
index 0000000..aa04373
--- /dev/null
+++ b/src/engine/context.ts
@@ -0,0 +1,342 @@
+import type { AnalysisResult, WebhookEvent } from "../types.js";
+import { readFileSync, writeFileSync, existsSync } from "fs";
+import { getLogger } from "../logger.js";
+
+/** Per-author tallies accumulated by `updateContext`. */
+interface AuthorStats {
+  totalContributions: number;
+  positiveImpacts: number;
+  negativeImpacts: number;
+  neutralImpacts: number;
+  averageConfidence: number;
+  lastSeen: string;
+}
+
+/** Per-repository record; only the last two fields are written here. */
+interface RepoPatterns {
+  typicalPRSize: number;
+  typicalFileCount: number;
+  commonLabels: string[];
+  activeAuthors: string[];
+  lastUpdated: string;
+}
+
+/** Shape of the persisted context file. */
+interface ContextData {
+  authors: Record<string, AuthorStats>;
+  repositories: Record<string, RepoPatterns>;
+  globalStats: {
+    totalAnalyses: number;
+    version: number;
+  };
+}
+
+const CONTEXT_VERSION = 1;
+const MAX_ACTIVE_AUTHORS = 100;
+
+/** Builds an empty context stamped with the current schema version. */
+function createDefaultContext(): ContextData {
+  return {
+    authors: {},
+    repositories: {},
+    globalStats: {
+      totalAnalyses: 0,
+      version: CONTEXT_VERSION,
+    },
+  };
+}
+
+/**
+ * Reads `table[key]` only when `key` is an own property. Author logins are
+ * used as keys, and a login such as "constructor" or "toString" would
+ * otherwise resolve to an inherited Object.prototype member.
+ */
+function ownEntry<T>(table: Record<string, T>, key: string): T | undefined {
+  return Object.prototype.hasOwnProperty.call(table, key)
+    ? table[key]
+    : undefined;
+}
+
+let contextData: ContextData = createDefaultContext();
+let contextFile: string | null = null;
+
+/** Picks the state file (a default name if omitted) and loads it. */
+export function initContext(stateFile?: string): void {
+  contextFile = stateFile || ".troutbot-context.json";
+  loadContext();
+}
+
+/**
+ * Replaces the in-memory context with the file contents, falling back to an
+ * empty context when the file is missing, unreadable, or fails validation.
+ */
+function loadContext(): void {
+  if (!contextFile || !existsSync(contextFile)) {
+    contextData = createDefaultContext();
+    return;
+  }
+
+  try {
+    const data = readFileSync(contextFile, "utf-8");
+    const parsed = JSON.parse(data);
+
+    // Validate the top-level structure only; entries are not checked.
+    if (
+      parsed &&
+      typeof parsed === "object" &&
+      parsed.globalStats?.version === CONTEXT_VERSION &&
+      typeof parsed.authors === "object" &&
+      parsed.authors !== null &&
+      !Array.isArray(parsed.authors) &&
+      typeof parsed.repositories === "object" &&
+      parsed.repositories !== null &&
+      !Array.isArray(parsed.repositories)
+    ) {
+      contextData = parsed;
+    } else {
+      getLogger().warn("Invalid context format, resetting");
+      contextData = createDefaultContext();
+    }
+  } catch (err) {
+    // Report the reset instead of discarding history without a trace.
+    getLogger().warn("Failed to load context, resetting", err);
+    contextData = createDefaultContext();
+  }
+}
+
+/** Writes the context to `contextFile`; failures are logged, not thrown. */
+function saveContext(): void {
+  if (!contextFile) return;
+
+  try {
+    writeFileSync(contextFile, JSON.stringify(contextData, null, 2));
+  } catch (err) {
+    getLogger().warn("Failed to save context", err);
+  }
+}
+
+/**
+ * Records one analysis outcome for the event's author and repository, then
+ * persists the context.
+ */
+export function updateContext(
+  event: WebhookEvent,
+  result: AnalysisResult,
+): void {
+  const author = event.author;
+  const repo = `${event.owner}/${event.repo}`;
+  const now = new Date().toISOString();
+
+  // Update author stats
+  let authorStats = ownEntry(contextData.authors, author);
+  if (!authorStats) {
+    authorStats = {
+      totalContributions: 0,
+      positiveImpacts: 0,
+      negativeImpacts: 0,
+      neutralImpacts: 0,
+      averageConfidence: 0,
+      lastSeen: now,
+    };
+    contextData.authors[author] = authorStats;
+  }
+
+  authorStats.totalContributions++;
+  authorStats.lastSeen = now;
+
+  if (result.impact === "positive") authorStats.positiveImpacts++;
+  else if (result.impact === "negative") authorStats.negativeImpacts++;
+  else authorStats.neutralImpacts++;
+
+  // Update running average confidence. A non-finite sample counts as 0 so a
+  // single NaN cannot poison the running average.
+  const confidence = Number.isFinite(result.confidence)
+    ? result.confidence
+    : 0;
+  authorStats.averageConfidence =
+    (authorStats.averageConfidence * (authorStats.totalContributions - 1) +
+      confidence) /
+    authorStats.totalContributions;
+
+  // Update repo patterns (simplified)
+  let repoPatterns = ownEntry(contextData.repositories, repo);
+  if (!repoPatterns) {
+    repoPatterns = {
+      typicalPRSize: 0,
+      typicalFileCount: 0,
+      commonLabels: [],
+      activeAuthors: [],
+      lastUpdated: now,
+    };
+    contextData.repositories[repo] = repoPatterns;
+  }
+
+  if (!repoPatterns.activeAuthors.includes(author)) {
+    repoPatterns.activeAuthors.push(author);
+    // Enforce max size to prevent unbounded growth
+    if (repoPatterns.activeAuthors.length > MAX_ACTIVE_AUTHORS) {
+      repoPatterns.activeAuthors.shift();
+    }
+  }
+  repoPatterns.lastUpdated = now;
+
+  // Update global stats
+  contextData.globalStats.totalAnalyses++;
+
+  saveContext();
+}
+
+/**
+ * Summarises an author's recorded history.
+ *
+ * `reputation` is capped at 1 and blends the positive-impact rate (60%),
+ * contribution count saturating at 20 (30%), and average confidence (10%).
+ * Authors without a record get reputation 0 and `isNew: true`.
+ */
+export function getAuthorReputation(author: string): {
+  isTrusted: boolean;
+  isNew: boolean;
+  reputation: number;
+  history: string;
+} {
+  const stats = ownEntry(contextData.authors, author);
+
+  if (!stats) {
+    return {
+      isTrusted: false,
+      isNew: true,
+      reputation: 0,
+      history: "First-time contributor",
+    };
+  }
+
+  const successRate =
+    stats.totalContributions > 0
+      ? stats.positiveImpacts / stats.totalContributions
+      : 0;
+
+  const reputation = Math.min(
+    1,
+    successRate * 0.6 +
+      (Math.min(stats.totalContributions, 20) / 20) * 0.3 +
+      stats.averageConfidence * 0.1,
+  );
+
+  let history: string;
+  if (stats.totalContributions === 1) {
+    history = "1 contribution";
+  } else if (stats.totalContributions < 5) {
+    history = `${stats.totalContributions} contributions, ${(successRate * 100).toFixed(0)}% positive`;
+  } else {
+    history = `${stats.totalContributions} contributions, ${stats.positiveImpacts}+/${stats.negativeImpacts}-/${stats.neutralImpacts}~`;
+  }
+
+  return {
+    isTrusted: reputation > 0.7 && stats.totalContributions >= 5,
+    isNew: stats.totalContributions < 3,
+    reputation,
+    history,
+  };
+}
+
+/**
+ * Describes what is recorded for `owner/repo`; `communitySize` is the number
+ * of tracked authors, which is capped at MAX_ACTIVE_AUTHORS.
+ */
+export function getRepoContext(
+  owner: string,
+  repo: string,
+): {
+  isActive: boolean;
+  communitySize: number;
+  maturity: string;
+} {
+  const repoKey = `${owner}/${repo}`;
+  const patterns = ownEntry(contextData.repositories, repoKey);
+
+  if (!patterns) {
+    return {
+      isActive: false,
+      communitySize: 0,
+      maturity: "unknown",
+    };
+  }
+
+  const communitySize = patterns.activeAuthors.length;
+  let maturity: string;
+
+  // NOTE(review): this is the global analysis count, not a per-repository
+  // one, so a repo first seen late is never labelled "new" - confirm intent.
+  if (contextData.globalStats.totalAnalyses < 10) {
+    maturity = "new";
+  } else if (communitySize < 3) {
+    maturity = "small-team";
+  } else if (communitySize < 10) {
+    maturity = "growing";
+  } else {
+    maturity = "established";
+  }
+
+  return {
+    isActive: true,
+    communitySize,
+    maturity,
+  };
+}
+
+/**
+ * Builds human-readable remarks about the author, the repository, and how
+ * the per-backend results relate to one another.
+ */
+export function getContextualInsights(
+  event: WebhookEvent,
+  backendResults: Record<string, { impact: string }>,
+): string[] {
+  const insights: string[] = [];
+  const authorRep = getAuthorReputation(event.author);
+  const repoCtx = getRepoContext(event.owner, event.repo);
+  const hasRecord = ownEntry(contextData.authors, event.author) !== undefined;
+
+  // Author-based insights. `isNew` also covers authors with one or two
+  // recorded contributions, so only greet as first-time when none exist.
+  if (!hasRecord) {
+    insights.push(
+      `Welcome ${event.author}! This appears to be your first contribution.`,
+    );
+  } else if (authorRep.isNew) {
+    insights.push(
+      `Welcome ${event.author}! You are still new here (${authorRep.history}).`,
+    );
+  } else if (authorRep.isTrusted) {
+    insights.push(
+      `${event.author} is a trusted contributor with ${authorRep.history}.`,
+    );
+  } else if (authorRep.reputation < 0.3) {
+    insights.push(
+      `${event.author} has had mixed results recently (${authorRep.history}).`,
+    );
+  }
+
+  // Repo-based insights
+  if (repoCtx.maturity === "new") {
+    insights.push("This repository is still building up analysis history.");
+  }
+
+  // Cross-backend pattern detection
+  const impacts = Object.values(backendResults).map((r) => r.impact);
+  const allPositive = impacts.every((i) => i === "positive");
+  const allNegative = impacts.every((i) => i === "negative");
+  const mixed = new Set(impacts).size > 1;
+
+  if (allPositive && impacts.length >= 2) {
+    insights.push("All analysis backends agree: this looks solid.");
+  } else if (allNegative && impacts.length >= 2) {
+    insights.push("Multiple concerns detected across different dimensions.");
+  } else if (mixed) {
+    insights.push(
+      "Mixed signals - some aspects look good, others need attention.",
+    );
+  }
+
+  return insights;
+}
diff --git a/src/engine/index.ts b/src/engine/index.ts
index a09e06f..4371c33 100644
--- a/src/engine/index.ts
+++ b/src/engine/index.ts
@@ -4,26 +4,35 @@ import type {
   EngineConfig,
   Impact,
   WebhookEvent,
-} from '../types.js';
-import { ChecksBackend } from './checks.js';
-import { DiffBackend } from './diff.js';
-import { QualityBackend } from './quality.js';
-import { getLogger } from '../logger.js';
-
-const impactToNumeric: Record<Impact, number> = {
-  positive: 1,
-  neutral: 0,
-  negative: -1,
-};
+} from "../types.js";
+import { ChecksBackend } from "./checks.js";
+import { DiffBackend } from "./diff.js";
+import { QualityBackend } from "./quality.js";
+import { getLogger } from "../logger.js";
+import {
+  initContext,
+  updateContext,
+  getAuthorReputation,
+  getRepoContext,
+} from "./context.js";
 
 interface WeightedBackend {
   backend: EngineBackend;
   weight: number;
 }
 
+interface BackendResult {
+  backend: string;
+  impact: Impact;
+  confidence: number;
+  reasoning: string;
+  weight: number;
+}
+
 export class Engine {
   private backends: WeightedBackend[] = [];
   private confidenceThreshold: number;
+  private contextInitialized = false;
 
   constructor(config: EngineConfig) {
     this.confidenceThreshold = config.confidenceThreshold;
@@ -47,68 +56,478 @@ export class Engine {
     }
 
     if (this.backends.length === 0) {
-      throw new Error('No engine backends enabled');
+      throw new Error("No engine backends enabled");
+    }
+  }
+
+  /** Loads the persisted context on first call; later calls do nothing. */
+  initializeContext(stateFile?: string): void {
+    if (!this.contextInitialized) {
+      initContext(stateFile);
+      this.contextInitialized = true;
     }
   }
 
+  /**
+   * Runs every backend, merges the signals with author and repository
+   * context, and records the outcome unless no backend produced a signal.
+   */
   async analyze(event: WebhookEvent): Promise<AnalysisResult> {
     const logger = getLogger();
-    const results = await Promise.all(
+
+    if (!this.contextInitialized) {
+      this.initializeContext();
+    }
+
+    const authorRep = getAuthorReputation(event.author);
+    const repoCtx = getRepoContext(event.owner, event.repo);
+
+    logger.debug(
+      `Analyzing ${event.type} #${event.number} by ${event.author} ` +
+        `(reputation: ${(authorRep.reputation * 100).toFixed(0)}%, repo: ${repoCtx.maturity})`,
+    );
+
+    // Run all backends
+    const backendResults: BackendResult[] = [];
+    const rawResults = await Promise.all(
       this.backends.map(async ({ backend, weight }) => {
         try {
           const result = await backend.analyze(event);
           logger.debug(
-            `Backend "${backend.name}": impact=${result.impact}, confidence=${result.confidence.toFixed(2)}`
+            `Backend "${backend.name}": impact=${result.impact}, confidence=${result.confidence.toFixed(2)}`,
           );
-          return { result, weight };
+          return { backend: backend.name, result, weight };
         } catch (err) {
           logger.error(`Backend "${backend.name}" threw unexpectedly`, err);
           return {
+            backend: backend.name,
             result: {
-              impact: 'neutral' as Impact,
+              impact: "neutral" as Impact,
               confidence: 0,
               reasoning: `${backend.name}: error`,
             },
             weight,
           };
         }
-      })
+      }),
     );
 
-    // Filter to backends that actually produced a signal (confidence > 0)
-    const active = results.filter((r) => r.result.confidence > 0);
+    for (const r of rawResults) {
+      backendResults.push({
+        backend: r.backend,
+        impact: r.result.impact,
+        confidence: r.result.confidence,
+        reasoning: r.result.reasoning,
+        weight: r.weight,
+      });
+    }
+
+    const active = backendResults.filter((r) => r.confidence > 0);
     if (active.length === 0) {
-      return { impact: 'neutral', confidence: 0, reasoning: 'No backends produced a signal.' };
+      return {
+        impact: "neutral",
+        confidence: 0,
+        reasoning: `Insufficient data: no analysis backends produced signals for ${event.type} #${event.number}.`,
+      };
     }
 
-    const totalWeight = active.reduce((s, r) => s + r.weight, 0);
-    const combinedScore =
-      active.reduce(
-        (s, r) => s + impactToNumeric[r.result.impact] * r.result.confidence * r.weight,
-        0
-      ) / totalWeight;
-    const combinedConfidence =
-      active.reduce((s, r) => s + r.result.confidence * r.weight, 0) / totalWeight;
+    // Calculate multi-dimensional scores
+    const dimensions = this.calculateDimensions(active);
 
+    // Detect correlations and patterns
+    const correlations = this.detectCorrelations(backendResults, dimensions);
+
+    // Calculate overall score and confidence
+    const { score, confidence, uncertainty } = this.calculateOverall(
+      active,
+      dimensions,
+      correlations,
+      authorRep,
+    );
+
+    // Determine impact
     let impact: Impact;
-    if (combinedScore > 0.1) {
-      impact = 'positive';
-    } else if (combinedScore < -0.1) {
-      impact = 'negative';
+    if (score > 0.2) {
+      impact = "positive";
+    } else if (score < -0.2) {
+      impact = "negative";
     } else {
-      impact = 'neutral';
+      impact = "neutral";
     }
 
-    if (combinedConfidence < this.confidenceThreshold) {
-      impact = 'neutral';
+    if (confidence < this.confidenceThreshold) {
+      impact = "neutral";
     }
 
-    const reasoning = results
-      .filter((r) => r.result.confidence > 0)
-      .map((r) => r.result.reasoning)
-      .join(' ');
+    // Generate analytical reasoning
+    const reasoning = this.generateAnalyticalReasoning(
+      event,
+      backendResults,
+      active,
+      dimensions,
+      correlations,
+      score,
+      confidence,
+      uncertainty,
+      authorRep,
+      repoCtx,
+    );
 
-    return { impact, confidence: combinedConfidence, reasoning };
+    const result: AnalysisResult = {
+      impact,
+      confidence,
+      reasoning,
+      dimensions,
+      correlations,
+      uncertainty,
+    };
+
+    updateContext(event, result);
+    return result;
+  }
+
+  /**
+   * Splits each backend's signed, confidence-weighted impact across the four
+   * dimensions. A negative value means a negative signal, also for `risk`.
+   */
+  private calculateDimensions(
+    active: BackendResult[],
+  ): NonNullable<AnalysisResult["dimensions"]> {
+    let correctnessScore = 0;
+    let correctnessWeight = 0;
+    let riskScore = 0;
+    let riskWeight = 0;
+    let maintainabilityScore = 0;
+    let maintainabilityWeight = 0;
+    let alignmentScore = 0;
+    let alignmentWeight = 0;
+
+    for (const r of active) {
+      const impactScore =
+        r.impact === "positive" ? 1 : r.impact === "negative" ? -1 : 0;
+      const weightedImpact = impactScore * r.confidence * r.weight;
+
+      switch (r.backend) {
+        case "checks":
+          correctnessScore += weightedImpact * 0.7;
+          correctnessWeight += r.weight * 0.7;
+          riskScore += weightedImpact * 0.3;
+          riskWeight += r.weight * 0.3;
+          break;
+        case "diff":
+          maintainabilityScore += weightedImpact * 0.6;
+          maintainabilityWeight += r.weight * 0.6;
+          riskScore += weightedImpact * 0.4;
+          riskWeight += r.weight * 0.4;
+          break;
+        case "quality":
+          alignmentScore += weightedImpact * 0.7;
+          alignmentWeight += r.weight * 0.7;
+          maintainabilityScore += weightedImpact * 0.3;
+          maintainabilityWeight += r.weight * 0.3;
+          break;
+      }
+    }
+
+    return {
+      correctness:
+        correctnessWeight > 0 ? correctnessScore / correctnessWeight : 0,
+      risk: riskWeight > 0 ? riskScore / riskWeight : 0,
+      maintainability:
+        maintainabilityWeight > 0
+          ? maintainabilityScore / maintainabilityWeight
+          : 0,
+      alignment: alignmentWeight > 0 ? alignmentScore / alignmentWeight : 0,
+    };
+  }
+
+  /**
+   * Flags disagreements between backends (suspicious patterns and
+   * contradictions) and dimension scores that reinforce one another.
+   */
+  private detectCorrelations(
+    allResults: BackendResult[],
+    dimensions: NonNullable<AnalysisResult["dimensions"]>,
+  ): NonNullable<AnalysisResult["correlations"]> {
+    const suspiciousPatterns: string[] = [];
+    const reinforcingSignals: string[] = [];
+    const contradictions: string[] = [];
+
+    const active = allResults.filter((r) => r.confidence > 0);
+    const hasChecks = active.some((r) => r.backend === "checks");
+    const hasDiff = active.some((r) => r.backend === "diff");
+    const hasQuality = active.some((r) => r.backend === "quality");
+
+    // Check for suspicious patterns
+    if (hasChecks && hasDiff) {
+      const checksResult = active.find((r) => r.backend === "checks");
+      const diffResult = active.find((r) => r.backend === "diff");
+
+      if (
+        checksResult?.impact === "positive" &&
+        diffResult?.impact === "negative"
+      ) {
+        suspiciousPatterns.push(
+          "Checks pass but diff analysis shows concerns (untested changes?)",
+        );
+      }
+
+      if (
+        checksResult?.impact === "negative" &&
+        diffResult?.impact === "positive"
+      ) {
+        suspiciousPatterns.push(
+          "Clean diff but failing checks (test failures?)",
+        );
+      }
+    }
+
+    if (hasDiff && hasQuality) {
+      const diffResult = active.find((r) => r.backend === "diff");
+      const qualityResult = active.find((r) => r.backend === "quality");
+
+      if (
+        diffResult?.impact === "positive" &&
+        qualityResult?.impact === "negative"
+      ) {
+        suspiciousPatterns.push(
+          "Clean code changes but poor description (documentation debt)",
+        );
+      }
+    }
+
+    // Check for reinforcing signals
+    if (dimensions.correctness > 0.5 && dimensions.maintainability > 0.5) {
+      reinforcingSignals.push(
+        "High correctness and maintainability scores align",
+      );
+    }
+
+    if (dimensions.risk < -0.5 && dimensions.alignment < -0.3) {
+      reinforcingSignals.push("Risk and misalignment indicators converge");
+    }
+
+    // Check for contradictions
+    const positiveCount = active.filter((r) => r.impact === "positive").length;
+    const negativeCount = active.filter((r) => r.impact === "negative").length;
+
+    if (positiveCount > 0 && negativeCount > 0) {
+      contradictions.push(
+        `Mixed backend signals: ${positiveCount} positive, ${negativeCount} negative`,
+      );
+    }
+
+    if (dimensions.correctness > 0.3 && dimensions.risk < -0.3) {
+      contradictions.push("Correct implementation but high risk profile");
+    }
+
+    return {
+      suspiciousPatterns,
+      reinforcingSignals,
+      contradictions,
+    };
+  }
+
+  /**
+   * Blends the weighted backend score (60%) with the dimension score (40%),
+   * damps it when contradictions exist, and adjusts confidence for
+   * agreement, author history, and suspicious patterns.
+   */
+  private calculateOverall(
+    active: BackendResult[],
+    dimensions: NonNullable<AnalysisResult["dimensions"]>,
+    correlations: NonNullable<AnalysisResult["correlations"]>,
+    authorRep: ReturnType<typeof getAuthorReputation>,
+  ): {
+    score: number;
+    confidence: number;
+    uncertainty: NonNullable<AnalysisResult["uncertainty"]>;
+  } {
+    const totalWeight = active.reduce((s, r) => s + r.weight, 0);
+
+    // Calculate weighted average of individual backend scores
+    let baseScore = 0;
+    for (const r of active) {
+      const impactScore =
+        r.impact === "positive" ? 1 : r.impact === "negative" ? -1 : 0;
+      baseScore += impactScore * r.confidence * r.weight;
+    }
+
+    // Guard against division by zero when all weights are 0
+    if (totalWeight === 0) {
+      baseScore = 0;
+      getLogger().debug(
+        "All backend weights are zero, defaulting baseScore to 0",
+      );
+    } else {
+      baseScore /= totalWeight;
+    }
+
+    // Adjust score based on dimensions
+    const dimensionScore =
+      dimensions.correctness * 0.4 +
+      dimensions.risk * 0.2 +
+      dimensions.maintainability * 0.25 +
+      dimensions.alignment * 0.15;
+
+    // Blend backend score with dimension score
+    let finalScore = baseScore * 0.6 + dimensionScore * 0.4;
+
+    // Penalize for contradictions
+    if (correlations.contradictions.length > 0) {
+      finalScore *= 0.85;
+    }
+
+    // Calculate confidence
+    let baseConfidence = 0;
+    if (totalWeight === 0) {
+      baseConfidence = 0;
+      getLogger().debug(
+        "All backend weights are zero, defaulting baseConfidence to 0",
+      );
+    } else {
+      baseConfidence =
+        active.reduce((s, r) => s + r.confidence * r.weight, 0) / totalWeight;
+    }
+
+    // Adjust confidence based on various factors
+    const uniqueImpacts = new Set(active.map((r) => r.impact));
+    // Only reward agreement when at least two backends took part.
+    if (uniqueImpacts.size === 1 && active.length > 1) {
+      baseConfidence = Math.min(1, baseConfidence * 1.15);
+    } else if (uniqueImpacts.size === 3) {
+      baseConfidence *= 0.75;
+    }
+
+    if (authorRep.isNew) {
+      baseConfidence *= 0.9;
+    }
+
+    if (correlations.suspiciousPatterns.length > 0) {
+      baseConfidence *= 0.85;
+    }
+
+    // Calculate uncertainty interval
+    const uncertaintyRange = 1 - baseConfidence;
+    const lowerBound = Math.max(0, baseConfidence - uncertaintyRange * 0.5);
+    const upperBound = Math.min(1, baseConfidence + uncertaintyRange * 0.5);
+
+    // Determine primary uncertainty source
+    let primaryUncertaintySource = "Backend confidence variance";
+    if (uniqueImpacts.size > 1) {
+      primaryUncertaintySource = "Mixed backend signals";
+    } else if (authorRep.isNew) {
+      primaryUncertaintySource = "Limited author history";
+    } else if (active.length < this.backends.length) {
+      primaryUncertaintySource = "Partial backend coverage";
+    }
+
+    return {
+      score: finalScore,
+      confidence: baseConfidence,
+      uncertainty: {
+        confidenceInterval: [lowerBound, upperBound],
+        primaryUncertaintySource,
+      },
+    };
+  }
+
+  /**
+   * Renders the multi-section explanation attached to the result. The final
+   * verdict line follows the same rules analyze() uses for `impact`.
+   */
+  private generateAnalyticalReasoning(
+    event: WebhookEvent,
+    _allResults: BackendResult[],
+    activeResults: BackendResult[],
+    dimensions: NonNullable<AnalysisResult["dimensions"]>,
+    correlations: NonNullable<AnalysisResult["correlations"]>,
+    score: number,
+    confidence: number,
+    uncertainty: NonNullable<AnalysisResult["uncertainty"]>,
+    authorRep: ReturnType<typeof getAuthorReputation>,
+    repoCtx: ReturnType<typeof getRepoContext>,
+  ): string {
+    const parts: string[] = [];
+
+    // Summary of dimensional analysis
+    parts.push(`Dimensional Analysis:`);
+    parts.push(
+      ` Correctness: ${(dimensions.correctness * 100).toFixed(0)}% | ` +
+        `Risk: ${(dimensions.risk * 100).toFixed(0)}% | ` +
+        `Maintainability: ${(dimensions.maintainability * 100).toFixed(0)}% | ` +
+        `Alignment: ${(dimensions.alignment * 100).toFixed(0)}%`,
+    );
+
+    // Backend breakdown
+    parts.push(`\nBackend Results:`);
+    for (const r of activeResults) {
+      const icon =
+        r.impact === "positive"
+          ? "[+]"
+          : r.impact === "negative"
+            ? "[-]"
+            : "[~]";
+      parts.push(
+        ` ${icon} ${r.backend}: ${r.impact} (${(r.confidence * 100).toFixed(0)}%) - ${r.reasoning}`,
+      );
+    }
+
+    // Correlation analysis
+    const hasPatterns =
+      correlations.suspiciousPatterns.length > 0 ||
+      correlations.reinforcingSignals.length > 0 ||
+      correlations.contradictions.length > 0;
+
+    if (hasPatterns) {
+      parts.push(`\nPattern Analysis:`);
+
+      if (correlations.reinforcingSignals.length > 0) {
+        for (const signal of correlations.reinforcingSignals) {
+          parts.push(` [^] ${signal}`);
+        }
+      }
+
+      if (correlations.suspiciousPatterns.length > 0) {
+        for (const pattern of correlations.suspiciousPatterns) {
+          parts.push(` [!] ${pattern}`);
+        }
+      }
+
+      if (correlations.contradictions.length > 0) {
+        for (const contradiction of correlations.contradictions) {
+          parts.push(` [x] ${contradiction}`);
+        }
+      }
+    }
+
+    // Context information
+    parts.push(`\nContext:`);
+    parts.push(
+      ` Author: ${event.author} (${authorRep.isNew ? "new" : `reputation: ${(authorRep.reputation * 100).toFixed(0)}%`})`,
+    );
+    parts.push(
+      ` Repository: ${repoCtx.maturity} (${repoCtx.communitySize} active contributors)`,
+    );
+
+    // Confidence and uncertainty
+    parts.push(`\nConfidence Assessment:`);
+    parts.push(` Overall: ${(confidence * 100).toFixed(0)}%`);
+    parts.push(
+      ` Interval: [${(uncertainty.confidenceInterval[0] * 100).toFixed(0)}%, ${(uncertainty.confidenceInterval[1] * 100).toFixed(0)}%]`,
+    );
+    parts.push(
+      ` Primary uncertainty: ${uncertainty.primaryUncertaintySource}`,
+    );
+
+    // Final assessment: as in analyze(), low confidence forces NEUTRAL.
+    const lowConfidence = confidence < this.confidenceThreshold;
+    let verdict = "NEUTRAL";
+    if (!lowConfidence && score > 0.2) verdict = "POSITIVE";
+    else if (!lowConfidence && score < -0.2) verdict = "NEGATIVE";
+    parts.push(`\nAssessment: ${verdict} (score: ${score.toFixed(2)})`);
+
+    return parts.join("\n");
   }
 }
 
diff --git a/src/polling.ts b/src/polling.ts
index bdeb33c..2d4edc1 100644
--- a/src/polling.ts
+++ b/src/polling.ts
@@ -1,6 +1,7 @@
-import type { Config, WebhookEvent, RepoConfig } from './types.js';
+import type { Config, WebhookEvent, RepoPattern } from "./types.js";
 import {
   listRecentComments,
+  listAccessibleRepositories,
   fetchPR,
   fetchIssue,
   hasExistingComment,
@@ -9,11 +10,11 @@ import {
   formatComment,
   createReaction,
   type RecentComment,
-} from './github.js';
-import { createEngine } from './engine/index.js';
-import { getLogger } from './logger.js';
-import { recordEvent } from './events.js';
-import { readFileSync, writeFileSync, existsSync } from 'fs';
+} from "./github.js";
+import { createEngine } from "./engine/index.js";
+import { getLogger } from "./logger.js";
+import { recordEvent } from "./events.js";
+import { readFileSync, writeFileSync, existsSync } from "fs";
 
 interface ProcessedComment {
   id: number;
@@ -26,24 +27,30 @@ interface PollingState {
 
 const processedComments: Map<string, ProcessedComment> = new Map();
 const MAX_PROCESSED_CACHE = 1000;
+const MAX_RETRY_ATTEMPTS = 3;
+// Failed attempts per comment, kept apart from processedComments so that a
+// comment awaiting a retry never looks like a processed one.
+const failureCounts = new Map<string, number>();
 
 let pollingState: PollingState = { lastProcessedAt: {} };
 
 function loadPollingState(stateFile: string): void {
   if (existsSync(stateFile)) {
     try {
-      const data = readFileSync(stateFile, 'utf-8');
+      const data = readFileSync(stateFile, "utf-8");
       const parsed = JSON.parse(data);
       // Validate that parsed data has expected structure
       if (
         parsed &&
-        typeof parsed === 'object' &&
+        typeof parsed === "object" &&
         parsed.lastProcessedAt &&
-        typeof parsed.lastProcessedAt === 'object'
+        typeof parsed.lastProcessedAt === "object"
       ) {
         pollingState = parsed;
       } else {
-        getLogger().warn('Invalid polling state format, resetting to empty state');
+        getLogger().warn(
+          "Invalid polling state format, resetting to empty state",
+        );
         pollingState = { lastProcessedAt: {} };
       }
     } catch {
@@ -57,7 +64,7 @@ function savePollingState(stateFile: string): void {
   try {
     writeFileSync(stateFile, JSON.stringify(pollingState, null, 2));
   } catch (err) {
-    getLogger().warn('Failed to save polling state', err);
+    getLogger().warn("Failed to save polling state", err);
   }
 }
 
@@ -86,7 +93,9 @@ function isProcessed(owner: string, repo: string, commentId: number): boolean {
 
 function markProcessed(owner: string, repo: string, commentId: number): void {
   const key = getCacheKey(owner, repo, commentId);
+  // A handled comment needs no retry bookkeeping.
+  failureCounts.delete(key);
   processedComments.set(key, { id: commentId, timestamp: Date.now() });
 
   // Clean up old entries if cache is too large
   if (processedComments.size > MAX_PROCESSED_CACHE) {
@@ -99,13 +108,36 @@ function markProcessed(owner: string, repo: string, commentId: number): void {
   }
 }
 
+/**
+ * Counts a failed processing attempt for a comment.
+ *
+ * @returns true once MAX_RETRY_ATTEMPTS failures have been recorded, at which
+ *   point the counter is dropped and the caller should stop retrying.
+ */
+function recordFailure(
+  owner: string,
+  repo: string,
+  commentId: number,
+): boolean {
+  const key = getCacheKey(owner, repo, commentId);
+  const failures = (failureCounts.get(key) ?? 0) + 1;
+
+  if (failures >= MAX_RETRY_ATTEMPTS) {
+    failureCounts.delete(key);
+    return true;
+  }
+
+  failureCounts.set(key, failures);
+  return false;
+}
+
 function containsMention(body: string): boolean {
-  return body.includes('@troutbot');
+  return body.includes("@troutbot");
 }
 
 async function analyzeAndComment(
   event: WebhookEvent,
-  config: Config
+  config: Config,
 ): Promise<Record<string, unknown>> {
   const logger = getLogger();
   const engine = createEngine(config.engine);
@@ -113,36 +145,50 @@ async function analyzeAndComment(
   // Run analysis
   const analysis = await engine.analyze(event);
   logger.info(
-    `Analyzed ${event.owner}/${event.repo}#${event.number}: impact=${analysis.impact}, confidence=${analysis.confidence.toFixed(2)}`
+    `Analyzed ${event.owner}/${event.repo}#${event.number}: impact=${analysis.impact}, confidence=${analysis.confidence.toFixed(2)}`,
   );
 
   // Check for existing comment
   const { commentMarker, allowUpdates } = config.response;
-  const existing = await hasExistingComment(event.owner, event.repo, event.number, commentMarker);
+  const existing = await hasExistingComment(
+    event.owner,
+    event.repo,
+    event.number,
+    commentMarker,
+  );
 
   if (existing.exists && !allowUpdates) {
-    logger.info(`Already commented on ${event.owner}/${event.repo}#${event.number}, skipping`);
-    const result = { skipped: true, reason: 'Already commented' };
+    logger.info(
+      `Already commented on ${event.owner}/${event.repo}#${event.number}, skipping`,
+    );
+    const result = { skipped: true, reason: "Already commented" };
     recordEvent(event, result, analysis);
     return result;
   }
 
+  // Generate comment with analysis
   const body = formatComment(
     config.response,
     event.type,
     analysis.impact,
     analysis.confidence,
-    analysis.reasoning
+    analysis.reasoning,
   );
 
   if (existing.exists && allowUpdates && existing.commentId) {
-    logger.info(`Updating existing comment on ${event.owner}/${event.repo}#${event.number}`);
+    logger.info(
+      `Updating existing comment on ${event.owner}/${event.repo}#${event.number}`,
+    );
     await updateComment(event.owner, event.repo, existing.commentId, body);
   } else {
     await postComment(event.owner, event.repo, event.number, body);
   }
 
-  const result = { processed: true, impact: analysis.impact, confidence: analysis.confidence };
+  const result = {
+    processed: true,
+    impact: analysis.impact,
+    confidence: analysis.confidence,
+  };
   recordEvent(event, result, analysis);
   return result;
 }
@@ -155,18 +201,43 @@ function isAuthorized(username: string, authorizedUsers?: string[]): boolean {
   return authorizedUsers.some((u) => u.toLowerCase() === normalizedUsername);
 }
 
-function isRepoAuthorized(owner: string, repo: string, pollingRepos?: RepoConfig[]): boolean {
-  if (!pollingRepos || pollingRepos.length === 0) {
-    return true; // No restrictions, use global repos
+/**
+ * Reports whether `owner/repo` may be polled. With no patterns every repo is
+ * allowed; otherwise owner and repo are compared case-insensitively, and "*"
+ * (or an omitted repo) matches anything.
+ */
+function isRepoAuthorized(
+  owner: string,
+  repo: string,
+  pollingPatterns?: RepoPattern[],
+): boolean {
+  if (!pollingPatterns || pollingPatterns.length === 0) {
+    return true; // No restrictions, accept all repos
   }
-  return pollingRepos.some((r) => r.owner === owner && r.repo === repo);
+
+  // Check if repo matches any pattern
+  for (const pattern of pollingPatterns) {
+    const ownerMatch =
+      pattern.owner === "*" ||
+      pattern.owner.toLowerCase() === owner.toLowerCase();
+    const repoMatch =
+      pattern.repo === "*" ||
+      pattern.repo === undefined ||
+      pattern.repo.toLowerCase() === repo.toLowerCase();
+
+    if (ownerMatch && repoMatch) {
+      return true;
+    }
+  }
+
+  return false;
 }
 
 async function processComment(
   comment: RecentComment,
   owner: string,
   repo: string,
-  config: Config
+  config: Config,
 ): Promise<void> {
   const logger = getLogger();
 
@@ -175,7 +246,9 @@ async function processComment(
   }
 
   if (isProcessed(owner, repo, comment.id)) {
-    logger.debug(`Comment ${owner}/${repo}#${comment.id} already processed, skipping`);
+    logger.debug(
+      `Comment ${owner}/${repo}#${comment.id} already processed, skipping`,
+    );
     return;
   }
 
@@ -183,9 +256,9 @@ async function processComment(
   const pollingRepos = config.polling?.repositories;
   if (!isRepoAuthorized(owner, repo, pollingRepos)) {
     logger.info(
-      `Unauthorized repo ${owner}/${repo} for polling, ignoring mention from ${comment.author}`
+      `Unauthorized repo ${owner}/${repo} for polling, ignoring mention from ${comment.author}`,
     );
-    await createReaction(owner, repo, comment.id, 'thumbs_down');
+    await createReaction(owner, repo, comment.id, "thumbs_down");
     markProcessed(owner, repo, comment.id);
     return;
   }
@@ -194,13 +267,15 @@ async function processComment(
   const authorizedUsers = config.polling?.authorizedUsers;
   if (!isAuthorized(comment.author, authorizedUsers)) {
     logger.info(
-      `Unauthorized user ${comment.author} attempted on-demand analysis in ${owner}/${repo}#${comment.issueNumber}`
+      `Unauthorized user ${comment.author} attempted on-demand analysis in ${owner}/${repo}#${comment.issueNumber}`,
     );
     markProcessed(owner, repo, comment.id);
     return;
   }
 
-  logger.info(`Found @troutbot mention in ${owner}/${repo}#${comment.issueNumber}`);
+  logger.info(
+    `Found @troutbot mention in ${owner}/${repo}#${comment.issueNumber}`,
+  );
 
   try {
     // First, try to fetch as a PR to check if it's a pull request
@@ -211,8 +286,8 @@ async function processComment(
     if (prData) {
       // It's a pull request
       event = {
-        action: 'on_demand',
-        type: 'pull_request',
+        action: "on_demand",
+        type: "pull_request",
         number: comment.issueNumber,
         title: prData.title,
         body: prData.body,
@@ -227,13 +302,15 @@ async function processComment(
       // It's an issue
       const issueData = await fetchIssue(owner, repo, comment.issueNumber);
       if (!issueData) {
-        logger.warn(`Could not fetch issue ${owner}/${repo}#${comment.issueNumber}`);
+        logger.warn(
+          `Could not fetch issue ${owner}/${repo}#${comment.issueNumber}`,
+        );
         return;
       }
 
       event = {
-        action: 'on_demand',
-        type: 'issue',
+        action: "on_demand",
+        type: "issue",
         number: comment.issueNumber,
         title: issueData.title,
         body: issueData.body,
@@ -248,10 +325,21 @@ async function processComment(
     markProcessed(owner, repo, comment.id);
 
     logger.info(
-      `Successfully processed on-demand analysis for ${owner}/${repo}#${comment.issueNumber}`
+      `Successfully processed on-demand analysis for ${owner}/${repo}#${comment.issueNumber}`,
     );
   } catch (err) {
-    logger.error(`Failed to process mention in ${owner}/${repo}#${comment.issueNumber}`, err);
+    logger.error(
+      `Failed to process mention in ${owner}/${repo}#${comment.issueNumber}`,
+      err,
+    );
+    // Track failures and mark as processed after max retries
+    const shouldStop = recordFailure(owner, repo, comment.id);
+    if (shouldStop) {
+      logger.warn(
+        `Max retry attempts (${MAX_RETRY_ATTEMPTS}) reached for comment ${comment.id}, marking as processed`,
+      );
+      markProcessed(owner, repo, comment.id);
+    }
   }
 }
 
@@ -260,13 +348,15 @@ async function pollRepository(
   repo: string,
   config: Config,
   since: Date,
-  stateFile?: string
+  stateFile?: string,
 ): Promise<void> {
   const logger = getLogger();
 
   try {
     const comments = await listRecentComments(owner, repo, since);
-    logger.debug(`Fetched ${comments.length} recent comments from ${owner}/${repo}`);
+    logger.debug(
+      `Fetched ${comments.length} recent comments from ${owner}/${repo}`,
+    );
 
     let latestCommentDate = since;
 
@@ -293,26 +383,64 @@ export async function startPolling(config: Config): Promise<void> {
   const pollingConfig = config.polling;
 
   if (!pollingConfig || !pollingConfig.enabled) {
-    logger.info('Polling is disabled');
+    logger.info("Polling is disabled");
     return;
   }
 
-  if (config.repositories.length === 0) {
-    logger.warn('Polling enabled but no repositories configured');
+  // Determine repos to poll
+  let reposToPoll: Array<{ owner: string; repo: string }>;
+  const pollingPatterns = pollingConfig.repositories;
+
+  if (!pollingPatterns || pollingPatterns.length === 0) {
+    // No patterns configured - poll all accessible repos
+    reposToPoll = await listAccessibleRepositories();
+    logger.info(
+      `Polling all accessible repositories (${reposToPoll.length} repos)`,
+    );
+  } else {
+    // Build repo list from patterns
+    reposToPoll = [];
+
+    // Start with explicitly configured global repos (for webhooks)
+    for (const repo of config.repositories) {
+      // Only include repos that match polling patterns
+      if (isRepoAuthorized(repo.owner, repo.repo, pollingPatterns)) {
+        reposToPoll.push({ owner: repo.owner, repo: repo.repo });
+      }
+    }
+
+    // If no explicit repos configured, fetch accessible ones and filter by patterns
+    if (config.repositories.length === 0) {
+      const accessibleRepos = await listAccessibleRepositories();
+      for (const repo of accessibleRepos) {
+        if (isRepoAuthorized(repo.owner, repo.repo, pollingPatterns)) {
+          reposToPoll.push({ owner: repo.owner, repo: repo.repo });
+        }
+      }
+    }
+
+    logger.info(`Polling ${reposToPoll.length} repositories matching patterns`);
+  }
+
+  if (reposToPoll.length === 0) {
+    logger.warn(
+      pollingPatterns && pollingPatterns.length > 0
+        ? "No repositories match polling patterns"
+        : "No accessible repositories to poll",
+    );
     return;
   }
 
   const intervalMs = pollingConfig.intervalMinutes * 60 * 1000;
   const lookbackMs = pollingConfig.lookbackMinutes * 60 * 1000;
 
-  logger.info(`Starting polling for ${config.repositories.length} repositories`);
   logger.info(
-    `Poll interval: ${pollingConfig.intervalMinutes} minutes, lookback: ${pollingConfig.lookbackMinutes} minutes`
+    `Poll interval: ${pollingConfig.intervalMinutes} minutes, lookback: ${pollingConfig.lookbackMinutes} minutes`,
   );
 
   // Load persisted state if backfill is enabled
   const stateFile = pollingConfig.backfill
-    ? pollingConfig.stateFile || '.troutbot-polling-state.json'
+    ? pollingConfig.stateFile || ".troutbot-polling-state.json"
     : undefined;
   if (stateFile) {
     loadPollingState(stateFile);
@@ -320,22 +448,28 @@ export async function startPolling(config: Config): Promise<void> {
   }
 
   // Do an initial poll - use persisted timestamp if available, otherwise use lookback
-  for (const repo of config.repositories) {
+  for (const repo of reposToPoll) {
     const lastProcessed = getLastProcessedAt(repo.owner, repo.repo);
     const initialSince = lastProcessed || new Date(Date.now() - lookbackMs);
     if (lastProcessed) {
       logger.info(
-        `Resuming polling for ${repo.owner}/${repo.repo} from ${lastProcessed.toISOString()}`
+        `Resuming polling for ${repo.owner}/${repo.repo} from ${lastProcessed.toISOString()}`,
       );
     }
 
-    await pollRepository(repo.owner, repo.repo, config, initialSince, stateFile);
+    await pollRepository(
+      repo.owner,
+      repo.repo,
+      config,
+      initialSince,
+      stateFile,
+    );
   }
 
   // Set up recurring polling
   setInterval(async () => {
     const since = new Date(Date.now() - lookbackMs);
-    for (const repo of config.repositories) {
+    for (const repo of reposToPoll) {
       await pollRepository(repo.owner, repo.repo, config, since, stateFile);
     }
   }, intervalMs);