mirror of
https://github.com/NotAShelf/nvf.git
synced 2026-04-27 03:47:37 +00:00
deploy: 4139469926
This commit is contained in:
parent
5bd5a2a1d8
commit
d9b7e86e23
56 changed files with 84 additions and 246984 deletions
File diff suppressed because it is too large
Load diff
File diff suppressed because one or more lines are too long
|
|
@ -1,298 +0,0 @@
|
|||
const isWordBoundary = (char) =>
|
||||
/[A-Z]/.test(char) || /[-_\/.]/.test(char) || /\s/.test(char);
|
||||
|
||||
const isCaseTransition = (prev, curr) => {
|
||||
const prevIsUpper = prev.toLowerCase() !== prev;
|
||||
const currIsUpper = curr.toLowerCase() !== curr;
|
||||
return (
|
||||
prevIsUpper && currIsUpper && prev.toLowerCase() !== curr.toLowerCase()
|
||||
);
|
||||
};
|
||||
|
||||
const findBestSubsequenceMatch = (query, target) => {
|
||||
const n = query.length;
|
||||
const m = target.length;
|
||||
|
||||
if (n === 0 || m === 0) return null;
|
||||
|
||||
const positions = [];
|
||||
|
||||
const memo = new Map();
|
||||
const key = (qIdx, tIdx, gap) => `${qIdx}:${tIdx}:${gap}`;
|
||||
|
||||
const findBest = (qIdx, tIdx, currentGap) => {
|
||||
if (qIdx === n) {
|
||||
return { done: true, positions: [...positions], gap: currentGap };
|
||||
}
|
||||
|
||||
const memoKey = key(qIdx, tIdx, currentGap);
|
||||
if (memo.has(memoKey)) {
|
||||
return memo.get(memoKey);
|
||||
}
|
||||
|
||||
let bestResult = null;
|
||||
|
||||
for (let i = tIdx; i < m; i++) {
|
||||
if (target[i] === query[qIdx]) {
|
||||
positions.push(i);
|
||||
const gap = qIdx === 0 ? 0 : i - positions[positions.length - 2] - 1;
|
||||
const newGap = currentGap + gap;
|
||||
|
||||
if (newGap > m) {
|
||||
positions.pop();
|
||||
continue;
|
||||
}
|
||||
|
||||
const result = findBest(qIdx + 1, i + 1, newGap);
|
||||
positions.pop();
|
||||
|
||||
if (result && (!bestResult || result.gap < bestResult.gap)) {
|
||||
bestResult = result;
|
||||
if (result.gap === 0) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
memo.set(memoKey, bestResult);
|
||||
return bestResult;
|
||||
};
|
||||
|
||||
const result = findBest(0, 0, 0);
|
||||
if (!result) return null;
|
||||
|
||||
const consecutive = (() => {
|
||||
let c = 1;
|
||||
for (let i = 1; i < result.positions.length; i++) {
|
||||
if (result.positions[i] === result.positions[i - 1] + 1) {
|
||||
c++;
|
||||
}
|
||||
}
|
||||
return c;
|
||||
})();
|
||||
|
||||
return {
|
||||
positions: result.positions,
|
||||
consecutive,
|
||||
score: calculateMatchScore(query, target, result.positions, consecutive),
|
||||
};
|
||||
};
|
||||
|
||||
const calculateMatchScore = (query, target, positions, consecutive) => {
|
||||
const n = positions.length;
|
||||
const m = target.length;
|
||||
|
||||
if (n === 0) return 0;
|
||||
|
||||
let score = 1.0;
|
||||
|
||||
const startBonus = (m - positions[0]) / m;
|
||||
score += startBonus * 0.5;
|
||||
|
||||
let gapPenalty = 0;
|
||||
for (let i = 1; i < n; i++) {
|
||||
const gap = positions[i] - positions[i - 1] - 1;
|
||||
if (gap > 0) {
|
||||
gapPenalty += Math.min(gap / m, 1.0) * 0.3;
|
||||
}
|
||||
}
|
||||
score -= gapPenalty;
|
||||
|
||||
const consecutiveBonus = consecutive / n;
|
||||
score += consecutiveBonus * 0.3;
|
||||
|
||||
let boundaryBonus = 0;
|
||||
for (let i = 0; i < n; i++) {
|
||||
const char = target[positions[i]];
|
||||
if (i === 0 || isWordBoundary(char)) {
|
||||
boundaryBonus += 0.05;
|
||||
}
|
||||
if (i > 0) {
|
||||
const prevChar = target[positions[i - 1]];
|
||||
if (isCaseTransition(prevChar, char)) {
|
||||
boundaryBonus += 0.03;
|
||||
}
|
||||
}
|
||||
}
|
||||
score = Math.min(1.0, score + boundaryBonus);
|
||||
|
||||
const lengthPenalty = Math.abs(query.length - n) / Math.max(query.length, m);
|
||||
score -= lengthPenalty * 0.2;
|
||||
|
||||
return Math.max(0, Math.min(1.0, score));
|
||||
};
|
||||
|
||||
const fuzzyMatch = (query, target) => {
|
||||
const lowerQuery = query.toLowerCase();
|
||||
const lowerTarget = target.toLowerCase();
|
||||
|
||||
if (lowerQuery.length === 0) return null;
|
||||
if (lowerTarget.length === 0) return null;
|
||||
|
||||
if (lowerTarget === lowerQuery) {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
if (lowerTarget.includes(lowerQuery)) {
|
||||
const ratio = lowerQuery.length / lowerTarget.length;
|
||||
return 0.8 + ratio * 0.2;
|
||||
}
|
||||
|
||||
const match = findBestSubsequenceMatch(lowerQuery, lowerTarget);
|
||||
if (!match) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return Math.min(1.0, match.score);
|
||||
};
|
||||
|
||||
self.onmessage = function (e) {
|
||||
const { messageId, type, data } = e.data;
|
||||
|
||||
const respond = (type, data) => {
|
||||
self.postMessage({ messageId, type, data });
|
||||
};
|
||||
|
||||
const respondError = (error) => {
|
||||
self.postMessage({
|
||||
messageId,
|
||||
type: "error",
|
||||
error: error.message || String(error),
|
||||
});
|
||||
};
|
||||
|
||||
try {
|
||||
if (type === "tokenize") {
|
||||
const text = typeof data === "string" ? data : "";
|
||||
const words = text.toLowerCase().match(/\b[a-zA-Z0-9_-]+\b/g) || [];
|
||||
const tokens = words.filter((word) => word.length > 2);
|
||||
const uniqueTokens = Array.from(new Set(tokens));
|
||||
respond("tokens", uniqueTokens);
|
||||
} else if (type === "search") {
|
||||
const { query, limit = 10 } = data;
|
||||
|
||||
if (!query || typeof query !== "string") {
|
||||
respond("results", []);
|
||||
return;
|
||||
}
|
||||
|
||||
const rawQuery = query.toLowerCase();
|
||||
const text = typeof query === "string" ? query : "";
|
||||
const words = text.toLowerCase().match(/\b[a-zA-Z0-9_-]+\b/g) || [];
|
||||
const searchTerms = words.filter((word) => word.length > 2);
|
||||
|
||||
let documents = [];
|
||||
if (typeof data.documents === "string") {
|
||||
documents = JSON.parse(data.documents);
|
||||
} else if (Array.isArray(data.documents)) {
|
||||
documents = data.documents;
|
||||
} else if (typeof data.transferables === "string") {
|
||||
documents = JSON.parse(data.transferables);
|
||||
}
|
||||
|
||||
if (!Array.isArray(documents) || documents.length === 0) {
|
||||
respond("results", []);
|
||||
return;
|
||||
}
|
||||
|
||||
const useFuzzySearch = rawQuery.length >= 3;
|
||||
|
||||
if (searchTerms.length === 0 && rawQuery.length < 3) {
|
||||
respond("results", []);
|
||||
return;
|
||||
}
|
||||
|
||||
const pageMatches = new Map();
|
||||
|
||||
// Pre-compute lower-case strings for each document
|
||||
const processedDocs = documents.map((doc, docId) => {
|
||||
const title = typeof doc.title === "string" ? doc.title : "";
|
||||
const content = typeof doc.content === "string" ? doc.content : "";
|
||||
|
||||
return {
|
||||
docId,
|
||||
doc,
|
||||
lowerTitle: title.toLowerCase(),
|
||||
lowerContent: content.toLowerCase(),
|
||||
};
|
||||
});
|
||||
|
||||
// First pass: Score pages with fuzzy matching
|
||||
processedDocs.forEach(({ docId, doc, lowerTitle, lowerContent }) => {
|
||||
let match = pageMatches.get(docId);
|
||||
if (!match) {
|
||||
match = { doc, pageScore: 0, matchingAnchors: [] };
|
||||
pageMatches.set(docId, match);
|
||||
}
|
||||
|
||||
if (useFuzzySearch) {
|
||||
const fuzzyTitleScore = fuzzyMatch(rawQuery, lowerTitle);
|
||||
if (fuzzyTitleScore !== null) {
|
||||
match.pageScore += fuzzyTitleScore * 100;
|
||||
}
|
||||
|
||||
const fuzzyContentScore = fuzzyMatch(rawQuery, lowerContent);
|
||||
if (fuzzyContentScore !== null) {
|
||||
match.pageScore += fuzzyContentScore * 30;
|
||||
}
|
||||
}
|
||||
|
||||
// Token-based exact matching
|
||||
searchTerms.forEach((term) => {
|
||||
if (lowerTitle.includes(term)) {
|
||||
match.pageScore += lowerTitle === term ? 20 : 10;
|
||||
}
|
||||
if (lowerContent.includes(term)) {
|
||||
match.pageScore += 2;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// Second pass: Find matching anchors
|
||||
pageMatches.forEach((match) => {
|
||||
const doc = match.doc;
|
||||
if (
|
||||
!doc.anchors ||
|
||||
!Array.isArray(doc.anchors) ||
|
||||
doc.anchors.length === 0
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
doc.anchors.forEach((anchor) => {
|
||||
if (!anchor || !anchor.text) return;
|
||||
|
||||
const anchorText = anchor.text.toLowerCase();
|
||||
let anchorMatches = false;
|
||||
|
||||
if (useFuzzySearch) {
|
||||
const fuzzyScore = fuzzyMatch(rawQuery, anchorText);
|
||||
if (fuzzyScore !== null && fuzzyScore >= 0.4) {
|
||||
anchorMatches = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!anchorMatches) {
|
||||
searchTerms.forEach((term) => {
|
||||
if (anchorText.includes(term)) {
|
||||
anchorMatches = true;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (anchorMatches) {
|
||||
match.matchingAnchors.push(anchor);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
const results = Array.from(pageMatches.values())
|
||||
.filter((m) => m.pageScore > 5)
|
||||
.sort((a, b) => b.pageScore - a.pageScore)
|
||||
.slice(0, limit);
|
||||
|
||||
respond("results", results);
|
||||
}
|
||||
} catch (error) {
|
||||
respondError(error);
|
||||
}
|
||||
};
|
||||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue