nvf/docs-preview-1356/assets/search-worker.js

const isWordBoundary = (char) =>
  /[A-Z]/.test(char) || /[-_\/.]/.test(char) || /\s/.test(char);

const isCaseTransition = (prev, curr) => {
  const prevIsUpper = prev.toLowerCase() !== prev;
  const currIsUpper = curr.toLowerCase() !== curr;
  return (
    prevIsUpper && currIsUpper && prev.toLowerCase() !== curr.toLowerCase()
  );
};

const findBestSubsequenceMatch = (query, target) => {
  const n = query.length;
  const m = target.length;

  if (n === 0 || m === 0) return null;

  const positions = [];

  const memo = new Map();
  const key = (qIdx, tIdx, gap) => `${qIdx}:${tIdx}:${gap}`;

  const findBest = (qIdx, tIdx, currentGap) => {
    if (qIdx === n) {
      return { done: true, positions: [...positions], gap: currentGap };
    }

    const memoKey = key(qIdx, tIdx, currentGap);
    if (memo.has(memoKey)) {
      return memo.get(memoKey);
    }

    let bestResult = null;

    for (let i = tIdx; i < m; i++) {
      if (target[i] === query[qIdx]) {
        positions.push(i);
        const gap = qIdx === 0 ? 0 : i - positions[positions.length - 2] - 1;
        const newGap = currentGap + gap;

        if (newGap > m) {
          positions.pop();
          continue;
        }

        const result = findBest(qIdx + 1, i + 1, newGap);
        positions.pop();

        if (result && (!bestResult || result.gap < bestResult.gap)) {
          bestResult = result;
          if (result.gap === 0) break;
        }
      }
    }

    memo.set(memoKey, bestResult);
    return bestResult;
  };

  const result = findBest(0, 0, 0);
  if (!result) return null;

  const consecutive = (() => {
    let c = 1;
    for (let i = 1; i < result.positions.length; i++) {
      if (result.positions[i] === result.positions[i - 1] + 1) {
        c++;
      }
    }
    return c;
  })();

  return {
    positions: result.positions,
    consecutive,
    score: calculateMatchScore(query, target, result.positions, consecutive),
  };
};

const calculateMatchScore = (query, target, positions, consecutive) => {
  const n = positions.length;
  const m = target.length;

  if (n === 0) return 0;

  let score = 1.0;

  const startBonus = (m - positions[0]) / m;
  score += startBonus * 0.5;

  let gapPenalty = 0;
  for (let i = 1; i < n; i++) {
    const gap = positions[i] - positions[i - 1] - 1;
    if (gap > 0) {
      gapPenalty += Math.min(gap / m, 1.0) * 0.3;
    }
  }
  score -= gapPenalty;

  const consecutiveBonus = consecutive / n;
  score += consecutiveBonus * 0.3;

  let boundaryBonus = 0;
  for (let i = 0; i < n; i++) {
    const char = target[positions[i]];
    if (i === 0 || isWordBoundary(char)) {
      boundaryBonus += 0.05;
    }
    if (i > 0) {
      const prevChar = target[positions[i - 1]];
      if (isCaseTransition(prevChar, char)) {
        boundaryBonus += 0.03;
      }
    }
  }
  score = Math.min(1.0, score + boundaryBonus);

  const lengthPenalty = Math.abs(query.length - n) / Math.max(query.length, m);
  score -= lengthPenalty * 0.2;

  return Math.max(0, Math.min(1.0, score));
};

const fuzzyMatch = (query, target) => {
  const lowerQuery = query.toLowerCase();
  const lowerTarget = target.toLowerCase();

  if (lowerQuery.length === 0) return null;
  if (lowerTarget.length === 0) return null;

  if (lowerTarget === lowerQuery) {
    return 1.0;
  }

  if (lowerTarget.includes(lowerQuery)) {
    const ratio = lowerQuery.length / lowerTarget.length;
    return 0.8 + ratio * 0.2;
  }

  const match = findBestSubsequenceMatch(lowerQuery, lowerTarget);
  if (!match) {
    return null;
  }

  return Math.min(1.0, match.score);
};

self.onmessage = function (e) {
  const { messageId, type, data } = e.data;

  const respond = (type, data) => {
    self.postMessage({ messageId, type, data });
  };

  const respondError = (error) => {
    self.postMessage({
      messageId,
      type: "error",
      error: error.message || String(error),
    });
  };

  try {
    if (type === "tokenize") {
      const text = typeof data === "string" ? data : "";
      const words = text.toLowerCase().match(/\b[a-zA-Z0-9_-]+\b/g) || [];
      const tokens = words.filter((word) => word.length > 2);
      const uniqueTokens = Array.from(new Set(tokens));
      respond("tokens", uniqueTokens);
    } else if (type === "search") {
      const { query, limit = 10 } = data;

      if (!query || typeof query !== "string") {
        respond("results", []);
        return;
      }

      const rawQuery = query.toLowerCase();
      const text = typeof query === "string" ? query : "";
      const words = text.toLowerCase().match(/\b[a-zA-Z0-9_-]+\b/g) || [];
      const searchTerms = words.filter((word) => word.length > 2);

      let documents = [];
      if (typeof data.documents === "string") {
        documents = JSON.parse(data.documents);
      } else if (Array.isArray(data.documents)) {
        documents = data.documents;
      } else if (typeof data.transferables === "string") {
        documents = JSON.parse(data.transferables);
      }

      if (!Array.isArray(documents) || documents.length === 0) {
        respond("results", []);
        return;
      }

      const useFuzzySearch = rawQuery.length >= 3;

      if (searchTerms.length === 0 && rawQuery.length < 3) {
        respond("results", []);
        return;
      }

      const pageMatches = new Map();

      // Pre-compute lower-case strings for each document
      const processedDocs = documents.map((doc, docId) => {
        const title = typeof doc.title === "string" ? doc.title : "";
        const content = typeof doc.content === "string" ? doc.content : "";

        return {
          docId,
          doc,
          lowerTitle: title.toLowerCase(),
          lowerContent: content.toLowerCase(),
        };
      });

      // First pass: Score pages with fuzzy matching
      processedDocs.forEach(({ docId, doc, lowerTitle, lowerContent }) => {
        let match = pageMatches.get(docId);
        if (!match) {
          match = { doc, pageScore: 0, matchingAnchors: [] };
          pageMatches.set(docId, match);
        }

        if (useFuzzySearch) {
          const fuzzyTitleScore = fuzzyMatch(rawQuery, lowerTitle);
          if (fuzzyTitleScore !== null) {
            match.pageScore += fuzzyTitleScore * 100;
          }

          const fuzzyContentScore = fuzzyMatch(rawQuery, lowerContent);
          if (fuzzyContentScore !== null) {
            match.pageScore += fuzzyContentScore * 30;
          }
        }

        // Token-based exact matching
        searchTerms.forEach((term) => {
          if (lowerTitle.includes(term)) {
            match.pageScore += lowerTitle === term ? 20 : 10;
          }
          if (lowerContent.includes(term)) {
            match.pageScore += 2;
          }
        });
      });

      // Second pass: Find matching anchors
      pageMatches.forEach((match) => {
        const doc = match.doc;
        if (
          !doc.anchors ||
          !Array.isArray(doc.anchors) ||
          doc.anchors.length === 0
        ) {
          return;
        }

        doc.anchors.forEach((anchor) => {
          if (!anchor || !anchor.text) return;

          const anchorText = anchor.text.toLowerCase();
          let anchorMatches = false;

          if (useFuzzySearch) {
            const fuzzyScore = fuzzyMatch(rawQuery, anchorText);
            if (fuzzyScore !== null && fuzzyScore >= 0.4) {
              anchorMatches = true;
            }
          }

          if (!anchorMatches) {
            searchTerms.forEach((term) => {
              if (anchorText.includes(term)) {
                anchorMatches = true;
              }
            });
          }

          if (anchorMatches) {
            match.matchingAnchors.push(anchor);
          }
        });
      });

      const results = Array.from(pageMatches.values())
        .filter((m) => m.pageScore > 5)
        .sort((a, b) => b.pageScore - a.pageScore)
        .slice(0, limit);

      respond("results", results);
    }
  } catch (error) {
    respondError(error);
  }
};