pinakes-core: update remaining modules and tests

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I9e0ff5ea33a5cf697473423e88f167ce6a6a6964
2026-03-08 00:42:29 +03:00 · 2026-03-08 00:42:29 +03:00 · 3d9f8933d2
commit 3d9f8933d2
parent c8425a4c34
44 changed files with 1207 additions and 578 deletions
--- a/crates/pinakes-core/src/links.rs
+++ b/crates/pinakes-core/src/links.rs
@ -15,18 +15,17 @@ use uuid::Uuid;

 use crate::model::{LinkType, MarkdownLink, MediaId};

-// Compile regexes once at startup to avoid recompilation on every call
-static WIKILINK_RE: LazyLock<Regex> = LazyLock::new(|| {
-  Regex::new(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").expect("valid wikilink regex")
-});
+// Compile regexes once at startup to avoid recompilation on every call.
+// Stored as Option so that initialization failure is handled gracefully
+// rather than panicking.
+static WIKILINK_RE: LazyLock<Option<Regex>> =
+  LazyLock::new(|| Regex::new(r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").ok());

-static EMBED_RE: LazyLock<Regex> = LazyLock::new(|| {
-  Regex::new(r"!\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").expect("valid embed regex")
-});
+static EMBED_RE: LazyLock<Option<Regex>> =
+  LazyLock::new(|| Regex::new(r"!\[\[([^\]|]+)(?:\|([^\]]+))?\]\]").ok());

-static MARKDOWN_LINK_RE: LazyLock<Regex> = LazyLock::new(|| {
-  Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").expect("valid markdown link regex")
-});
+static MARKDOWN_LINK_RE: LazyLock<Option<Regex>> =
+  LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").ok());

 /// Configuration for context extraction around links
 const CONTEXT_CHARS_BEFORE: usize = 50;
@ -38,6 +37,7 @@ const CONTEXT_CHARS_AFTER: usize = 50;
 /// - Wikilinks: `[[target]]` and `[[target|display text]]`
 /// - Embeds: `![[target]]`
 /// - Markdown links: `[text](path)` (internal paths only, no http/https)
+#[must_use]
 pub fn extract_links(
  source_media_id: MediaId,
  content: &str,
@ -63,10 +63,13 @@ fn extract_wikilinks(
  source_media_id: MediaId,
  content: &str,
 ) -> Vec<MarkdownLink> {
+  let Some(re) = WIKILINK_RE.as_ref() else {
+    return Vec::new();
+  };
  let mut links = Vec::new();

  for (line_num, line) in content.lines().enumerate() {
-    for cap in WIKILINK_RE.captures_iter(line) {
+    for cap in re.captures_iter(line) {
      let Some(full_match) = cap.get(0) else {
        continue;
      };
@ -100,7 +103,11 @@ fn extract_wikilinks(
        target_media_id: None, // Will be resolved later
        link_type: LinkType::Wikilink,
        link_text: display_text.or_else(|| Some(target.to_string())),
-        line_number: Some(line_num as i32 + 1), // 1-indexed
+        line_number: Some(
+          i32::try_from(line_num)
+            .unwrap_or(i32::MAX)
+            .saturating_add(1),
+        ), // 1-indexed
        context: Some(context),
        created_at: chrono::Utc::now(),
      });
@ -116,10 +123,13 @@ fn extract_embeds(
  source_media_id: MediaId,
  content: &str,
 ) -> Vec<MarkdownLink> {
+  let Some(re) = EMBED_RE.as_ref() else {
+    return Vec::new();
+  };
  let mut links = Vec::new();

  for (line_num, line) in content.lines().enumerate() {
-    for cap in EMBED_RE.captures_iter(line) {
+    for cap in re.captures_iter(line) {
      let Some(full_match) = cap.get(0) else {
        continue;
      };
@ -143,7 +153,11 @@ fn extract_embeds(
        target_media_id: None,
        link_type: LinkType::Embed,
        link_text: display_text.or_else(|| Some(target.to_string())),
-        line_number: Some(line_num as i32 + 1),
+        line_number: Some(
+          i32::try_from(line_num)
+            .unwrap_or(i32::MAX)
+            .saturating_add(1),
+        ),
        context: Some(context),
        created_at: chrono::Utc::now(),
      });
@ -159,10 +173,13 @@ fn extract_markdown_links(
  source_media_id: MediaId,
  content: &str,
 ) -> Vec<MarkdownLink> {
+  let Some(re) = MARKDOWN_LINK_RE.as_ref() else {
+    return Vec::new();
+  };
  let mut links = Vec::new();

  for (line_num, line) in content.lines().enumerate() {
-    for cap in MARKDOWN_LINK_RE.captures_iter(line) {
+    for cap in re.captures_iter(line) {
      let Some(full_match) = cap.get(0) else {
        continue;
      };
@ -215,7 +232,11 @@ fn extract_markdown_links(
        target_media_id: None,
        link_type: LinkType::MarkdownLink,
        link_text: Some(text.to_string()),
-        line_number: Some(line_num as i32 + 1),
+        line_number: Some(
+          i32::try_from(line_num)
+            .unwrap_or(i32::MAX)
+            .saturating_add(1),
+        ),
        context: Some(context),
        created_at: chrono::Utc::now(),
      });
@ -278,6 +299,7 @@ pub enum ResolutionStrategy {
 /// Resolve a link target to possible file paths.
 ///
 /// Returns a list of candidate paths to check, in order of preference.
+#[must_use]
 pub fn resolve_link_candidates(
  target: &str,
  source_path: &Path,
@ -307,7 +329,7 @@ pub fn resolve_link_candidates(
    candidates.push(relative.clone());

    // Also try with .md extension
-    if !target.ends_with(".md") {
+    if !target.to_ascii_lowercase().ends_with(".md") {
      candidates.push(relative.with_extension("md"));
      let mut with_md = relative.clone();
      with_md.set_file_name(format!(
@ -319,10 +341,10 @@ pub fn resolve_link_candidates(
  }

  // 3. Filename with .md extension in root dirs
-  let target_with_md = if target.ends_with(".md") {
+  let target_with_md = if target.to_ascii_lowercase().ends_with(".md") {
    target.to_string()
  } else {
-    format!("{}.md", target)
+    format!("{target}.md")
  };

  for root in root_dirs {
@ -340,6 +362,7 @@ pub fn resolve_link_candidates(
 ///
 /// Obsidian uses the `aliases` field in frontmatter to define alternative names
 /// for a note that can be used in wikilinks.
+#[must_use]
 pub fn extract_aliases(content: &str) -> Vec<String> {
  let Ok(parsed) =
    gray_matter::Matter::<gray_matter::engine::YAML>::new().parse(content)
@ -441,7 +464,7 @@ mod tests {

  #[test]
  fn test_multiple_links() {
-    let content = r#"
+    let content = r"
 # My Note

 This links to [[Note A]] and also [[Note B|Note B Title]].
@ -449,7 +472,7 @@ This links to [[Note A]] and also [[Note B|Note B Title]].
 We also have a markdown link to [config](./config.md).

 And an embedded image: ![[diagram.png]]
-"#;
+";
    let links = extract_links(test_media_id(), content);

    assert_eq!(links.len(), 4);
@ -488,7 +511,7 @@ And an embedded image: ![[diagram.png]]

  #[test]
  fn test_extract_aliases() {
-    let content = r#"---
+    let content = r"---
 title: My Note
 aliases:
  - Alternative Name
@ -496,20 +519,20 @@ aliases:
 ---

 # Content here
-"#;
+";
    let aliases = extract_aliases(content);
    assert_eq!(aliases, vec!["Alternative Name", "Another Alias"]);
  }

  #[test]
  fn test_extract_single_alias() {
-    let content = r#"---
+    let content = r"---
 title: My Note
 aliases: Single Alias
 ---

 # Content
-"#;
+";
    let aliases = extract_aliases(content);
    assert_eq!(aliases, vec!["Single Alias"]);
  }
@ -538,7 +561,7 @@ aliases: Single Alias
  #[test]
  fn test_exclude_markdown_images() {
    // Test that markdown images ![alt](image.png) are NOT extracted as links
-    let content = r#"
+    let content = r"
 # My Note

 Here's a regular link: [documentation](docs/guide.md)
@ -551,15 +574,14 @@ Multiple images:
 ![Logo](logo.png) and ![Banner](banner.jpg)

 Mixed: [link](file.md) then ![image](pic.png) then [another](other.md)
-"#;
+";
    let links = extract_links(test_media_id(), content);

    // Should only extract the 4 markdown links, not the 4 images
    assert_eq!(
      links.len(),
      4,
-      "Should extract 4 links, not images. Got: {:#?}",
-      links
+      "Should extract 4 links, not images. Got: {links:#?}"
    );

    // Verify all extracted items are MarkdownLink type (not images)