pinakes-core: exclude markdown images from link extraction

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I6977f90d5ef845eeef099c1be4eb587b6a6a6964
2026-02-09 13:17:02 +03:00 · 2026-02-09 13:17:02 +03:00 · 9afe4a4f6a
commit 9afe4a4f6a
parent 3e51e4dcc1
1 changed file with 92 additions and 0 deletions
--- a/crates/pinakes-core/src/links.rs
+++ b/crates/pinakes-core/src/links.rs
@@ -123,6 +123,14 @@ fn extract_markdown_links(source_media_id: MediaId, content: &str) -> Vec<Markdo
    for (line_num, line) in content.lines().enumerate() {
        for cap in re.captures_iter(line) {
            let full_match = cap.get(0).unwrap();
+            let match_start = full_match.start();
+
+            // Skip markdown images: ![alt](image.png)
+            // Check if the character immediately before '[' is '!'
+            if match_start > 0 && line.as_bytes().get(match_start - 1) == Some(&b'!') {
+                continue;
+            }
+
            let text = cap.get(1).unwrap().as_str().trim();
            let path = cap.get(2).unwrap().as_str().trim();

@@ -455,4 +463,88 @@ aliases: Single Alias
        assert_eq!(wikilinks[0].target_path, "note");
        assert_eq!(embeds[0].target_path, "image.png");
    }
+
+    #[test]
+    fn test_exclude_markdown_images() {
+        // Markdown images (`![alt](path)`) must be skipped; only plain `[text](path)` links count.
+        let content = r#"
+# My Note
+
+Here's a regular link: [documentation](docs/guide.md)
+
+Here's an image: ![Screenshot](images/screenshot.png)
+
+Another link: [config](config.toml)
+
+Multiple images:
+![Logo](logo.png) and ![Banner](banner.jpg)
+
+Mixed: [link](file.md) then ![image](pic.png) then [another](other.md)
+"#;
+        let links = extract_links(test_media_id(), content);
+
+        // The fixture holds 4 plain links and 4 images; only the 4 links may be returned.
+        assert_eq!(
+            links.len(),
+            4,
+            "Should extract 4 links, not images. Got: {:#?}",
+            links
+        );
+
+        // Every returned item must carry LinkType::MarkdownLink.
+        for link in &links {
+            assert_eq!(
+                link.link_type,
+                LinkType::MarkdownLink,
+                "Link '{}' should be MarkdownLink type",
+                link.target_path
+            );
+        }
+
+        // Each of the four link targets from the fixture must be present.
+        let targets: Vec<&str> = links.iter().map(|l| l.target_path.as_str()).collect();
+        assert!(
+            targets.contains(&"docs/guide.md"),
+            "Should contain docs/guide.md"
+        );
+        assert!(
+            targets.contains(&"config.toml"),
+            "Should contain config.toml"
+        );
+        assert!(targets.contains(&"file.md"), "Should contain file.md");
+        assert!(targets.contains(&"other.md"), "Should contain other.md");
+
+        // None of the four image targets from the fixture may be present.
+        assert!(
+            !targets.contains(&"images/screenshot.png"),
+            "Should NOT contain screenshot.png (it's an image)"
+        );
+        assert!(
+            !targets.contains(&"logo.png"),
+            "Should NOT contain logo.png (it's an image)"
+        );
+        assert!(
+            !targets.contains(&"banner.jpg"),
+            "Should NOT contain banner.jpg (it's an image)"
+        );
+        assert!(
+            !targets.contains(&"pic.png"),
+            "Should NOT contain pic.png (it's an image)"
+        );
+    }
+
+    #[test]
+    fn test_edge_case_image_at_line_start() {
+        // Line-start edge case: '!' is the first byte of line 1; the link on line 2 starts at column 0.
+        let content = "![Image at start](start.png)\n[Link](file.md)";
+        let links = extract_links(test_media_id(), content);
+
+        assert_eq!(
+            links.len(),
+            1,
+            "Should only extract the link, not the image"
+        );
+        assert_eq!(links[0].target_path, "file.md");
+        assert_eq!(links[0].link_type, LinkType::MarkdownLink);
+    }
 }