diff --git a/crates/pinakes-core/src/links.rs b/crates/pinakes-core/src/links.rs index aed73d8..6b7cfcf 100644 --- a/crates/pinakes-core/src/links.rs +++ b/crates/pinakes-core/src/links.rs @@ -123,6 +123,14 @@ fn extract_markdown_links(source_media_id: MediaId, content: &str) -> Vec 0 && line.as_bytes().get(match_start - 1) == Some(&b'!') { + continue; + } + let text = cap.get(1).unwrap().as_str().trim(); let path = cap.get(2).unwrap().as_str().trim(); @@ -455,4 +463,88 @@ aliases: Single Alias assert_eq!(wikilinks[0].target_path, "note"); assert_eq!(embeds[0].target_path, "image.png"); } + + #[test] + fn test_exclude_markdown_images() { + // Test that markdown images ![alt](image.png) are NOT extracted as links + let content = r#" +# My Note + +Here's a regular link: [documentation](docs/guide.md) + +Here's an image: ![Screenshot](images/screenshot.png) + +Another link: [config](config.toml) + +Multiple images: +![Logo](logo.png) and ![Banner](banner.jpg) + +Mixed: [link](file.md) then ![image](pic.png) then [another](other.md) +"#; + let links = extract_links(test_media_id(), content); + + // Should only extract the 4 markdown links, not the 4 images + assert_eq!( + links.len(), + 4, + "Should extract 4 links, not images. Got: {:#?}", + links + ); + + // Verify all extracted items are MarkdownLink type (not images) + for link in &links { + assert_eq!( + link.link_type, + LinkType::MarkdownLink, + "Link '{}' should be MarkdownLink type", + link.target_path + ); + } + + // Verify correct targets were extracted (links, not images) + let targets: Vec<&str> = links.iter().map(|l| l.target_path.as_str()).collect(); + assert!( + targets.contains(&"docs/guide.md"), + "Should contain docs/guide.md" + ); + assert!( + targets.contains(&"config.toml"), + "Should contain config.toml" + ); + assert!(targets.contains(&"file.md"), "Should contain file.md"); + assert!(targets.contains(&"other.md"), "Should contain other.md"); + + // Verify images were NOT extracted + assert!( + !targets.contains(&"images/screenshot.png"), + "Should NOT contain screenshot.png (it's an image)" + ); + assert!( + !targets.contains(&"logo.png"), + "Should NOT contain logo.png (it's an image)" + ); + assert!( + !targets.contains(&"banner.jpg"), + "Should NOT contain banner.jpg (it's an image)" + ); + assert!( + !targets.contains(&"pic.png"), + "Should NOT contain pic.png (it's an image)" + ); + } + + #[test] + fn test_edge_case_image_at_line_start() { + // Test edge case: image at the very start of a line + let content = "![Image at start](start.png)\n[Link](file.md)"; + let links = extract_links(test_media_id(), content); + + assert_eq!( + links.len(), + 1, + "Should only extract the link, not the image" + ); + assert_eq!(links[0].target_path, "file.md"); + assert_eq!(links[0].link_type, LinkType::MarkdownLink); + } }